-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
pushed all code used in the experiments
- Loading branch information
Showing
6 changed files
with
32,501 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
Create the `pom_info` table:
|
||
```sql | ||
-- POM-level flags for a library. Each row points at one row in `libraries`
-- through `library_id`; `library_id` is indexed but not unique, so several
-- rows per library are permitted by the schema.
-- NOTE(review): the flag names suggest Maven assembly/shade plugin settings;
-- confirm against the code that populates this table.
CREATE TABLE `pom_info` (
    `id` int(11) NOT NULL AUTO_INCREMENT,
    -- Library this row describes; enforced by `fk_library_info`.
    `library_id` int(11) NOT NULL,
    -- 0/1 flags. They are declared without NOT NULL, so NULL is storable and
    -- readers must not assume a strict two-valued column.
    `has_assembly_plugin` tinyint(1) DEFAULT 0,
    `has_shade_plugin` tinyint(1) DEFAULT 0,
    `has_dependency_reduced_pom` tinyint(1) DEFAULT 0,
    `has_minimize_jar` tinyint(1) DEFAULT 0,
    `has_relocations` tinyint(1) DEFAULT 0,
    `has_filters` tinyint(1) DEFAULT 0,
    `has_transformers` tinyint(1) DEFAULT 0,
    -- Optional reference to another `libraries` row (note: not to `pom_info`).
    `parent_id` int(11) DEFAULT NULL,
    PRIMARY KEY (`id`),
    KEY `idx_library_id` (`library_id`),
    KEY `fk_parent_id` (`parent_id`),
    -- No ON DELETE / ON UPDATE action is given, so the engine default applies.
    CONSTRAINT `fk_library_info` FOREIGN KEY (`library_id`) REFERENCES `libraries` (`id`),
    CONSTRAINT `fk_parent_id` FOREIGN KEY (`parent_id`) REFERENCES `libraries` (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
``` | ||
|
||
|
||
|
||
-- Signature rows keyed by a surrogate id; each row stores a library id plus
-- two 64-bit integers (`class_hash`, `class_crc`).
-- NOTE(review): presumably one row per class file of a library; confirm.
-- NOTE(review): no foreign key or secondary index is declared on `library_id`
-- or `class_hash`; add one if lookups by those columns are expected.
--
-- The dump-time table option `AUTO_INCREMENT=1175227255` is intentionally not
-- reproduced: it is a counter snapshot rather than schema, and replaying it
-- would start a fresh table's ids more than halfway through the signed INT
-- range (maximum 2147483647).
CREATE TABLE `signatures_memory` (
    `id` int(11) NOT NULL AUTO_INCREMENT,
    `library_id` int(11) NOT NULL,
    `class_hash` bigint(20) NOT NULL,
    `class_crc` bigint(20) NOT NULL,
    PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,184 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": { | ||
"ExecuteTime": { | ||
"end_time": "2024-03-10T17:45:32.372659Z", | ||
"start_time": "2024-03-10T17:45:32.368224Z" | ||
} | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"import os, json" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": false, | ||
"jupyter": { | ||
"outputs_hidden": false | ||
} | ||
}, | ||
"outputs": [], | ||
"source": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"metadata": { | ||
"ExecuteTime": { | ||
"end_time": "2024-03-12T13:22:34.058895Z", | ||
"start_time": "2024-03-12T13:22:33.171578Z" | ||
} | ||
}, | ||
"outputs": [ | ||
{ | ||
"ename": "KeyboardInterrupt", | ||
"evalue": "", | ||
"output_type": "error", | ||
"traceback": [ | ||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | ||
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", | ||
"Cell \u001b[0;32mIn[7], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m project_dir \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m../projects_metadata\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2\u001b[0m result_dir \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m../evaluation\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 3\u001b[0m project_files \u001b[38;5;241m=\u001b[39m \u001b[43mos\u001b[49m\u001b[38;5;241m.\u001b[39mlistdir(project_dir)\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcompare_results\u001b[39m(expected, actual, threshold\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0.95\u001b[39m):\n\u001b[1;32m 7\u001b[0m true_positives \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n", | ||
"Cell \u001b[0;32mIn[7], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m project_dir \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m../projects_metadata\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2\u001b[0m result_dir \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m../evaluation\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 3\u001b[0m project_files \u001b[38;5;241m=\u001b[39m \u001b[43mos\u001b[49m\u001b[38;5;241m.\u001b[39mlistdir(project_dir)\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcompare_results\u001b[39m(expected, actual, threshold\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0.95\u001b[39m):\n\u001b[1;32m 7\u001b[0m true_positives \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n", | ||
"File \u001b[0;32m_pydevd_bundle/pydevd_cython.pyx:1457\u001b[0m, in \u001b[0;36m_pydevd_bundle.pydevd_cython.SafeCallWrapper.__call__\u001b[0;34m()\u001b[0m\n", | ||
"File \u001b[0;32m_pydevd_bundle/pydevd_cython.pyx:701\u001b[0m, in \u001b[0;36m_pydevd_bundle.pydevd_cython.PyDBFrame.trace_dispatch\u001b[0;34m()\u001b[0m\n", | ||
"File \u001b[0;32m_pydevd_bundle/pydevd_cython.pyx:1395\u001b[0m, in \u001b[0;36m_pydevd_bundle.pydevd_cython.PyDBFrame.trace_dispatch\u001b[0;34m()\u001b[0m\n", | ||
"File \u001b[0;32m_pydevd_bundle/pydevd_cython.pyx:1344\u001b[0m, in \u001b[0;36m_pydevd_bundle.pydevd_cython.PyDBFrame.trace_dispatch\u001b[0;34m()\u001b[0m\n", | ||
"File \u001b[0;32m_pydevd_bundle/pydevd_cython.pyx:312\u001b[0m, in \u001b[0;36m_pydevd_bundle.pydevd_cython.PyDBFrame.do_wait_suspend\u001b[0;34m()\u001b[0m\n", | ||
"File \u001b[0;32m~/tudelft/master_thesis/jar-vulnerability-detection/util/venv/lib/python3.11/site-packages/debugpy/_vendored/pydevd/pydevd.py:2070\u001b[0m, in \u001b[0;36mPyDB.do_wait_suspend\u001b[0;34m(self, thread, frame, event, arg, exception_type)\u001b[0m\n\u001b[1;32m 2067\u001b[0m from_this_thread\u001b[38;5;241m.\u001b[39mappend(frame_custom_thread_id)\n\u001b[1;32m 2069\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_threads_suspended_single_notification\u001b[38;5;241m.\u001b[39mnotify_thread_suspended(thread_id, thread, stop_reason):\n\u001b[0;32m-> 2070\u001b[0m keep_suspended \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_do_wait_suspend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mthread\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mframe\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mevent\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43marg\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msuspend_type\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfrom_this_thread\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mframes_tracker\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2072\u001b[0m frames_list \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 2074\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m keep_suspended:\n\u001b[1;32m 2075\u001b[0m \u001b[38;5;66;03m# This means that we should pause again after a set next statement.\u001b[39;00m\n", | ||
"File \u001b[0;32m~/tudelft/master_thesis/jar-vulnerability-detection/util/venv/lib/python3.11/site-packages/debugpy/_vendored/pydevd/pydevd.py:2106\u001b[0m, in \u001b[0;36mPyDB._do_wait_suspend\u001b[0;34m(self, thread, frame, event, arg, suspend_type, from_this_thread, frames_tracker)\u001b[0m\n\u001b[1;32m 2103\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_call_input_hook()\n\u001b[1;32m 2105\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprocess_internal_commands()\n\u001b[0;32m-> 2106\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(\u001b[38;5;241m0.01\u001b[39m)\n\u001b[1;32m 2108\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcancel_async_evaluation(get_current_thread_id(thread), \u001b[38;5;28mstr\u001b[39m(\u001b[38;5;28mid\u001b[39m(frame)))\n\u001b[1;32m 2110\u001b[0m \u001b[38;5;66;03m# process any stepping instructions\u001b[39;00m\n", | ||
"\u001b[0;31mKeyboardInterrupt\u001b[0m: " | ||
] | ||
} | ||
], | ||
"source": [
 "project_dir = \"../projects_metadata\"\n",
 "result_dir = \"../evaluation\"\n",
 "project_files = os.listdir(project_dir)\n",
 "\n",
 "\n",
 "def compare_results(expected, actual, threshold=0.95):\n",
 "    true_positives = 0\n",
 "    false_positives = 0\n",
 "    false_negatives = 0\n",
 "\n",
 "    for dep in expected['effectiveDependencies']:\n",
 "        if not dep['presentInDatabase']:\n",
 "            continue  # skip dependencies that are not present in the database\n",
 "\n",
 "        if dep in actual['notFoundLibraries']:\n",
 "            # not one class file of this dep was found in the uber-jar, then it most probably has no class files\n",
 "            continue\n",
 "\n",
 "        found = False\n",
 "        gav = dep['groupId'] + \":\" + dep['artifactId'] + \":\" + dep['version']\n",
 "\n",
 "        for inferred_dep in actual['inferredLibraries']:\n",
 "            if inferred_dep['includedRatio'] < threshold:\n",
 "                continue\n",
 "            if gav == inferred_dep['gav'] or gav in inferred_dep['alternativeVersions']:\n",
 "                true_positives += 1\n",
 "                found = True\n",
 "                break\n",
 "\n",
 "        if not found:\n",
 "            false_negatives += 1\n",
 "\n",
 "    # count the inferred libraries themselves; iterating `actual` directly would walk the dict's keys\n",
 "    nb_actual = sum(1 for inferred_dep in actual['inferredLibraries'] if inferred_dep['includedRatio'] >= threshold)\n",
 "    false_positives = nb_actual - true_positives\n",
 "    return (true_positives, false_positives, false_negatives)\n",
 "\n",
 "\n",
 "shadeConfigurations = [(True, True), (True, False), (False, True), (False, False)]\n",
 "for shadeConfig in shadeConfigurations:\n",
 "    precisions = {}\n",
 "    recalls = {}\n",
 "    f1s = {}\n",
 "    for threshold in [0.5, 0.75, 0.9, 0.95, 0.99, 1.0]:\n",
 "        precisions[threshold] = []\n",
 "        recalls[threshold] = []\n",
 "        f1s[threshold] = []\n",
 "        for project_file in sorted(project_files):\n",
 "            expected_data = None\n",
 "            actual_data = None\n",
 "            with open(os.path.join(project_dir, project_file), \"r\") as f:\n",
 "                expected_data = json.load(f)\n",
 "\n",
 "            if expected_data['shadeConfiguration']['minimizeJar'] and not shadeConfig[0]:\n",
 "                continue\n",
 "            if expected_data['shadeConfiguration']['relocation'] and not shadeConfig[1]:\n",
 "                continue\n",
 "            actual_file_path = os.path.join(result_dir, project_file.replace(\".json\", \"_libraries.json\"))\n",
 "            if not os.path.exists(actual_file_path):\n",
 "                continue\n",
 "            with open(actual_file_path, \"r\") as f:\n",
 "                actual_data = json.load(f)\n",
 "\n",
 "            results = compare_results(expected_data, actual_data, threshold)\n",
 "\n",
 "            precision = results[0] / (results[0] + results[1]) if results[0] + results[1] > 0 else 1\n",
 "            recall = results[0] / (results[0] + results[2]) if results[0] + results[2] > 0 else 1\n",
 "            f1 = 2 * precision * recall / (precision + recall) if precision + recall > 0 else 0\n",
 "\n",
 "            precisions[threshold].append(precision)\n",
 "            recalls[threshold].append(recall)\n",
 "            f1s[threshold].append(f1)\n",
 "            # print(pfile, results, precision, recall, f1)\n",
 "\n",
 "    print(\"minimizeJar:\", shadeConfig[0], \"relocation:\", shadeConfig[1])\n",
 "    for threshold in [0.5, 0.75, 0.9, 0.95, 0.99, 1.0]:\n",
 "        precision = sum(precisions[threshold]) / len(precisions[threshold])\n",
 "        recall = sum(recalls[threshold]) / len(recalls[threshold])\n",
 "        f1 = sum(f1s[threshold]) / len(f1s[threshold])\n",
 "        print(\"%.2f & %.3f & %.3f & %.3f \\\\\\\\\" % (threshold, precision, recall, f1))\n",
 "    print(\"\")"
]
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": false, | ||
"jupyter": { | ||
"outputs_hidden": false | ||
} | ||
}, | ||
"outputs": [], | ||
"source": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.11.8" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 4 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
import json
import os

# Directory whose files are parsed as the expected per-project metadata.
project_dir = "../projects_metadata"
# Directory searched for the matching "<name>_libraries.json" result files.
result_dir = "../evaluation"
# Every entry is later fed to json.load and renamed via its ".json" suffix,
# so keep only JSON files and ignore any other directory entries.
project_files = [name for name in os.listdir(project_dir) if name.endswith(".json")]
|
||
|
||
def compare_results(expected, actual, threshold=0.95):
    """Score the inferred libraries of one project against its expected dependencies.

    Args:
        expected: Project metadata dict. Reads ``expected["effectiveDependencies"]``,
            a list of dicts with ``groupId``, ``artifactId``, ``version`` and
            ``presentInDatabase`` keys.
        actual: Detection result dict. Reads ``actual["notFoundLibraries"]`` and
            ``actual["inferredLibraries"]``; every inferred library is a dict with
            ``gav``, ``includedRatio`` and ``alternativeVersions`` keys.
        threshold: Minimum ``includedRatio`` an inferred library must reach to be
            taken into account at all.

    Returns:
        Tuple ``(true_positives, false_positives, false_negatives)``.
    """
    # Libraries below the threshold are ignored both for matching and for the
    # false-positive count, so select them once.
    accepted = [
        inferred_dep
        for inferred_dep in actual["inferredLibraries"]
        if inferred_dep["includedRatio"] >= threshold
    ]

    true_positives = 0
    false_negatives = 0

    for dep in expected["effectiveDependencies"]:
        if not dep["presentInDatabase"]:
            continue  # skip dependencies that are not present in the database

        # NOTE(review): this compares the whole dependency dict against the
        # entries of notFoundLibraries; confirm those entries have the same shape.
        if dep in actual["notFoundLibraries"]:
            # not one class file of this dep was found in the uber-jar, then it
            # most probably has no class files
            continue

        gav = ":".join((dep["groupId"], dep["artifactId"], dep["version"]))

        # A dependency is found when its GAV is the inferred GAV itself or one
        # of the alternative versions listed for that inferred library.
        if any(
            gav == inferred_dep["gav"] or gav in inferred_dep["alternativeVersions"]
            for inferred_dep in accepted
        ):
            true_positives += 1
        else:
            false_negatives += 1

    # NOTE(review): if two expected dependencies match the same inferred library,
    # this difference undercounts false positives (and can go negative); confirm
    # that cannot happen in the data.
    false_positives = len(accepted) - true_positives
    return (true_positives, false_positives, false_negatives)
|
||
|
||
# includedRatio thresholds that are evaluated AND reported. One shared list keeps
# the evaluation loop and the report loop from drifting apart (the report used to
# ask for 0.25, which was never evaluated, and failed with a KeyError).
thresholds = [0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 1.0]


def _mean(values):
    """Return the arithmetic mean of ``values``, or NaN when the list is empty."""
    return sum(values) / len(values) if values else float("nan")


# Each pair is (projects using minimizeJar allowed, projects using relocation allowed).
shadeConfigurations = [(True, True), (True, False), (False, True), (False, False)]
for shadeConfig in shadeConfigurations:
    # Parse every (expected, actual) pair of this configuration once; the pairs do
    # not depend on the threshold, so there is no need to re-read them per threshold.
    project_results = []
    for project_file in sorted(project_files):
        with open(os.path.join(project_dir, project_file), "r") as f:
            expected_data = json.load(f)

        shade = expected_data["shadeConfiguration"]
        if shade["minimizeJar"] and not shadeConfig[0]:
            continue
        if shade["relocation"] and not shadeConfig[1]:
            continue

        actual_file_path = os.path.join(
            result_dir, project_file.replace(".json", "_libraries.json")
        )
        if not os.path.exists(actual_file_path):
            continue  # no detection result was produced for this project
        with open(actual_file_path, "r") as f:
            actual_data = json.load(f)

        project_results.append((expected_data, actual_data))

    precisions = {threshold: [] for threshold in thresholds}
    recalls = {threshold: [] for threshold in thresholds}
    f1s = {threshold: [] for threshold in thresholds}

    for threshold in thresholds:
        for expected_data, actual_data in project_results:
            true_pos, false_pos, false_neg = compare_results(
                expected_data, actual_data, threshold
            )

            # With nothing predicted (or nothing expected) the ratio is undefined;
            # it is scored as 1, i.e. not held against the project.
            precision = true_pos / (true_pos + false_pos) if true_pos + false_pos > 0 else 1
            recall = true_pos / (true_pos + false_neg) if true_pos + false_neg > 0 else 1
            f1 = (
                2 * precision * recall / (precision + recall)
                if precision + recall > 0
                else 0
            )

            precisions[threshold].append(precision)
            recalls[threshold].append(recall)
            f1s[threshold].append(f1)

    print("minimizeJar:", shadeConfig[0], "relocation:", shadeConfig[1])
    for threshold in thresholds:
        # Macro-averages over projects; printed as one LaTeX table row per threshold.
        # A configuration without any project prints "nan" instead of crashing.
        precision = _mean(precisions[threshold])
        recall = _mean(recalls[threshold])
        f1 = _mean(f1s[threshold])
        print("%.2f & %.3f & %.3f & %.3f \\\\" % (threshold, precision, recall, f1))
    print("")
Large diffs are not rendered by default.
Oops, something went wrong.
Oops, something went wrong.