
Commit

pushed all code used in the experiments
Cornul11 committed Mar 18, 2024
1 parent 4fddc48 commit cf6c0c3
Showing 6 changed files with 32,501 additions and 0 deletions.
31 changes: 31 additions & 0 deletions util/README.md
@@ -0,0 +1,31 @@
Create the pom_info table:

```sql
CREATE TABLE `pom_info` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`library_id` int(11) NOT NULL,
`has_assembly_plugin` tinyint(1) DEFAULT 0,
`has_shade_plugin` tinyint(1) DEFAULT 0,
`has_dependency_reduced_pom` tinyint(1) DEFAULT 0,
`has_minimize_jar` tinyint(1) DEFAULT 0,
`has_relocations` tinyint(1) DEFAULT 0,
`has_filters` tinyint(1) DEFAULT 0,
`has_transformers` tinyint(1) DEFAULT 0,
`parent_id` int(11) DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `idx_library_id` (`library_id`),
KEY `fk_parent_id` (`parent_id`),
CONSTRAINT `fk_library_info` FOREIGN KEY (`library_id`) REFERENCES `libraries` (`id`),
CONSTRAINT `fk_parent_id` FOREIGN KEY (`parent_id`) REFERENCES `libraries` (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci
```



Create the signatures_memory table:

```sql
CREATE TABLE `signatures_memory` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `library_id` int(11) NOT NULL,
  `class_hash` bigint(20) NOT NULL,
  `class_crc` bigint(20) NOT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1175227255 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci
```
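A minimal sketch of querying these tables from Python, assuming the `mysql-connector-python` package is installed; the connection parameters and database name are placeholders, and the `libraries` table is only assumed to expose the `id` column referenced by the foreign keys above:

```python
# Hypothetical usage sketch: count libraries whose POM uses the shade plugin
# with relocations. Connection details below are placeholders.
import mysql.connector

conn = mysql.connector.connect(
    host="localhost", user="user", password="password", database="maven_db"
)
cur = conn.cursor()

# Join pom_info back to libraries via the fk_library_info foreign key.
cur.execute(
    """
    SELECT p.library_id, p.has_minimize_jar, p.has_relocations
    FROM pom_info p
    JOIN libraries l ON p.library_id = l.id
    WHERE p.has_shade_plugin = 1 AND p.has_relocations = 1
    """
)
rows = cur.fetchall()
print(f"{len(rows)} libraries use the shade plugin with relocations")

cur.close()
conn.close()
```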
184 changes: 184 additions & 0 deletions util/evaluation_results.ipynb
@@ -0,0 +1,184 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-10T17:45:32.372659Z",
"start_time": "2024-03-10T17:45:32.368224Z"
}
},
"outputs": [],
"source": [
"import os, json"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-12T13:22:34.058895Z",
"start_time": "2024-03-12T13:22:33.171578Z"
}
},
"outputs": [
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[7], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m project_dir \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m../projects_metadata\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2\u001b[0m result_dir \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m../evaluation\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 3\u001b[0m project_files \u001b[38;5;241m=\u001b[39m \u001b[43mos\u001b[49m\u001b[38;5;241m.\u001b[39mlistdir(project_dir)\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcompare_results\u001b[39m(expected, actual, threshold\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0.95\u001b[39m):\n\u001b[1;32m 7\u001b[0m true_positives \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n",
"Cell \u001b[0;32mIn[7], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m project_dir \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m../projects_metadata\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2\u001b[0m result_dir \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m../evaluation\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 3\u001b[0m project_files \u001b[38;5;241m=\u001b[39m \u001b[43mos\u001b[49m\u001b[38;5;241m.\u001b[39mlistdir(project_dir)\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcompare_results\u001b[39m(expected, actual, threshold\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0.95\u001b[39m):\n\u001b[1;32m 7\u001b[0m true_positives \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n",
"File \u001b[0;32m_pydevd_bundle/pydevd_cython.pyx:1457\u001b[0m, in \u001b[0;36m_pydevd_bundle.pydevd_cython.SafeCallWrapper.__call__\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32m_pydevd_bundle/pydevd_cython.pyx:701\u001b[0m, in \u001b[0;36m_pydevd_bundle.pydevd_cython.PyDBFrame.trace_dispatch\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32m_pydevd_bundle/pydevd_cython.pyx:1395\u001b[0m, in \u001b[0;36m_pydevd_bundle.pydevd_cython.PyDBFrame.trace_dispatch\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32m_pydevd_bundle/pydevd_cython.pyx:1344\u001b[0m, in \u001b[0;36m_pydevd_bundle.pydevd_cython.PyDBFrame.trace_dispatch\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32m_pydevd_bundle/pydevd_cython.pyx:312\u001b[0m, in \u001b[0;36m_pydevd_bundle.pydevd_cython.PyDBFrame.do_wait_suspend\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32m~/tudelft/master_thesis/jar-vulnerability-detection/util/venv/lib/python3.11/site-packages/debugpy/_vendored/pydevd/pydevd.py:2070\u001b[0m, in \u001b[0;36mPyDB.do_wait_suspend\u001b[0;34m(self, thread, frame, event, arg, exception_type)\u001b[0m\n\u001b[1;32m 2067\u001b[0m from_this_thread\u001b[38;5;241m.\u001b[39mappend(frame_custom_thread_id)\n\u001b[1;32m 2069\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_threads_suspended_single_notification\u001b[38;5;241m.\u001b[39mnotify_thread_suspended(thread_id, thread, stop_reason):\n\u001b[0;32m-> 2070\u001b[0m keep_suspended \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_do_wait_suspend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mthread\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mframe\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mevent\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43marg\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msuspend_type\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfrom_this_thread\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mframes_tracker\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2072\u001b[0m frames_list \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 2074\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m keep_suspended:\n\u001b[1;32m 2075\u001b[0m \u001b[38;5;66;03m# This means that we should pause again after a set next statement.\u001b[39;00m\n",
"File \u001b[0;32m~/tudelft/master_thesis/jar-vulnerability-detection/util/venv/lib/python3.11/site-packages/debugpy/_vendored/pydevd/pydevd.py:2106\u001b[0m, in \u001b[0;36mPyDB._do_wait_suspend\u001b[0;34m(self, thread, frame, event, arg, suspend_type, from_this_thread, frames_tracker)\u001b[0m\n\u001b[1;32m 2103\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_call_input_hook()\n\u001b[1;32m 2105\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprocess_internal_commands()\n\u001b[0;32m-> 2106\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(\u001b[38;5;241m0.01\u001b[39m)\n\u001b[1;32m 2108\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcancel_async_evaluation(get_current_thread_id(thread), \u001b[38;5;28mstr\u001b[39m(\u001b[38;5;28mid\u001b[39m(frame)))\n\u001b[1;32m 2110\u001b[0m \u001b[38;5;66;03m# process any stepping instructions\u001b[39;00m\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"project_dir = \"../projects_metadata\"\n",
"result_dir = \"../evaluation\"\n",
"project_files = os.listdir(project_dir)\n",
"\n",
"\n",
"def compare_results(expected, actual, threshold=0.95):\n",
" true_positives = 0\n",
" false_positives = 0\n",
" false_negatives = 0\n",
"\n",
" for dep in expected['effectiveDependencies']:\n",
" if not dep['presentInDatabase']:\n",
" continue # skip dependencies that are not present in the database\n",
"\n",
" if dep in actual['notFoundLibraries']:\n",
" # not one class file of this dep was found in the uber-jar, then it most probably has no class files\n",
" continue\n",
"\n",
" found = False\n",
" gav = dep['groupId'] + \":\" + dep['artifactId'] + \":\" + dep['version']\n",
"\n",
" for inferred_dep in actual['inferredLibraries']:\n",
" if inferred_dep['includedRatio'] < threshold:\n",
" continue\n",
" if gav == inferred_dep['gav'] or gav in inferred_dep['alternativeVersions']:\n",
" true_positives += 1\n",
" found = True\n",
" break\n",
"\n",
" if not found:\n",
" false_negatives += 1\n",
"\n",
" nb_actual = sum(1 for inferred_dep in actual if inferred_dep['includedRatio'] >= threshold)\n",
" false_positives = nb_actual - true_positives\n",
" return (true_positives, false_positives, false_negatives)\n",
"\n",
"\n",
"shadeConfigurations = [(True, True), (True, False), (False, True), (False, False)]\n",
"for shadeConfig in shadeConfigurations:\n",
" precisions = {}\n",
" recalls = {}\n",
" f1s = {}\n",
" for threshold in [0.5, 0.75, 0.9, 0.95, 0.99, 1.0]:\n",
" precisions[threshold] = []\n",
" recalls[threshold] = []\n",
" f1s[threshold] = []\n",
" for project_file in sorted(project_files):\n",
" expected_data = None\n",
" actual_data = None\n",
" with open(os.path.join(project_dir, project_file), \"r\") as f:\n",
" expected_data = json.load(f)\n",
"\n",
" if expected_data['shadeConfiguration']['minimizeJar'] and not shadeConfig[0]:\n",
" continue\n",
" if expected_data['shadeConfiguration']['relocation'] and not shadeConfig[1]:\n",
" continue\n",
" actual_file_path = os.path.join(result_dir, project_file.replace(\".json\", \"_libraries.json\"))\n",
" if not os.path.exists(actual_file_path):\n",
" continue\n",
" with open(actual_file_path, \"r\") as f:\n",
" actual_data = json.load(f)\n",
"\n",
" results = compare_results(expected_data, actual_data, threshold)\n",
"\n",
" precision = results[0] / (results[0] + results[1]) if results[0] + results[1] > 0 else 1\n",
" recall = results[0] / (results[0] + results[2]) if results[0] + results[2] > 0 else 1\n",
" f1 = 2 * precision * recall / (precision + recall) if precision + recall > 0 else 0\n",
"\n",
" precisions[threshold].append(precision)\n",
" recalls[threshold].append(recall)\n",
" f1s[threshold].append(f1)\n",
" # print(pfile, results, precision, recall, f1)\n",
"\n",
" pass\n",
" print(\"minimizeJar:\", shadeConfig[0], \"relocation:\", shadeConfig[1])\n",
" for threshold in [0.5, 0.75, 0.9, 0.95, 0.99, 1.0]:\n",
" precision = sum(precisions[threshold]) / len(precisions[threshold])\n",
" recall = sum(recalls[threshold]) / len(recalls[threshold])\n",
" f1 = sum(f1s[threshold]) / len(f1s[threshold])\n",
" print(\"%.2f & %.3f & %.3f & %.3f \\\\\\\\\" % (threshold, precision, recall, f1))\n",
" print(\"\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
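Both the notebook above and the script below read one metadata file per project from `../projects_metadata` and one result file from `../evaluation`. Judging only from the fields the code accesses, the two inputs look roughly like the following sketch; all concrete values are invented placeholders:

```python
# Shape of a ../projects_metadata/<project>.json file, inferred from the keys
# the evaluation code reads; every value below is a made-up placeholder.
expected_example = {
    "shadeConfiguration": {"minimizeJar": False, "relocation": False},
    "effectiveDependencies": [
        {
            "groupId": "com.example",
            "artifactId": "example-lib",
            "version": "1.2.3",
            "presentInDatabase": True,
        }
    ],
}

# Shape of the matching ../evaluation/<project>_libraries.json file.
actual_example = {
    "inferredLibraries": [
        {
            "gav": "com.example:example-lib:1.2.3",
            "includedRatio": 0.97,
            "alternativeVersions": ["com.example:example-lib:1.2.4"],
        }
    ],
    "notFoundLibraries": [],  # dependency entries with no class files in the uber-JAR
}
```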
105 changes: 105 additions & 0 deletions util/evaluation_results.py
@@ -0,0 +1,105 @@
import json
import os

project_dir = "../projects_metadata"
result_dir = "../evaluation"
project_files = os.listdir(project_dir)


def compare_results(expected, actual, threshold=0.95):
true_positives = 0
false_positives = 0
false_negatives = 0

for dep in expected["effectiveDependencies"]:
if not dep["presentInDatabase"]:
continue # skip dependencies that are not present in the database

if dep in actual["notFoundLibraries"]:
            # no class file of this dependency was found in the uber-jar, so it most likely contains no class files
continue

found = False
gav = dep["groupId"] + ":" + dep["artifactId"] + ":" + dep["version"]

for inferred_dep in actual["inferredLibraries"]:
if inferred_dep["includedRatio"] < threshold:
continue
if gav == inferred_dep["gav"] or gav in inferred_dep["alternativeVersions"]:
true_positives += 1
found = True
break

if not found:
false_negatives += 1

nb_actual = sum(
1 for inferred_dep in actual["inferredLibraries"] if inferred_dep["includedRatio"] >= threshold
)
false_positives = nb_actual - true_positives
return (true_positives, false_positives, false_negatives)


shadeConfigurations = [(True, True), (True, False), (False, True), (False, False)]
for shadeConfig in shadeConfigurations:
precisions = {}
recalls = {}
f1s = {}
for threshold in [0.5, 0.75, 0.9, 0.95, 0.99, 1.0]:
precisions[threshold] = []
recalls[threshold] = []
f1s[threshold] = []
for project_file in sorted(project_files):
expected_data = None
actual_data = None
with open(os.path.join(project_dir, project_file), "r") as f:
expected_data = json.load(f)

if (
expected_data["shadeConfiguration"]["minimizeJar"]
and not shadeConfig[0]
):
continue
if (
expected_data["shadeConfiguration"]["relocation"]
and not shadeConfig[1]
):
continue
actual_file_path = os.path.join(
result_dir, project_file.replace(".json", "_libraries.json")
)
if not os.path.exists(actual_file_path):
continue
with open(actual_file_path, "r") as f:
actual_data = json.load(f)

results = compare_results(expected_data, actual_data, threshold)

precision = (
results[0] / (results[0] + results[1])
if results[0] + results[1] > 0
else 1
)
recall = (
results[0] / (results[0] + results[2])
if results[0] + results[2] > 0
else 1
)
f1 = (
2 * precision * recall / (precision + recall)
if precision + recall > 0
else 0
)

precisions[threshold].append(precision)
recalls[threshold].append(recall)
f1s[threshold].append(f1)
# print(pfile, results, precision, recall, f1)

print("minimizeJar:", shadeConfig[0], "relocation:", shadeConfig[1])
    for threshold in [0.5, 0.75, 0.9, 0.95, 0.99, 1.0]:
precision = sum(precisions[threshold]) / len(precisions[threshold])
recall = sum(recalls[threshold]) / len(recalls[threshold])
f1 = sum(f1s[threshold]) / len(f1s[threshold])
print("%.2f & %.3f & %.3f & %.3f \\\\" % (threshold, precision, recall, f1))
print("")
121 changes: 121 additions & 0 deletions util/graph_creator.ipynb

Large diffs are not rendered by default.

