Skip to content

Commit

Permalink
script to infer vulnerabilities
Browse files Browse the repository at this point in the history
  • Loading branch information
Cornul11 committed Nov 28, 2023
1 parent 94b0f57 commit e197ae3
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 8 deletions.
42 changes: 34 additions & 8 deletions util/artifact_downloader.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,67 @@
import json
import logging
import os

import requests
from tqdm import tqdm
from tqdm.contrib.logging import logging_redirect_tqdm

# Configure the root logger once at import time: INFO threshold, and each
# record rendered as "<timestamp> - <level name> - <message>".
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)


def download_jar(artifact, version_key, folder="artifacts", timeout=30):
    """Download one version of an artifact's JAR from Maven Central.

    The JAR is stored at ``<folder>/<groupId>/<artifactId>-<version>.jar``.
    Nothing is downloaded when the requested version entry is missing, is
    not listed as hosted on "Central", or is already present on disk.

    Args:
        artifact: Mapping with ``groupId`` and ``artifactId`` plus one entry
            per version key.
        version_key: Key in ``artifact`` whose value is a mapping holding
            ``version`` and ``repositories``.
        folder: Root directory for downloaded JARs.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        None. Failures are reported through ``tqdm_log``.
    """
    version_info = artifact.get(version_key)
    if not version_info:
        return
    # ``repositories`` may be absent or null; treat both as "not on Central".
    if "Central" not in (version_info.get("repositories") or []):
        return

    group_id = artifact["groupId"]
    group_path = group_id.replace(".", "/")
    artifact_id = artifact["artifactId"]
    version = version_info["version"]

    file_path = os.path.join(folder, group_id, f"{artifact_id}-{version}.jar")
    # Check the local copy first so repeated runs cost no network round trip.
    if os.path.exists(file_path):
        tqdm_log(f"File {file_path} already exists")
        return

    base_url = (
        f"https://repo1.maven.org/maven2/{group_path}/{artifact_id}/{version}/"
        f"{artifact_id}-{version}"
    )
    url = f"{base_url}.jar"

    try:
        response = requests.head(url, timeout=timeout)
        if response.status_code == 404:
            tqdm_log(f"File {url} does not exist")
            # Some artifacts are published as a POM only; report that case.
            pom_url = f"{base_url}.pom"
            if requests.head(pom_url, timeout=timeout).status_code == 200:
                tqdm_log(f"File {pom_url} exists, but is a POM file")
            return
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # One unreachable artifact should not abort the whole batch.
        tqdm_log(f"Request for {url} failed: {exc}")
        return

    if response.status_code != 200:
        tqdm_log(
            f"Failed to download {url} with status code {response.status_code}"
        )
        return

    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    # Write to a temporary name and rename, so an interrupted run never
    # leaves a truncated JAR that later runs would treat as complete.
    partial_path = f"{file_path}.part"
    with open(partial_path, "wb") as f:
        f.write(response.content)
    os.replace(partial_path, file_path)


def tqdm_log(msg):
    """Emit ``msg`` at ERROR level on the root logger.

    The call is made inside ``logging_redirect_tqdm()`` so that the record
    is routed through tqdm's logging redirection while it is written.
    """
    redirect = logging_redirect_tqdm()
    with redirect:
        logging.error(msg)


def main(file_path):
    """Download the JARs for every artifact listed in a JSON file.

    Args:
        file_path: Path to a JSON file containing a list of artifact
            mappings as consumed by ``download_jar``.

    For each artifact, both its ``mostUsedVersion`` and its
    ``mostUsedVulnerableVersion`` are requested; progress is shown with tqdm.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        artifacts = json.load(file)

    # A single pass over all artifacts, with no cap on how many are processed.
    for artifact in tqdm(artifacts, desc="Downloading artifacts", unit="artifact"):
        download_jar(artifact, "mostUsedVersion")
        download_jar(artifact, "mostUsedVulnerableVersion")


if __name__ == "__main__":
import sys
Expand Down
56 changes: 56 additions & 0 deletions util/infer_and_save.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import logging
import os
import subprocess

from tqdm import tqdm
from tqdm.contrib.logging import logging_redirect_tqdm

# Configure the root logger once at import time: INFO threshold, and each
# record rendered as "<timestamp> - <level name> - <message>".
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)


def main(artifacts_path, custom_cwd):
    """Run the Java identification tool on every JAR under a directory tree.

    Args:
        artifacts_path: Directory that is walked recursively for ``*.jar``
            files.
        custom_cwd: Working directory for the Java process; the relative
            classpath entries below are resolved against it.

    For each ``<name>.jar`` found, the tool is invoked with ``-f`` pointing
    at the JAR and ``-o`` pointing at ``<name>.json`` next to it. The outcome
    of every invocation is logged.
    """
    java_command = "/usr/lib/jvm/java-11-openjdk-amd64/bin/java"
    java_options = "-Xmx8g"
    classpath = "target/dependency/*:target/thesis-1.0-SNAPSHOT.jar"
    main_class = "nl.tudelft.cornul11.thesis.corpus.MainApp"
    # Flag and value are separate argv entries; as a single string the tool
    # would receive "-m IDENTIFICATION_MODE" as one unrecognised argument.
    mode = ["-m", "IDENTIFICATION_MODE"]

    # The child runs in custom_cwd, so a relative artifacts_path would point
    # somewhere else from its point of view; pass absolute paths instead.
    artifacts_root = os.path.abspath(artifacts_path)

    with logging_redirect_tqdm():
        for root, _dirs, files in tqdm(os.walk(artifacts_root)):
            for file in files:
                if not file.endswith(".jar"):
                    continue

                jar_path = os.path.join(root, file)
                # Swap only the extension; ".jar" may also occur earlier in
                # the path (e.g. in a directory name).
                output_path = os.path.splitext(jar_path)[0] + ".json"
                command = [
                    java_command,
                    java_options,
                    "-cp",
                    classpath,
                    main_class,
                    *mode,
                    "-f",
                    jar_path,
                    "-o",
                    output_path,
                ]
                printable = " ".join(command)

                logging.info(f"Running command: {printable}")
                try:
                    # No shell: with shell=True and a list, POSIX executes
                    # only the first item and drops the remaining arguments.
                    # The classpath "*" wildcard is expanded by the JVM.
                    result = subprocess.run(command, cwd=custom_cwd)
                except OSError as exc:
                    logging.error(f"Failed to run command: {printable}: {exc}")
                    continue

                if result.returncode != 0:
                    logging.error(f"Failed to run command: {printable}")
                else:
                    logging.info(f"Successfully ran command: {printable}")


def tqdm_log(msg):
    """Emit ``msg`` at ERROR level on the root logger.

    The call is made inside ``logging_redirect_tqdm()`` so that the record
    is routed through tqdm's logging redirection while it is written.
    """
    redirect = logging_redirect_tqdm()
    with redirect:
        logging.error(msg)


if __name__ == "__main__":
    import sys

    # Both positional arguments are required; fail with a usage line rather
    # than an IndexError traceback when they are missing.
    if len(sys.argv) < 3:
        sys.exit(f"Usage: {sys.argv[0]} <artifacts_path> <custom_cwd>")

    artifacts_path = sys.argv[1]
    custom_cwd = sys.argv[2]
    main(artifacts_path, custom_cwd)

0 comments on commit e197ae3

Please sign in to comment.