From c87c685de49d563799e9ea17ffd3b6e588434b5b Mon Sep 17 00:00:00 2001 From: Carl Flottmann Date: Thu, 30 Jan 2025 15:09:39 +1000 Subject: [PATCH] feat: support for semgrep rules, currently two implemented, with custom options --- src/macaron/config/defaults.ini | 6 +- .../sourcecode/pypi_sourcecode_analyzer.py | 130 +++------ .../pypi_malware_rules/exfiltration.yaml | 146 ++++++++++ .../pypi_malware_rules/obfuscation.yaml | 256 ++++++++++++++++++ .../package_registry/pypi_registry.py | 2 +- 5 files changed, 441 insertions(+), 99 deletions(-) create mode 100644 src/macaron/resources/pypi_malware_rules/exfiltration.yaml create mode 100644 src/macaron/resources/pypi_malware_rules/obfuscation.yaml diff --git a/src/macaron/config/defaults.ini b/src/macaron/config/defaults.ini index 150929e55..999b3782c 100644 --- a/src/macaron/config/defaults.ini +++ b/src/macaron/config/defaults.ini @@ -595,6 +595,6 @@ epoch_threshold = 3 # The number of days +/- the day of publish the calendar versioning day may be. day_publish_error = 4 -# yaml configuration file containing suspicious patterns. Can be full path or relative to -# folder where macaron is installed. This will be normalised to the OS path type. -suspicious_patterns_file = src/macaron/malware_analyzer/pypi_heuristics/sourcecode/suspicious_patterns.yaml +# absolute path to where a custom set of semgrep rules for source code analysis is stored. These will be included +# with Macaron's default rules. The path will be normalised to the OS path type. 
+custom_semgrep_rules = diff --git a/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/pypi_sourcecode_analyzer.py b/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/pypi_sourcecode_analyzer.py index d372c4fd3..beb5e553b 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/pypi_sourcecode_analyzer.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/pypi_sourcecode_analyzer.py @@ -18,11 +18,9 @@ import subprocess # nosec import tempfile from collections import defaultdict -from typing import Any - -import yaml from macaron.config.defaults import defaults +from macaron.config.global_config import global_config from macaron.errors import ConfigurationError, HeuristicAnalyzerValueError, SourceCodeError from macaron.json_tools import JsonType, json_extract from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult @@ -58,79 +56,61 @@ class PyPISourcecodeAnalyzer: def __init__(self) -> None: """Collect required data for analysing the source code.""" - self.suspicious_patterns = self._load_defaults() - self.rule_files: list = [] + self.default_rule_path, self.custom_rule_path = self._load_defaults() + + def _load_defaults(self) -> tuple[str, str | None]: + """ + Load the default semgrep rules and, if present, the custom semgrep rules provided by the user. - def _load_defaults(self) -> dict[str, dict[str, list]]: - """Load the suspicious pattern from suspicious_pattern.yaml. + Semgrep validation is run on the custom rules provided by the user. Returns ------- - dict[str: dict[str, list]] - The suspicious pattern. + tuple[str, str | None] + The default rule path and the custom rule path or None if one was not provided Raises ------ ConfigurationError - if the suspicious pattern file is not in the expected format or cannot be accessed. + If the heuristic.pypi entry is not present, or if the semgrep validation of the custom rule path failed. 
""" - suspicious_patterns: dict[str, dict[str, list]] = {} + default_rule_path = os.path.join(global_config.resources_path, "pypi_malware_rules") section_name = "heuristic.pypi" if defaults.has_section(section_name): section = defaults[section_name] else: - error_msg = f"Unable to find section {section_name}, which is required to load suspicious patterns." + error_msg = f"Unable to find section {section_name}, which must be present." logger.debug(error_msg) raise ConfigurationError(error_msg) - configuration_name = "suspicious_patterns_file" - filename = section.get(configuration_name) - if filename is None: - error_msg = f"Unable to find {configuration_name} in configuration file." + configuration_name = "custom_semgrep_rules" + custom_rule_path = section.get(configuration_name) + if not custom_rule_path: # i.e. None or empty string + logger.debug("No custom path listed under %s, using default rules only.", configuration_name) + return default_rule_path, None + + custom_rule_path = os.path.normpath(custom_rule_path) + if not os.path.exists(custom_rule_path): + error_msg = f"Unable to locate path {custom_rule_path}" logger.debug(error_msg) raise ConfigurationError(error_msg) - filename = os.path.normpath(filename) + semgrep_commands: list[str] = ["semgrep", "scan", "--validate", "--config", custom_rule_path] try: - with open(filename, encoding="utf-8") as file: - configured_patterns: dict[str, JsonType] = yaml.safe_load(file) - except FileNotFoundError as file_error: - error_msg = f"Unable to locate {filename}" - logger.debug(error_msg) - raise ConfigurationError(error_msg) from file_error - except yaml.YAMLError as yaml_error: - error_msg = f"Unable to parse {filename} as a yaml file." 
+ process = subprocess.run(semgrep_commands, check=True, capture_output=True) # nosec + except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as semgrep_error: + error_msg = f"Unable to run semgrep validation on {custom_rule_path} with arguments {semgrep_commands}: {semgrep_error}" logger.debug(error_msg) - raise ConfigurationError(error_msg) from yaml_error + raise ConfigurationError(error_msg) from semgrep_error - for expected_category in self.EXPECTED_PATTERN_CATEGORIES: - if expected_category not in configured_patterns: - error_msg = ( - f"Expected suspicious pattern category {expected_category} present in" - + f" {filename}: must have categories {self.EXPECTED_PATTERN_CATEGORIES}" - ) - logger.debug(error_msg) - raise ConfigurationError(error_msg) - - for category, patterns in configured_patterns.items(): - suspicious_patterns[category] = {} - if isinstance(patterns, list): - suspicious_patterns[category][category] = patterns - elif isinstance(patterns, dict): - for subcategory, subpatterns in patterns.items(): - if not isinstance(subpatterns, list): - error_msg = f"Expected subcategory {subcategory} items to be" + f" a list in {filename}" - logger.debug(error_msg) - raise ConfigurationError(error_msg) - - suspicious_patterns[category][subcategory] = subpatterns - else: - error_msg = f"Expected category {category} to be either a list" + f" or dictionary in {filename}" - logger.debug(error_msg) - raise ConfigurationError(error_msg) + if process.returncode != 0: + error_msg = f"Error running semgrep validation on {custom_rule_path} with arguments" f" {process.args}" + logger.debug(error_msg) + raise ConfigurationError(error_msg) - return suspicious_patterns + logger.debug("Including custom ruleset from %s.", custom_rule_path) + return default_rule_path, custom_rule_path def analyze_patterns(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicResult, dict[str, JsonType]]: """Analyze the source code of the package for malicious patterns. 
@@ -162,9 +142,9 @@ def analyze_patterns(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[Heu logger.debug(error_msg) raise HeuristicAnalyzerValueError(error_msg) - self._create_rules() - for rule_file in self.rule_files: - semgrep_commands.extend(["--config", rule_file.name]) + semgrep_commands.extend(["--config", self.default_rule_path]) + if self.custom_rule_path: + semgrep_commands.extend(["--config", self.custom_rule_path]) semgrep_commands.append(source_code_path) with tempfile.NamedTemporaryFile(mode="w+", delete=True) as output_json_file: @@ -203,8 +183,6 @@ def analyze_patterns(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[Heu end = json_extract(finding, ["end", "line"], int) analysis_result[category].append({"file": file, "start": start, "end": end}) - self._clear_rules() - return result, dict(analysis_result) def analyze_dataflow(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicResult, dict[str, JsonType]]: @@ -258,44 +236,6 @@ def analyze_dataflow(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[Heu return result, analysis_result - def _create_rules(self) -> None: - rule_list: list[dict[str, Any]] = [] - contents: dict = {} - - if self.rule_files: - self._clear_rules() - - # import rules - for category, patterns in self.suspicious_patterns[IMPORTS].items(): - rule: dict[str, Any] = {} - pattern_list: list = [] - - rule["id"] = category - rule["severity"] = "ERROR" - rule["languages"] = ["python"] - rule["message"] = f"Detected suspicious imports from the '{category}' category" - - for pattern in patterns: - pattern_list.append({"pattern": f"import {pattern}"}) - pattern_list.append({"pattern": f"from {pattern} import $X"}) - pattern_list.append({"pattern": f'__import__("{pattern}")'}) - - rule["pattern-either"] = pattern_list - rule_list.append(rule) - - contents = {"rules": rule_list} - - with tempfile.NamedTemporaryFile( - "w", prefix=f"{IMPORTS}_", suffix=".yaml", delete=False - ) as import_patterns_file: - 
yaml.dump(contents, import_patterns_file) - self.rule_files.append(import_patterns_file) - - def _clear_rules(self) -> None: - for file in self.rule_files: - file.close() - self.rule_files.clear() - class DataFlowTracer(ast.NodeVisitor): """The class is used to create the symbol table and analyze the dataflow.""" diff --git a/src/macaron/resources/pypi_malware_rules/exfiltration.yaml b/src/macaron/resources/pypi_malware_rules/exfiltration.yaml new file mode 100644 index 000000000..b0c8b078a --- /dev/null +++ b/src/macaron/resources/pypi_malware_rules/exfiltration.yaml @@ -0,0 +1,146 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +rules: +- id: remote-exfiltration + metadata: + description: Detected the exfiltration of data to a remote endpoint + message: Detected exfiltration of sensitive data to a remote endpoint. + languages: + - python + severity: ERROR + mode: taint + options: + symbolic_propagation: true + pattern-sources: + - pattern-either: + # result of code/command evaluation + - pattern: exec(...) + - pattern: eval(...) + - pattern: ast.literal_eval(...) + - pattern: builtins.exec(...) + - pattern: builtins.eval(...) + - pattern: __import__('builtins').exec(...) + - pattern: __import__('builtins').eval(...) + + # environment variables + - pattern: os.environ + - pattern: os.environ[...] + - pattern: os.environ.get(...) + - pattern: os.environb + - pattern: os.environb[...] + - pattern: os.environb.get(...) + - pattern: os.getenv(...) + - pattern: os.getenvb(...) + + # system information + - pattern: os.uname(...) + - pattern: os.confstr(...) + - pattern: os.confstr_names + - pattern: os.sysconf(...) + - pattern: os.sysconf_names + - pattern: platform.release(...) + - pattern: platform.version(...) + - pattern: platform.uname(...) + - pattern: platform.win32_ver(...) + - pattern: platform.win32_edition(...) 
+ - pattern: platform.win32_is_iot(...) + - pattern: platform.mac_ver(...) + - pattern: platform.ios_ver(...) + - pattern: platform.libc_ver(...) + - pattern: platform.freedesktop_os_release(...) + - pattern: platform.android_ver(...) + + # network information + - pattern: psutil.net_connections(...) + - pattern: psutil.net_if_addrs(...) + - pattern: psutil.net_if_stats(...) + - pattern: platform.node(...) + - pattern: platform.platform(...) + - pattern: socket.gethostname(...) + - pattern: socket.gethostbyname(...) + - pattern: socket.gethostbyname_ex(...) + - pattern: socket.getfqdn(...) + - pattern: socket.if_nameindex(...) + + # user information + - pattern: psutil.users(...) + + # sensitive information + - pattern: getpass.getpass(...) + - pattern: getpass.unix_getpass(...) + - pattern: getpass.win_getpass(...) + - pattern: getpass.getuser(...) + - pattern: pwd.getpwuid(...) + - pattern: pwd.getpwnam(...) + - pattern: pwd.getpwall(...) + - pattern: keyring.get_keyring(...) + - pattern: keyring.get_password(...) + - pattern: keyring.get_credential(...) + - pattern: winreg.ConnectRegistry(...) + - pattern: winreg.LoadKey(...) + - pattern: winreg.OpenKey(...) + - pattern: winreg.OpenKeyEx(...) + - pattern: winreg.QueryInfoKey(...) + - pattern: winreg.QueryValue(...) + - pattern: winreg.QueryValueEx(...) + + pattern-sinks: + - pattern-either: + # remote connection + # using socket module + - pattern: socket.socket(...) + - pattern: $SOC.accept(...) + - pattern: $SOC.bind(...) + - pattern: $SOC.connect(...) + - pattern: $SOC.connect_ex(...) + - pattern: $SOC.listen(...) + - pattern: $SOC.recv(...) + - pattern: $SOC.recvfrom(...) + - pattern: $SOC.recvmsg(...) + - pattern: $SOC.recvmsg_into(...) + - pattern: $SOC.recvfrom_into(...) + - pattern: $SOC.recv_into(...) + - pattern: $SOC.send(...) + - pattern: $SOC.sendall(...) + - pattern: $SOC.sendto(...) + - pattern: $SOC.sendmsg(...) + - pattern: $SOC.sendmsg_afalg(...) + - pattern: $SOC.sendfile(...) 
+ # using requests module + - pattern: requests.get(...) + - pattern: requests.post(...) + - pattern: requests.put(...) + - pattern: requests.delete(...) + - pattern: requests.head(...) + - pattern: requests.options(...) + - pattern: requests.Session(...) + - pattern: requests.Request(...) + # using urllib3 module + - pattern: urllib3.PoolManager(...) + - pattern: urllib3.request(...) + - pattern: urllib3.HTTPConnectionPool(...) + - pattern: urllib3.HTTPSConnectionPool(...) + - pattern: urllib3.ConnectionPool(...) + - pattern: urllib3.ProxyManager(...) + - pattern: urllib3.contrib.socks.SOCKSProxyManager(...) + # using urllib + - pattern: urllib.request(...) + - pattern: urllib.request.urlopen(...) + # using urlrequest module + - pattern: UrlRequest(...) + - pattern: UrlRequestRequests(...) + - pattern: UrlRequestUrllib(...) + # using httpx + - pattern: httpx.request(...) + - pattern: httpx.get(...) + - pattern: httpx.post(...) + - pattern: httpx.put(...) + - pattern: httpx.delete(...) + - pattern: httpx.head(...) + - pattern: httpx.options(...) + - pattern: httpx.stream(...) + - pattern: httpx.AsyncClient(...) + - pattern: httpx.AsyncHTTPTransport(...) + - pattern: httpx.Client(...) + - pattern: httpx.Request(...) diff --git a/src/macaron/resources/pypi_malware_rules/obfuscation.yaml b/src/macaron/resources/pypi_malware_rules/obfuscation.yaml new file mode 100644 index 000000000..5f3bf329c --- /dev/null +++ b/src/macaron/resources/pypi_malware_rules/obfuscation.yaml @@ -0,0 +1,256 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +rules: +- id: default-assigning + metadata: + description: Identifies when a default python function is assigned to another variable + message: Found an instance of assigning a builtin python function to a variable + languages: + - python + severity: ERROR + pattern-either: + # assigning, many obfuscation tools listed below do this + - pattern: $VAR = __import__ + - pattern: $VAR = getattr + - pattern: $VAR = bytes + - pattern: $VAR = bytearray + - pattern: $VAR = exec + - pattern: $VAR = eval + - pattern: $VAR = setattr + - pattern: $VAR = compile + - pattern: $VAR = map + - pattern: $VAR = open + - pattern: $VAR = zip + - pattern: $VAR = vars + - pattern: $VAR = dir + # doing the same using the builtins module + - pattern: $VAR = builtins.__import__ + - pattern: $VAR = builtins.getattr + - pattern: $VAR = builtins.bytes + - pattern: $VAR = builtins.bytearray + - pattern: $VAR = builtins.exec + - pattern: $VAR = builtins.eval + - pattern: $VAR = builtins.setattr + - pattern: $VAR = builtins.compile + - pattern: $VAR = builtins.map + - pattern: $VAR = builtins.open + - pattern: $VAR = builtins.zip + - pattern: $VAR = builtins.vars + - pattern: $VAR = builtins.dir + - pattern: $VAR = __import__('builtins').__import__ + - pattern: $VAR = __import__('builtins').getattr + - pattern: $VAR = __import__('builtins').bytes + - pattern: $VAR = __import__('builtins').bytearray + - pattern: $VAR = __import__('builtins').exec + - pattern: $VAR = __import__('builtins').eval + - pattern: $VAR = __import__('builtins').setattr + - pattern: $VAR = __import__('builtins').compile + - pattern: $VAR = __import__('builtins').map + - pattern: $VAR = __import__('builtins').open + - pattern: $VAR = __import__('builtins').zip + - pattern: $VAR = __import__('builtins').vars + - pattern: $VAR = __import__('builtins').dir + +- id: obfuscation-tools + metadata: + description: detects the use of python obfuscation packages on the source code + message: Found an instance of import 
and/or using python obfuscation tools + languages: + - python + severity: ERROR + pattern-either: + # pyarmor: pyarmor.readthedocs.io/en/latest/index.html + - pattern: import __pyarmor__ + - pattern: from $MODULE import __pyarmor__ + - pattern: from $MODULE import pyarmor_runtime + - pattern: __import__('__pyarmor__') + # pyarmor RTF mode: pyarmor.readthedocs.io/en/latest/tutorial/advanced.html + - pattern: __assert_armored__($PAYLOAD) + - patterns: + - pattern: | + def $FUNC_NAME(...): + ... + - metavariable-regex: + metavariable: $FUNC_NAME + regex: ^pyarmor__\d+$ + # inline pyarmor marker: pyarmor.readthedocs.io/en/latest/tutorial/advanced.html + - pattern-regex: ^# pyarmor:.? + # obfuscated names using pyob.oxyry.com with O, o, 0 or github.com/QQuick/Opy and pyobfuscate using l, I, 1 + - patterns: + - pattern: | + def $OBF(...): + ... + - pattern: | + class $OBF(...): + ... + - pattern: $OBF = ... + - metavariable-regex: + metavariable: $OBF + regex: (^_?[Oo0]|[1Il]+$) + # obfuscated using pyobfuscate.com + - pattern: pyobfuscate=... + # obfuscated using liftoff.github.io/pyminifier + - pattern: import mystificate + +- id: inline-imports + metadata: + description: detects the use of the private inline import __import__(...) + message: detected use of inline imports + languages: + - python + severity: ERROR + pattern: __import__($MODULE) + +- id: decode-and-execute + metadata: + description: detects the flow of a decoded or constructed string to process execution, code evaluation, network connections, or file writes + message: detected the flow of a decoded string value to a remote endpoint, process, code evaluation, or file write + languages: + - python + severity: ERROR + mode: taint + options: + symbolic_propagation: true + pattern-sources: + - pattern-either: + # marshal encryption + - pattern: marshal.loads(...) + - pattern: __import__('marshal').loads(...) + # bytes decoding + - pattern: | + "...".decode(...) + - pattern: $BYTES.decode(...) 
+ - pattern: bytes.decode(...) + - pattern: $BYTES.join(...).decode() + # decompression + - pattern: zlib.decompress(...) + - pattern: __import__('zlib').decompress(...) + # base64 decoded string values + - pattern: base64.b64decode(...) + - pattern: __import__('base64').b64decode(...) + - pattern: b64decode(...) + # hex encoded values + - pattern: bytes.fromhex(...) + # unicode construction + - patterns: + - pattern: $STRING.join(map($FOO, [...])) + - pattern: $STRING.join($FOO($VAL) for $VAL in [...]) + - pattern: $STRING.join($FOO($VAL) for $VAL in $GEN(...)) + - metavariable-regex: + metavariable: $FOO + regex: unicode|unichr|chr|ord + + pattern-sinks: + - pattern-either: + # remote connection + # using socket module + - pattern: socket.socket(...) + - pattern: $SOC.accept(...) + - pattern: $SOC.bind(...) + - pattern: $SOC.connect(...) + - pattern: $SOC.connect_ex(...) + - pattern: $SOC.listen(...) + - pattern: $SOC.recv(...) + - pattern: $SOC.recvfrom(...) + - pattern: $SOC.recvmsg(...) + - pattern: $SOC.recvmsg_into(...) + - pattern: $SOC.recvfrom_into(...) + - pattern: $SOC.recv_into(...) + - pattern: $SOC.send(...) + - pattern: $SOC.sendall(...) + - pattern: $SOC.sendto(...) + - pattern: $SOC.sendmsg(...) + - pattern: $SOC.sendmsg_afalg(...) + - pattern: $SOC.sendfile(...) + # using requests module + - pattern: requests.get(...) + - pattern: requests.post(...) + - pattern: requests.put(...) + - pattern: requests.delete(...) + - pattern: requests.head(...) + - pattern: requests.options(...) + - pattern: requests.Session(...) + - pattern: requests.Request(...) + # using urllib3 module + - pattern: urllib3.PoolManager(...) + - pattern: urllib3.request(...) + - pattern: urllib3.HTTPConnectionPool(...) + - pattern: urllib3.HTTPSConnectionPool(...) + - pattern: urllib3.ConnectionPool(...) + - pattern: urllib3.ProxyManager(...) + - pattern: urllib3.contrib.socks.SOCKSProxyManager(...) + # using urllib + - pattern: urllib.request(...) 
+ - pattern: urllib.request.urlopen(...) + # using urlrequest module + - pattern: UrlRequest(...) + - pattern: UrlRequestRequests(...) + - pattern: UrlRequestUrllib(...) + # using httpx + - pattern: httpx.request(...) + - pattern: httpx.get(...) + - pattern: httpx.post(...) + - pattern: httpx.put(...) + - pattern: httpx.delete(...) + - pattern: httpx.head(...) + - pattern: httpx.options(...) + - pattern: httpx.stream(...) + - pattern: httpx.AsyncClient(...) + - pattern: httpx.AsyncHTTPTransport(...) + - pattern: httpx.Client(...) + - pattern: httpx.Request(...) + + # process spawning + # using subprocess module + - pattern: subprocess.check_output(...) + - pattern: subprocess.check_call(...) + - pattern: subprocess.run(...) + - pattern: subprocess.call(...) + - pattern: subprocess.Popen(...) + - pattern: subprocess.getoutput(...) + - pattern: subprocess.getstatusoutput(...) + # using os module + - pattern: os.execl(...) + - pattern: os.execle(...) + - pattern: os.execlp(...) + - pattern: os.execlpe(...) + - pattern: os.execv(...) + - pattern: os.execve(...) + - pattern: os.execvp(...) + - pattern: os.execvpe(...) + - pattern: os.popen(...) + - pattern: os.posix_spawn(...) + - pattern: os.posix_spawnp(...) + - pattern: os.spawnl(...) + - pattern: os.spawnle(...) + - pattern: os.spawnlp(...) + - pattern: os.spawnlpe(...) + - pattern: os.spawnv(...) + - pattern: os.spawnve(...) + - pattern: os.spawnvp(...) + - pattern: os.spawnvpe(...) + - pattern: os.system(...) + # using commands module + - pattern: commands.getstatusoutput(...) + - pattern: commands.getoutput(...) + # using runpy module + - pattern: runpy.run_module(...) + - pattern: runpy.run_path(...) + + # code evaluation/execution + - pattern: exec(...) + - pattern: eval(...) + - pattern: ast.literal_eval(...) + - pattern: builtins.exec(...) + - pattern: builtins.eval(...) + - pattern: __import__('builtins').exec(...) + - pattern: __import__('builtins').eval(...) + + # file write + - pattern: $FILE.write(...) 
+ - pattern: $MODULE.dumps(...) + - pattern: os.write(...) + - pattern: os.writev(...) + - pattern: os.pwrite(...) + - pattern: os.pwritev(...) diff --git a/src/macaron/slsa_analyzer/package_registry/pypi_registry.py b/src/macaron/slsa_analyzer/package_registry/pypi_registry.py index f9122ec1b..813d3883d 100644 --- a/src/macaron/slsa_analyzer/package_registry/pypi_registry.py +++ b/src/macaron/slsa_analyzer/package_registry/pypi_registry.py @@ -267,7 +267,7 @@ def download_package_sourcecode(self, url: str) -> str: raise InvalidHTTPResponseError(error_msg) from read_error extracted_dir = os.listdir(temp_dir) - if len(extracted_dir) == 1 and re.sub(".tar.gz$", "", file_name) == extracted_dir[0]: + if len(extracted_dir) == 1 and package_name == extracted_dir[0]: # structure used package name and version as top-level directory temp_dir = os.path.join(temp_dir, extracted_dir[0])