From e1760d9aabb357033b739e9325b45f2345522f7b Mon Sep 17 00:00:00 2001
From: mayeut <mayeut@users.noreply.github.com>
Date: Sun, 7 Jan 2024 14:57:17 +0100
Subject: [PATCH] fix: `--exclude libfoo.so` shall ignore dependencies of
 `libfoo.so`

When using `--exclude libfoo.so`, dependencies of `libfoo.so` are still being analyzed & grafted.
This commit moves the exclusion analysis to `lddtree` and filters out `DT_NEEDED` entries that match `libfoo.so`, thus excluding both `libfoo.so` and its dependencies from the tree.
---
 src/auditwheel/lddtree.py                |  9 ++++-
 src/auditwheel/main_repair.py            |  7 ++--
 src/auditwheel/main_show.py              |  2 +-
 src/auditwheel/repair.py                 |  8 ++---
 src/auditwheel/wheel_abi.py              | 12 ++++---
 tests/integration/test_bundled_wheels.py |  6 ++--
 tests/integration/test_manylinux.py      | 44 ++++++++++--------------
 tests/unit/test_wheel_abi.py             |  2 +-
 8 files changed, 48 insertions(+), 42 deletions(-)

diff --git a/src/auditwheel/lddtree.py b/src/auditwheel/lddtree.py
index 22a79388..18064d87 100644
--- a/src/auditwheel/lddtree.py
+++ b/src/auditwheel/lddtree.py
@@ -300,6 +300,7 @@ def lddtree(
     prefix: str = "",
     ldpaths: dict[str, list[str]] | None = None,
     display: str | None = None,
+    exclude: frozenset[str] = frozenset(),
     _first: bool = True,
     _all_libs: dict = {},
 ) -> dict:
@@ -320,6 +321,8 @@ def lddtree(
         will be called.
     display
         The path to show rather than ``path``
+    exclude
+        Set of sonames (DT_NEEDED entries) to exclude from the tree
     _first
         Recursive use only; is this the first ELF?
     _all_libs
@@ -402,7 +405,10 @@ def lddtree(
                 elif t.entry.d_tag == "DT_RUNPATH":
                     runpaths = parse_ld_paths(t.runpath, path=path, root=root)
                 elif t.entry.d_tag == "DT_NEEDED":
-                    libs.append(t.needed)
+                    if t.needed in exclude:
+                        log.info(f"Excluding {t.needed}")
+                    else:
+                        libs.append(t.needed)
             if runpaths:
                 # If both RPATH and RUNPATH are set, only the latter is used.
                 rpaths = []
@@ -449,6 +455,7 @@ def lddtree(
                     prefix,
                     ldpaths,
                     display=fullpath,
+                    exclude=exclude,
                     _first=False,
                     _all_libs=_all_libs,
                 )
diff --git a/src/auditwheel/main_repair.py b/src/auditwheel/main_repair.py
index a89de776..2b7249da 100644
--- a/src/auditwheel/main_repair.py
+++ b/src/auditwheel/main_repair.py
@@ -109,6 +109,8 @@ def execute(args, p):
     from .repair import repair_wheel
     from .wheel_abi import NonPlatformWheel, analyze_wheel_abi
 
+    exclude = frozenset(args.EXCLUDE)
+    patcher = Patchelf()
     wheel_policy = WheelPolicies()
 
     for wheel_file in args.WHEEL_FILE:
@@ -121,7 +123,7 @@ def execute(args, p):
             os.makedirs(args.WHEEL_DIR)
 
         try:
-            wheel_abi = analyze_wheel_abi(wheel_policy, wheel_file)
+            wheel_abi = analyze_wheel_abi(wheel_policy, wheel_file, exclude)
         except NonPlatformWheel:
             logger.info(NonPlatformWheel.LOG_MESSAGE)
             return 1
@@ -168,7 +170,6 @@ def execute(args, p):
                 higher_policy = wheel_policy.get_policy_by_name(wheel_abi.overall_tag)
                 abis = [higher_policy["name"]] + higher_policy["aliases"] + abis
 
-        patcher = Patchelf()
         out_wheel = repair_wheel(
             wheel_policy,
             wheel_file,
@@ -177,7 +178,7 @@ def execute(args, p):
             out_dir=args.WHEEL_DIR,
             update_tags=args.UPDATE_TAGS,
             patcher=patcher,
-            exclude=args.EXCLUDE,
+            exclude=exclude,
             strip=args.STRIP,
         )
 
diff --git a/src/auditwheel/main_show.py b/src/auditwheel/main_show.py
index 576c0eb6..d2157ea6 100644
--- a/src/auditwheel/main_show.py
+++ b/src/auditwheel/main_show.py
@@ -35,7 +35,7 @@ def execute(args, p):
         p.error("cannot access %s. No such file" % args.WHEEL_FILE)
 
     try:
-        winfo = analyze_wheel_abi(wheel_policy, args.WHEEL_FILE)
+        winfo = analyze_wheel_abi(wheel_policy, args.WHEEL_FILE, frozenset())
     except NonPlatformWheel:
         logger.info(NonPlatformWheel.LOG_MESSAGE)
         return 1
diff --git a/src/auditwheel/repair.py b/src/auditwheel/repair.py
index c172b471..798ae7c0 100644
--- a/src/auditwheel/repair.py
+++ b/src/auditwheel/repair.py
@@ -39,10 +39,10 @@ def repair_wheel(
     out_dir: str,
     update_tags: bool,
     patcher: ElfPatcher,
-    exclude: list[str],
+    exclude: frozenset[str],
     strip: bool = False,
 ) -> str | None:
-    external_refs_by_fn = get_wheel_elfdata(wheel_policy, wheel_path)[1]
+    external_refs_by_fn = get_wheel_elfdata(wheel_policy, wheel_path, exclude)[1]
 
     # Do not repair a pure wheel, i.e. has no external refs
     if not external_refs_by_fn:
@@ -72,9 +72,7 @@ def repair_wheel(
             ext_libs: dict[str, str] = v[abis[0]]["libs"]
             replacements: list[tuple[str, str]] = []
             for soname, src_path in ext_libs.items():
-                if soname in exclude:
-                    logger.info(f"Excluding {soname}")
-                    continue
+                assert soname not in exclude
 
                 if src_path is None:
                     raise ValueError(
diff --git a/src/auditwheel/wheel_abi.py b/src/auditwheel/wheel_abi.py
index e4a9446b..16ae9d7e 100644
--- a/src/auditwheel/wheel_abi.py
+++ b/src/auditwheel/wheel_abi.py
@@ -52,7 +52,9 @@ class NonPlatformWheel(WheelAbiError):
 
 
 @functools.lru_cache
-def get_wheel_elfdata(wheel_policy: WheelPolicies, wheel_fn: str):
+def get_wheel_elfdata(
+    wheel_policy: WheelPolicies, wheel_fn: str, exclude: frozenset[str]
+):
     full_elftree = {}
     nonpy_elftree = {}
     full_external_refs = {}
@@ -80,7 +82,7 @@ def get_wheel_elfdata(wheel_policy: WheelPolicies, wheel_fn: str):
             # to fail and there's no need to do further checks
             if not shared_libraries_in_purelib:
                 log.debug("processing: %s", fn)
-                elftree = lddtree(fn)
+                elftree = lddtree(fn, exclude=exclude)
 
                 for key, value in elf_find_versioned_symbols(elf):
                     log.debug("key %s, value %s", key, value)
@@ -227,7 +229,9 @@ def get_symbol_policies(
     return result
 
 
-def analyze_wheel_abi(wheel_policy: WheelPolicies, wheel_fn: str) -> WheelAbIInfo:
+def analyze_wheel_abi(
+    wheel_policy: WheelPolicies, wheel_fn: str, exclude: frozenset[str]
+) -> WheelAbIInfo:
     external_refs = {
         p["name"]: {"libs": {}, "blacklist": {}, "priority": p["priority"]}
         for p in wheel_policy.policies
@@ -239,7 +243,7 @@ def analyze_wheel_abi(wheel_policy: WheelPolicies, wheel_fn: str) -> WheelAbIInf
         versioned_symbols,
         has_ucs2,
         uses_PyFPE_jbuf,
-    ) = get_wheel_elfdata(wheel_policy, wheel_fn)
+    ) = get_wheel_elfdata(wheel_policy, wheel_fn, exclude)
 
     for fn in elftree_by_fn.keys():
         update(external_refs, external_refs_by_fn[fn])
diff --git a/tests/integration/test_bundled_wheels.py b/tests/integration/test_bundled_wheels.py
index bf2454b5..02dcc247 100644
--- a/tests/integration/test_bundled_wheels.py
+++ b/tests/integration/test_bundled_wheels.py
@@ -29,14 +29,16 @@
 )
 def test_analyze_wheel_abi(file, external_libs):
     wheel_policies = WheelPolicies(libc=Libc.GLIBC, arch="x86_64")
-    winfo = analyze_wheel_abi(wheel_policies, str(HERE / file))
+    winfo = analyze_wheel_abi(wheel_policies, str(HERE / file), frozenset())
     assert set(winfo.external_refs["manylinux_2_5_x86_64"]["libs"]) == external_libs
 
 
 def test_analyze_wheel_abi_pyfpe():
     wheel_policies = WheelPolicies(libc=Libc.GLIBC, arch="x86_64")
     winfo = analyze_wheel_abi(
-        wheel_policies, str(HERE / "fpewheel-0.0.0-cp35-cp35m-linux_x86_64.whl")
+        wheel_policies,
+        str(HERE / "fpewheel-0.0.0-cp35-cp35m-linux_x86_64.whl"),
+        frozenset(),
     )
     assert (
         winfo.sym_tag == "manylinux_2_5_x86_64"
diff --git a/tests/integration/test_manylinux.py b/tests/integration/test_manylinux.py
index 39b33dc4..c6335f86 100644
--- a/tests/integration/test_manylinux.py
+++ b/tests/integration/test_manylinux.py
@@ -313,51 +313,45 @@ def test_build_repair_numpy(
         # at once in the same Python program:
         docker_exec(docker_python, ["python", "-c", "'import numpy; import foo'"])
 
-    @pytest.mark.skipif(
-        PLATFORM != "x86_64", reason="Only needs checking on one platform"
-    )
     def test_repair_exclude(self, any_manylinux_container, io_folder):
         """Test the --exclude argument to avoid grafting certain libraries."""
 
         policy, tag, manylinux_ctr = any_manylinux_container
 
-        orig_wheel = build_numpy(manylinux_ctr, policy, io_folder)
-        assert orig_wheel == ORIGINAL_NUMPY_WHEEL
+        test_path = "/auditwheel_src/tests/integration/testrpath"
+        build_cmd = (
+            f"cd {test_path} && "
+            "if [ -d ./build ]; then rm -rf ./build ./*.egg-info; fi && "
+            "python setup.py bdist_wheel -d /io"
+        )
+        docker_exec(manylinux_ctr, ["bash", "-c", build_cmd])
+        filenames = os.listdir(io_folder)
+        assert filenames == [f"testrpath-0.0.1-{PYTHON_ABI}-linux_{PLATFORM}.whl"]
+        orig_wheel = filenames[0]
         assert "manylinux" not in orig_wheel
 
-        # Exclude libgfortran from grafting into the wheel
-        excludes = {
-            "manylinux_2_5_x86_64": ["libgfortran.so.1", "libgfortran.so.3"],
-            "manylinux_2_12_x86_64": ["libgfortran.so.3", "libgfortran.so.5"],
-            "manylinux_2_17_x86_64": ["libgfortran.so.3", "libgfortran.so.5"],
-            "manylinux_2_28_x86_64": ["libgfortran.so.5"],
-            "musllinux_1_1_x86_64": ["libgfortran.so.5"],
-        }[policy]
-
         repair_command = [
+            "env",
+            f"LD_LIBRARY_PATH={test_path}/a:$LD_LIBRARY_PATH",
             "auditwheel",
             "repair",
-            "--plat",
-            policy,
+            f"--plat={policy}",
             "--only-plat",
             "-w",
             "/io",
+            "--exclude=liba.so",
+            f"/io/{orig_wheel}",
         ]
-        for exclude in excludes:
-            repair_command.extend(["--exclude", exclude])
-        repair_command.append(f"/io/{orig_wheel}")
         output = docker_exec(manylinux_ctr, repair_command)
-
-        for exclude in excludes:
-            assert f"Excluding {exclude}" in output
+        assert "Excluding liba.so" in output
         filenames = os.listdir(io_folder)
         assert len(filenames) == 2
-        repaired_wheel = f"numpy-{NUMPY_VERSION}-{PYTHON_ABI}-{tag}.whl"
+        repaired_wheel = f"testrpath-0.0.1-{PYTHON_ABI}-{tag}.whl"
         assert repaired_wheel in filenames
 
-        # Make sure we don't have libgfortran in the result
+        # Make sure we don't have liba.so & libb.so in the result
         contents = zipfile.ZipFile(os.path.join(io_folder, repaired_wheel)).namelist()
-        assert not any(x for x in contents if "/libgfortran" in x)
+        assert not any(x for x in contents if "/liba" in x or "/libb" in x)
 
     def test_build_wheel_with_binary_executable(
         self, any_manylinux_container, docker_python, io_folder
diff --git a/tests/unit/test_wheel_abi.py b/tests/unit/test_wheel_abi.py
index d0def8e8..c8d0e253 100644
--- a/tests/unit/test_wheel_abi.py
+++ b/tests/unit/test_wheel_abi.py
@@ -48,6 +48,6 @@ def test_finds_shared_library_in_purelib(self, filenames, message, monkeypatch):
         wheel_policy = WheelPolicies()
 
         with pytest.raises(RuntimeError) as exec_info:
-            wheel_abi.get_wheel_elfdata(wheel_policy, "/fakepath")
+            wheel_abi.get_wheel_elfdata(wheel_policy, "/fakepath", frozenset())
 
         assert exec_info.value.args == (message,)