Skip to content

Commit 0b75107

Browse files
authored
add wheel dependency check (#3064)
1 parent 4842b58 commit 0b75107

File tree

4 files changed

+374
-0
lines changed

4 files changed

+374
-0
lines changed
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Composite action: downloads a PaddlePaddle wheel for the given CE task,
# unpacks it and records the shared-library dependencies of every .so inside.
name: PaddlePaddle Packages Dependency So Analysis
description: "PaddlePaddle Packages Dependency So Analysis Tool"

inputs:
  ce_task_name:
    description: "Ce Task Name"
    required: true
  is_cuda:
    description: "Is CUDA"
    required: false
    default: "True"
  python_version:
    description: "Python Version"
    required: false
    default: "3.10"

runs:
  using: "composite"
  steps:
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: ${{ inputs.python_version }}

    - name: Download and Analyze Wheel
      # The id is needed so the action-level output below can reference this step.
      id: analyze
      shell: bash
      # Inputs are handed to the script through environment variables instead of
      # being expanded into the script text, so their content cannot be
      # interpreted as shell code.
      env:
        CE_TASK_NAME: ${{ inputs.ce_task_name }}
        IS_CUDA: ${{ inputs.is_cuda }}
        PYTHON_VERSION: ${{ inputs.python_version }}
      run: |
        set -x
        python -m pip install requests
        mkdir -p SoAnalysisDir
        python publish_scripts/so_dependency_analyzer/wheel_download.py \
          --ce_task_name="${CE_TASK_NAME}" \
          --is_cuda="${IS_CUDA}" \
          --python_version="${PYTHON_VERSION}" \
          --output_dir=SoAnalysisDir

        # Resolve the analyzer path before changing directory.
        so_dependency_file=$(readlink -f publish_scripts/so_dependency_analyzer/so_dependency_analyzer.py)
        cd SoAnalysisDir
        unzip *.whl -d .
        python "${so_dependency_file}" extract
        result_file_path=$(readlink -f so_dependencies_static.json)
        description_file_path=$(readlink -f description.txt)
        # Exported through GITHUB_ENV for later steps of the calling job.
        echo "result_file_path=${result_file_path}" >> "$GITHUB_ENV"
        echo "description_file_path=${description_file_path}" >> "$GITHUB_ENV"
        echo "ce_task_name=${CE_TASK_NAME}" >> "$GITHUB_ENV"
        echo "is_cuda=${IS_CUDA}" >> "$GITHUB_ENV"
        echo "python_version=${PYTHON_VERSION}" >> "$GITHUB_ENV"
        # Also publish the result path as a step output so the declared
        # action output actually carries a value.
        echo "result_file_path=${result_file_path}" >> "$GITHUB_OUTPUT"

outputs:
  result_file_path:
    description: 'The path of the result file'
    value: ${{ steps.analyze.outputs.result_file_path }}
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
name: PaddlePaddle Packages Dependency So Analysis
2+
run-name: PaddlePaddle Packages Dependency So Analysis
3+
4+
on:
5+
workflow_dispatch:
6+
inputs:
7+
baselinesetting:
8+
description: 'so dependency baseline setting'
9+
required: false
10+
default: 'OFF'
11+
branch_name:
12+
required: false
13+
type: string
14+
default: 'Develop'
15+
workflow_call:
16+
inputs:
17+
baselinesetting:
18+
required: false
19+
type: string
20+
default: 'OFF'
21+
branch_name:
22+
required: false
23+
type: string
24+
default: 'Develop'
25+
schedule:
26+
- cron: "0 20 * * *"
27+
28+
jobs:
  # Runs the .so dependency analysis once per (CE task, Python version) pair and
  # either stores the result as the baseline or compares it with the baseline.
  so-analysis:
    environment: CodeSync
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        ce_task_name:
          - "TagBuild-Training-Linux-Gpu-Cuda11.8-Cudnn8.6-Mkl-Avx-Gcc8.2-SelfBuiltPypiUse"
          - "TagBuild-Training-Linux-Cpu-Mkl-Avx-Gcc82-SelfBuiltPypiUse"
          - "TagBuild-Training-Linux-Cpu-ARM-SelfBuiltPypiUse"
          - "TagBuild-Training-Linux-Gpu-Cuda12.6-Cudnn9.5-Trt10.5-Mkl-Avx-Gcc11-SelfBuiltPypiUse"
        python_version: ["3.8", "3.10", "3.13"]
        is_cuda: ["True", "False"]
        # Drop the combinations where is_cuda contradicts the task name
        # (CPU tasks with is_cuda=True, GPU tasks with is_cuda=False).
        exclude:
          - ce_task_name: "TagBuild-Training-Linux-Cpu-Mkl-Avx-Gcc82-SelfBuiltPypiUse"
            is_cuda: "True"
          - ce_task_name: "TagBuild-Training-Linux-Cpu-ARM-SelfBuiltPypiUse"
            is_cuda: "True"
          - ce_task_name: "TagBuild-Training-Linux-Gpu-Cuda12.6-Cudnn9.5-Trt10.5-Mkl-Avx-Gcc11-SelfBuiltPypiUse"
            is_cuda: "False"
          - ce_task_name: "TagBuild-Training-Linux-Gpu-Cuda11.8-Cudnn8.6-Mkl-Avx-Gcc8.2-SelfBuiltPypiUse"
            is_cuda: "False"
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          sparse-checkout: |
            .github
            publish_scripts
            tools
          fetch-depth: 1

      - name: Wheel Download and Analysis
        id: analyze
        uses: ./.github/actions/so_dependency_analyzer
        with:
          ce_task_name: ${{ github.event.inputs.branch_name || inputs.branch_name || 'Develop'}}-${{ matrix.ce_task_name }}
          is_cuda: ${{ matrix.is_cuda }}
          python_version: ${{ matrix.python_version }}

      - name: Upload so analysis result
        env:
          AK: ${{ secrets.BOS_AK }}
          SK: ${{ secrets.BOS_SK }}
          BASELINE_SETTING: ${{ github.event.inputs.baselinesetting || inputs.baselinesetting || 'OFF' }}
        run: |
          set -x
          tree -L 3
          # ce_task_name, python_version, result_file_path and description_file_path
          # are exported to GITHUB_ENV by the analyze step.
          echo "The ce_task_name input passed to the analyze step is: ${ce_task_name}"
          echo "The python_version input passed to the analyze step is: ${python_version}"
          echo "The result file is at: ${result_file_path}"
          cat "${result_file_path}"
          echo "The description of the result file is:${description_file_path}"
          cp "${description_file_path}" ./
          cp "${result_file_path}" ./
          # Strip all whitespace from the value: a space after "commit_id:" would
          # otherwise end up inside the BOS path below.
          commit_id=$(grep -m 1 "^commit_id:" description.txt | cut -d':' -f2 | tr -d '[:space:]')
          echo "${commit_id}"
          python -m pip install bce-python-sdk==0.9.29
          if [[ "${BASELINE_SETTING}" == "ON" ]]; then
            python tools/bos_tools.py description.txt "paddle-qa/so_analysis/${ce_task_name}/baseline/${python_version//./}"
            python tools/bos_tools.py so_dependencies_static.json "paddle-qa/so_analysis/${ce_task_name}/baseline/${python_version//./}"
          else
            python tools/bos_tools.py description.txt "paddle-qa/so_analysis/${ce_task_name}/${commit_id}/${python_version//./}"
            python tools/bos_tools.py so_dependencies_static.json "paddle-qa/so_analysis/${ce_task_name}/${commit_id}/${python_version//./}"
            wget "https://paddle-qa.bj.bcebos.com/so_analysis/${ce_task_name}/baseline/${python_version//./}/so_dependencies_static.json" -O baseline_so_dependencies_static.json
            # Capture the comparer's exit status instead of letting it abort the step,
            # so a readable summary line is printed either way.
            exit_code=0
            python publish_scripts/so_dependency_analyzer/so_dependency_analyzer.py compare baseline_so_dependencies_static.json so_dependencies_static.json || exit_code=$?
            if [ "${exit_code}" -eq 0 ]; then
              echo "No change detected in so dependencies."
              exit 0
            else
              echo "Change detected in so dependencies."
              exit 1
            fi
          fi
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
import os
2+
import subprocess
3+
import json
4+
import sys
5+
6+
def find_so_files(root_dir):
    """Collect every shared-object file found beneath ``root_dir``.

    A file is treated as a shared object when its name ends with ``.so`` or
    contains ``.so.`` (versioned names such as ``libfoo.so.1``).

    Args:
        root_dir: Directory that is walked recursively.

    Returns:
        A sorted list of paths, each built by joining the walked directory
        path with the matching file name. Empty when nothing matches or
        ``root_dir`` does not exist.
    """
    so_files = [
        os.path.join(dirpath, filename)
        for dirpath, _, filenames in os.walk(root_dir)
        for filename in filenames
        if filename.endswith(".so") or ".so." in filename
    ]
    # os.walk yields entries in filesystem order; sorting keeps the result
    # (and anything generated from it) reproducible between runs.
    so_files.sort()
    return so_files
15+
16+
def get_so_dependencies_readelf(so_file):
    """List the shared libraries a file declares as needed, using ``readelf -d``.

    Args:
        so_file: Path of the file to inspect.

    Returns:
        The library names found between ``[`` and ``]`` on every
        ``Shared library`` line of the readelf output. If readelf cannot be
        run or exits with a non-zero status, a one-element list holding an
        ``"Error: ..."`` message is returned instead, so a single bad file
        does not abort a whole scan.
    """
    try:
        output = subprocess.check_output(
            ['readelf', '-d', so_file],
            stderr=subprocess.STDOUT,
            text=True,
            # Undecodable bytes in the tool output must not raise.
            errors='replace',
        )
    except subprocess.CalledProcessError as e:
        return [f"Error: {(e.output or '').strip()}"]
    except OSError as e:
        # readelf itself is missing or not executable; report it the same
        # way as any other per-file failure.
        return [f"Error: {e}"]

    deps = []
    for line in output.splitlines():
        if 'Shared library' not in line:
            continue
        start = line.find('[')
        # Look for the closing bracket only after the opening one.
        end = line.find(']', start + 1)
        if start != -1 and end != -1:
            deps.append(line[start + 1:end])
    return deps
31+
32+
def compare_so_dependency_jsons(json1_path, json2_path):
    """Print the differences between two .so dependency JSON files.

    Both files are expected to map a .so path to a list of dependency names.
    Files present on only one side and files whose dependency sets differ are
    reported.

    Args:
        json1_path: Path of the first JSON file.
        json2_path: Path of the second JSON file.

    Raises:
        SystemExit: With exit code 1 when any difference is found.
    """
    with open(json1_path, 'r') as left_fp, open(json2_path, 'r') as right_fp:
        left = json.load(left_fp)
        right = json.load(right_fp)

    left_names = set(left.keys())
    right_names = set(right.keys())

    report = []

    left_only = left_names - right_names
    if left_only:
        report.append(f"🔺 只在 {json1_path} 中存在的 .so 文件:\n" + "\n".join(sorted(left_only)))

    right_only = right_names - left_names
    if right_only:
        report.append(f"🔻 只在 {json2_path} 中存在的 .so 文件:\n" + "\n".join(sorted(right_only)))

    # Dependency order is irrelevant, so the lists are compared as sets.
    for name in sorted(left_names & right_names):
        left_deps = set(left[name])
        right_deps = set(right[name])
        if left_deps == right_deps:
            continue
        report.append(
            f"🔁 文件依赖不同: {name}\n"
            f" ➤ {json1_path}: {sorted(left_deps)}\n"
            f" ➤ {json2_path}: {sorted(right_deps)}"
        )

    if report:
        print("⚠️ 发现差异:\n")
        print("\n\n".join(report))
        sys.exit(1)

    print("✅ 两个 JSON 文件中的所有 .so 文件及其依赖完全一致。")
68+
69+
def main(target_dir, output_json):
    """Scan ``target_dir`` for .so files and dump their dependencies as JSON.

    Args:
        target_dir: Directory searched for shared objects.
        output_json: Path of the JSON file to write; it maps each .so path to
            the list returned by ``get_so_dependencies_readelf``.
    """
    dependency_map = {}
    for path in find_so_files(target_dir):
        dependency_map[path] = get_so_dependencies_readelf(path)

    with open(output_json, "w") as out:
        json.dump(dependency_map, out, indent=4)

    print(f"✅ 静态依赖信息已保存到 {output_json}")
84+
85+
86+
if __name__ == "__main__":
    # Command-line entry point with two sub-commands:
    #   extract  - scan a directory and write the dependency JSON
    #   compare  - diff two dependency JSON files (exits 1 on differences)
    import argparse

    parser = argparse.ArgumentParser(description="Extract or compare .so static dependencies")
    # required=True: without it, running the script with no sub-command parses
    # successfully, matches neither branch below and exits 0 having done nothing.
    subparsers = parser.add_subparsers(dest="command", required=True)

    extract_parser = subparsers.add_parser("extract")
    extract_parser.add_argument("--target_dir", type=str, default="paddle", help="Target directory to search for .so files")
    extract_parser.add_argument("--output_json", type=str, default="so_dependencies_static.json", help="Output JSON file name")

    compare_parser = subparsers.add_parser("compare")
    compare_parser.add_argument("json1", type=str, help="First JSON file to compare")
    compare_parser.add_argument("json2", type=str, help="Second JSON file to compare")

    args = parser.parse_args()

    if args.command == "extract":
        main(args.target_dir, args.output_json)
    elif args.command == "compare":
        compare_so_dependency_jsons(args.json1, args.json2)
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
#!/usr/bin/env python3
2+
import requests
3+
import os
4+
5+
def build_wheel_url(base_url, python_version="3.10", is_cuda=True, timeout=60):
    """Build the download URL of the latest PaddlePaddle wheel under ``base_url``.

    ``{base_url}/latest/description.txt`` is fetched and its ``commit_id:``
    and ``wheel_version:`` lines are used to assemble the wheel file name.

    Args:
        base_url (str): Base URL of the build artifacts. When it contains
            ``"ARM"`` the ``linux_aarch64`` platform tag is used, otherwise
            ``linux_x86_64``.
        python_version (str, optional): Python version such as ``"3.10"``;
            the dot is removed to form the ``cpXY`` tag.
        is_cuda (bool, optional): Selects the ``paddlepaddle_gpu`` package
            when true, ``paddlepaddle`` otherwise.
        timeout (float, optional): Seconds to wait for the HTTP request.

    Returns:
        str: The wheel download URL.

    Raises:
        Exception: If description.txt cannot be downloaded (non-200 status).
        ValueError: If commit_id or wheel_version is missing from
            description.txt.
    """
    desc_url = f"{base_url}/latest/description.txt"
    # Without a timeout requests waits indefinitely on an unresponsive server.
    response = requests.get(desc_url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"❌ 下载失败: {desc_url}")

    commit_id = None
    wheel_version = None
    for line in response.text.splitlines():
        if line.startswith("commit_id:"):
            commit_id = line.split(":", 1)[1].strip()
        elif line.startswith("wheel_version:"):
            wheel_version = line.split(":", 1)[1].strip()

    if not commit_id or not wheel_version:
        raise ValueError("❗ 无法从 description.txt 中提取 commit_id 或 wheel_version")

    py_tag = f"cp{python_version.replace('.', '')}"
    package_name = "paddlepaddle_gpu" if is_cuda else "paddlepaddle"
    platform_tag = "linux_aarch64" if "ARM" in base_url else "linux_x86_64"
    return (
        f"{base_url}/{commit_id}/"
        f"{package_name}-{wheel_version}-{py_tag}-{py_tag}-{platform_tag}.whl"
    )
54+
55+
56+
def download_file(url, save_path, timeout=60):
    """Download ``url`` and store the response body at ``save_path``.

    Args:
        url (str): URL of the file to download.
        save_path (str): Destination path. Its parent directory is created
            when it does not exist yet.
        timeout (float, optional): Seconds to wait for the HTTP request.

    Returns:
        None

    Raises:
        Exception: If the server answers with a status other than 200.
    """
    dir_name = os.path.dirname(save_path)
    # dirname is "" for a bare file name, and os.makedirs("") raises, so only
    # create a directory when there is one to create.
    if dir_name and not os.path.exists(dir_name):
        os.makedirs(dir_name, exist_ok=True)
        print(f"创建目录: {dir_name}")
    print(f"📥 正在下载: {url}")
    # The context manager releases the streamed connection even on failure.
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            raise Exception(f"❌ 下载失败: {url}")

        with open(save_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
    print(f"✅ 保存成功: {save_path}")
86+
87+
if __name__ == "__main__":
    # Command-line entry point: resolves the latest wheel of a CE task and
    # downloads the wheel and its description.txt into --output_dir.
    import argparse

    def str2bool(v):
        """Convert a command-line value such as "True" or "0" to a bool."""
        if isinstance(v, bool):
            return v
        if v.lower() in ('yes', 'true', 't', '1'):
            return True
        elif v.lower() in ('no', 'false', 'f', '0'):
            return False
        else:
            raise argparse.ArgumentTypeError('Boolean value expected.')

    parser = argparse.ArgumentParser()
    parser.add_argument("--python_version", type=str, default='3.10', help="Python version")
    # Required: without it the base URL below would contain the text "None".
    parser.add_argument("--ce_task_name", type=str, required=True, help="CE task name")
    parser.add_argument("--is_cuda", type=str2bool, default=True, help="Whether to use CUDA")
    parser.add_argument("--output_dir", type=str, default=".", help="wheel package output directory")
    args = parser.parse_args()

    base_url = f"https://paddle-qa.bj.bcebos.com/paddle-pipeline/{args.ce_task_name}"

    wheel_url = build_wheel_url(base_url, args.python_version, args.is_cuda)
    print("✅ 下载链接为:")
    print(wheel_url)
    description_url = f"{base_url}/latest/description.txt"
    description_save_path = os.path.join(args.output_dir, "description.txt")
    wheel_filename = os.path.basename(wheel_url)
    wheel_save_path = os.path.join(args.output_dir, wheel_filename)
    download_file(wheel_url, wheel_save_path)
    download_file(description_url, description_save_path)

0 commit comments

Comments
 (0)