Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

improve portability of reproducible tarballs by replacing external tar command with tarfile module #4660

Open
wants to merge 10 commits into
base: 5.0.x
Choose a base branch
from
Open
39 changes: 27 additions & 12 deletions easybuild/tools/filetools.py
Original file line number Diff line number Diff line change
Expand Up @@ -2772,15 +2772,32 @@ def get_source_tarball_from_git(filename, target_dir, git_config):

if keep_git_dir:
# create archive of git repo including .git directory
tar_cmd = ['tar', 'cfvz', archive_path, repo_name]
tar_cmd = f"tar cfvz {archive_path} {repo_name}"
else:
# create reproducible archive
# see https://reproducible-builds.org/docs/archives/
tar_cmd = [
tar_cmd = reproducible_archive_cmd(repo_name, archive_path)

run_shell_cmd(tar_cmd, work_dir=tmpdir, hidden=True, verbose_dry_run=True)

# cleanup (repo_name dir does not exist in dry run mode)
remove(tmpdir)

return archive_path


def reproducible_archive_cmd(dir_name, archive_name):
"""
Return string with command to make reproducible archive from a given directory
see https://reproducible-builds.org/docs/archives/
"""
try:
cmd_pipe = [
# stop on failure of any command in the pipe
'set', '-eo pipefail', ';',
# print names of all files and folders excluding .git directory
'find', repo_name, '-name ".git"', '-prune', '-o', '-print0',
# reset access and modification timestamps to epoch 0 (equivalent to --mtime in GNU tar)
'-exec', 'touch', '--date=@0', '{}', r'\;',
'find', str(dir_name), '-name ".git"', '-prune', '-o', '-print0',
# reset access and modification timestamps to epoch 0
'-exec', 'touch', '--date=1970-01-01T00:00:00.00Z', '{}', r'\;',
# reset file permissions of cloned repo (equivalent to --mode in GNU tar)
'-exec', 'chmod', '"go+u,go-w"', '{}', r'\;', '|',
# sort file list (equivalent to --sort in GNU tar)
Expand All @@ -2789,14 +2806,12 @@ def get_source_tarball_from_git(filename, target_dir, git_config):
'tar', '--create', '--no-recursion', '--owner=0', '--group=0', '--numeric-owner',
'--format=gnu', '--null', '--files-from', '-', '|',
# compress tarball with gzip without original file name and timestamp
'gzip', '--no-name', '>', archive_path
'gzip', '--no-name', '>', str(archive_name)
]
run_shell_cmd(' '.join(tar_cmd), work_dir=tmpdir, hidden=True, verbose_dry_run=True)
except TypeError as err:
raise EasyBuildError("reproducible_archive_cmd: wrong directory or archive name given") from err

# cleanup (repo_name dir does not exist in dry run mode)
remove(tmpdir)

return archive_path
return " ".join(cmd_pipe)


def move_file(path, target_path, force_in_dry_run=False):
Expand Down
21 changes: 8 additions & 13 deletions test/framework/filetools.py
Original file line number Diff line number Diff line change
Expand Up @@ -2939,16 +2939,11 @@ def run_check():
'git_repo': '[email protected]:easybuilders/testrepository.git',
'test_prefix': self.test_prefix,
}
reprod_tar_cmd_pattern = (
r' running shell command "find {} -name \".git\" -prune -o -print0 -exec touch -t 197001010100 {{}} \; |'
r' LC_ALL=C sort --zero-terminated | tar --create --no-recursion --owner=0 --group=0 --numeric-owner'
r' --format=gnu --null --files-from - | gzip --no-name > %(test_prefix)s/target/test.tar.gz'
)

expected = '\n'.join([
r' running shell command "git clone --depth 1 --branch tag_for_tests %(git_repo)s"',
r" \(in .*/tmp.*\)",
reprod_tar_cmd_pattern.format("testrepository"),
ft.reproducible_archive_cmd("testrepository", "%(test_prefix)s/target/test.tar.gz"),
lexming marked this conversation as resolved.
Show resolved Hide resolved
r" \(in .*/tmp.*\)",
]) % string_args
run_check()
Expand All @@ -2957,7 +2952,7 @@ def run_check():
expected = '\n'.join([
r' running shell command "git clone --depth 1 --branch tag_for_tests %(git_repo)s test123"',
r" \(in .*/tmp.*\)",
reprod_tar_cmd_pattern.format("test123"),
ft.reproducible_archive_cmd("test123", "%(test_prefix)s/target/test.tar.gz"),
r" \(in .*/tmp.*\)",
]) % string_args
run_check()
Expand All @@ -2967,7 +2962,7 @@ def run_check():
expected = '\n'.join([
r' running shell command "git clone --depth 1 --branch tag_for_tests --recursive %(git_repo)s"',
r" \(in .*/tmp.*\)",
reprod_tar_cmd_pattern.format("testrepository"),
ft.reproducible_archive_cmd("testrepository", "%(test_prefix)s/target/test.tar.gz"),
r" \(in .*/tmp.*\)",
]) % string_args
run_check()
Expand All @@ -2977,7 +2972,7 @@ def run_check():
' running shell command "git clone --depth 1 --branch tag_for_tests --recursive'
+ ' --recurse-submodules=\'!vcflib\' --recurse-submodules=\'!sdsl-lite\' %(git_repo)s"',
r" \(in .*/tmp.*\)",
reprod_tar_cmd_pattern.format("testrepository"),
ft.reproducible_archive_cmd("testrepository", "%(test_prefix)s/target/test.tar.gz"),
r" \(in .*/tmp.*\)",
]) % string_args
run_check()
Expand All @@ -2991,7 +2986,7 @@ def run_check():
+ ' clone --depth 1 --branch tag_for_tests --recursive'
+ ' --recurse-submodules=\'!vcflib\' --recurse-submodules=\'!sdsl-lite\' %(git_repo)s"',
r" \(in .*/tmp.*\)",
reprod_tar_cmd_pattern.format("testrepository"),
ft.reproducible_archive_cmd("testrepository", "%(test_prefix)s/target/test.tar.gz"),
r" \(in .*/tmp.*\)",
]) % string_args
run_check()
Expand All @@ -3015,7 +3010,7 @@ def run_check():
r" \(in .*/tmp.*\)",
r' running shell command "git checkout 8456f86 && git submodule update --init --recursive"',
r" \(in testrepository\)",
reprod_tar_cmd_pattern.format("testrepository"),
ft.reproducible_archive_cmd("testrepository", "%(test_prefix)s/target/test.tar.gz"),
r" \(in .*/tmp.*\)",
]) % string_args
run_check()
Expand All @@ -3026,7 +3021,7 @@ def run_check():
r" \(in .*/tmp.*\)",
r' running shell command "git checkout 8456f86"',
r" \(in testrepository\)",
reprod_tar_cmd_pattern.format("testrepository"),
ft.reproducible_archive_cmd("testrepository", "%(test_prefix)s/target/test.tar.gz"),
r" \(in .*/tmp.*\)",
]) % string_args
run_check()
Expand All @@ -3038,7 +3033,7 @@ def run_check():
r" \(in /.*\)",
r' running shell command "git checkout 8456f86"',
r" \(in /.*/testrepository\)",
reprod_tar_cmd_pattern.format("testrepository"),
ft.reproducible_archive_cmd("testrepository", "%(test_prefix)s/target/test.tar.gz"),
r" \(in /.*\)",
]) % string_args
run_check()
Expand Down
Loading