Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update findPythonDeps #4682

Open
wants to merge 2 commits into
base: develop
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
207 changes: 118 additions & 89 deletions easybuild/scripts/findPythonDeps.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,21 @@
#!/usr/bin/env python

"""
Find Python dependencies for a given Python package after loading dependencies specified in an EasyConfig.
This is intended for writing or updating PythonBundle EasyConfigs:
1. Create a EasyConfig with at least 'Python' as a dependency.
When updating to a new toolchain it is a good idea to reduce the dependencies to a minimum
as e.g. the new "Python" module might have different packages included.
2. Run this script
3. For each dependency found by this script search existing EasyConfigs for ones providing that Python package.
E.g many are contained in Python-bundle-PyPI. Some can be updated from an earlier toolchain.
4. Add those EasyConfigs as dependencies to your new EasyConfig.
5. Rerun this script so it takes the newly provided packages into account.
You can do steps 3-5 iteratively adding EasyConfig-dependencies one-by-one.
6. Finally you copy the packages found by this script as "exts_list" into the new EasyConfig.
You usually want the list printed as "in install order", the format is already suitable to be copied as-is.
"""

import argparse
import json
import os
Expand All @@ -8,12 +24,13 @@
import subprocess
import sys
import tempfile
import textwrap
from contextlib import contextmanager
from pprint import pprint
try:
import pkg_resources
except ImportError as e:
print('pkg_resources could not be imported: %s\nYou might need to install setuptools!' % e)
print(f'pkg_resources could not be imported: {e}\nYou might need to install setuptools!')
sys.exit(1)

try:
Expand All @@ -22,6 +39,7 @@
_canonicalize_regex = re.compile(r"[-_.]+")

def canonicalize_name(name):
"""Fallback if the import doesn't work with same behavior."""
return _canonicalize_regex.sub("-", name).lower()


Expand All @@ -36,16 +54,16 @@ def temporary_directory(*args, **kwargs):


def extract_pkg_name(package_spec):
return re.split('<|>|=|~', args.package, 1)[0]
"""Get the package name from a specification such as 'package>=3.42'"""
return re.split('<|>|=|~', package_spec, 1)[0]


def can_run(cmd, argument):
def can_run(cmd, *arguments):
"""Check if the given cmd and argument can be run successfully"""
with open(os.devnull, 'w') as FNULL:
try:
return subprocess.call([cmd, argument], stdout=FNULL, stderr=subprocess.STDOUT) == 0
except (subprocess.CalledProcessError, OSError):
return False
try:
return subprocess.call([cmd, *arguments], stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT) == 0
except (subprocess.CalledProcessError, OSError):
return False


def run_cmd(arguments, action_desc, capture_stderr=True, **kwargs):
Expand All @@ -59,13 +77,13 @@ def run_cmd(arguments, action_desc, capture_stderr=True, **kwargs):
if p.returncode != 0:
if err:
err = "\nSTDERR:\n" + err
raise RuntimeError('Failed to %s: %s%s' % (action_desc, out, err))
raise RuntimeError(f'Failed to {action_desc}: {out}{err}')
return out


def run_in_venv(cmd, venv_path, action_desc):
"""Run the given command in the virtualenv at the given path"""
cmd = 'source %s/bin/activate && %s' % (venv_path, cmd)
cmd = f'source {venv_path}/bin/activate && {cmd}'
return run_cmd(cmd, action_desc, shell=True, executable='/bin/bash')


Expand All @@ -78,17 +96,17 @@ def get_dep_tree(package_spec, verbose):
venv_dir = os.path.join(tmp_dir, 'venv')
if verbose:
print('Creating virtualenv at ' + venv_dir)
run_cmd(['virtualenv', '--system-site-packages', venv_dir], action_desc='create virtualenv')
run_cmd([sys.executable, '-m', 'venv', '--system-site-packages', venv_dir], action_desc='create virtualenv')
if verbose:
print('Updating pip in virtualenv')
run_in_venv('pip install --upgrade pip', venv_dir, action_desc='update pip')
if verbose:
print('Installing %s into virtualenv' % package_spec)
out = run_in_venv('pip install "%s"' % package_spec, venv_dir, action_desc='install ' + package_spec)
print('%s installed: %s' % (package_spec, out))
print(f'Installing {package_spec} into virtualenv')
out = run_in_venv(f'pip install "{package_spec}"', venv_dir, action_desc='install ' + package_spec)
print(f'{package_spec} installed: {out}')
# install pipdeptree, figure out dependency tree for installed package
run_in_venv('pip install pipdeptree', venv_dir, action_desc='install pipdeptree')
dep_tree = run_in_venv('pipdeptree -j -p "%s"' % package_name,
dep_tree = run_in_venv(f'pipdeptree -j -p "{package_name}"',
venv_dir, action_desc='collect dependencies')
return json.loads(dep_tree)

Expand All @@ -108,26 +126,27 @@ def find_deps(pkgs, dep_tree):
for orig_pkg in cur_pkgs:
count += 1
if count > MAX_PACKAGES:
raise RuntimeError("Aborting after checking %s packages. Possibly cycle detected!" % MAX_PACKAGES)
raise RuntimeError(f"Aborting after checking {MAX_PACKAGES} packages. Possibly cycle detected!")
pkg = canonicalize_name(orig_pkg)
matching_entries = [entry for entry in dep_tree
if pkg in (entry['package']['package_name'], entry['package']['key'])]
if not matching_entries:
matching_entries = [entry for entry in dep_tree
if orig_pkg in (entry['package']['package_name'], entry['package']['key'])]
if not matching_entries:
raise RuntimeError("Found no installed package for '%s' in %s" % (pkg, dep_tree))
raise RuntimeError(f"Found no installed package for '{pkg}' in {dep_tree}")
if len(matching_entries) > 1:
raise RuntimeError("Found multiple installed packages for '%s' in %s" % (pkg, dep_tree))
raise RuntimeError(f"Found multiple installed packages for '{pkg}' in {dep_tree}")
entry = matching_entries[0]
res.append((entry['package']['package_name'], entry['package']['installed_version']))
res.append(entry['package'])
# Add dependencies to list of packages to check next
# Could call this function recursively but that might exceed the max recursion depth
next_pkgs.update(dep['package_name'] for dep in entry['dependencies'])
return res


def print_deps(package, verbose):
"""Print dependencies of the given package that are not installed yet in a format usable as 'exts_list'"""
if verbose:
print('Getting dep tree of ' + package)
dep_tree = get_dep_tree(package, verbose)
Expand All @@ -144,83 +163,93 @@ def print_deps(package, verbose):
res = []
handled = set()
for dep in reversed(deps):
if dep not in handled:
handled.add(dep)
if dep[0] in installed_modules:
# Tuple as we need it for exts_list
dep_entry = (dep['package_name'], dep['installed_version'])
if dep_entry not in handled:
handled.add(dep_entry)
# Need to check for key and package_name as naming is not consistent. E.g.:
# "PyQt5-sip": 'key': 'pyqt5-sip', 'package_name': 'PyQt5-sip'
# "jupyter-core": 'key': 'jupyter-core', 'package_name': 'jupyter_core'
if dep['key'] in installed_modules or dep['package_name'] in installed_modules:
if verbose:
print("Skipping installed module '%s'" % dep[0])
print(f"Skipping installed module '{dep['package_name']}'")
else:
res.append(dep)
res.append(dep_entry)

print("List of dependencies in (likely) install order:")
pprint(res, indent=4)
print("Sorted list of dependencies:")
pprint(sorted(res), indent=4)


examples = [
'Example usage with EasyBuild (after installing dependency modules):',
'\t' + sys.argv[0] + ' --ec TensorFlow-2.3.4.eb tensorflow==2.3.4',
'Which is the same as:',
'\t' + ' && '.join(['eb TensorFlow-2.3.4.eb --dump-env',
'source TensorFlow-2.3.4.env',
sys.argv[0] + ' tensorflow==2.3.4',
]),
]
parser = argparse.ArgumentParser(
description='Find dependencies of Python packages by installing it in a temporary virtualenv. ',
epilog='\n'.join(examples),
formatter_class=argparse.RawDescriptionHelpFormatter
)
parser.add_argument('package', metavar='python-pkg-spec',
help='Python package spec, e.g. tensorflow==2.3.4')
parser.add_argument('--ec', metavar='easyconfig', help='EasyConfig to use as the build environment. '
'You need to have dependency modules installed already!')
parser.add_argument('--verbose', help='Verbose output', action='store_true')
args = parser.parse_args()

if args.ec:
if not can_run('eb', '--version'):
print('EasyBuild not found or executable. Make sure it is in your $PATH when using --ec!')
sys.exit(1)
if args.verbose:
print('Checking with EasyBuild for missing dependencies')
missing_dep_out = run_cmd(['eb', args.ec, '--missing'],
capture_stderr=False,
action_desc='Get missing dependencies'
)
excluded_dep = '(%s)' % os.path.basename(args.ec)
missing_deps = [dep for dep in missing_dep_out.split('\n')
if dep.startswith('*') and excluded_dep not in dep
]
if missing_deps:
print('You need to install all modules on which %s depends first!' % args.ec)
print('\n\t'.join(['Missing:'] + missing_deps))
sys.exit(1)

# If the --ec argument is a (relative) existing path make it absolute so we can find it after the chdir
ec_arg = os.path.abspath(args.ec) if os.path.exists(args.ec) else args.ec
with temporary_directory() as tmp_dir:
old_dir = os.getcwd()
os.chdir(tmp_dir)
if args.verbose:
print('Running EasyBuild to get build environment')
run_cmd(['eb', ec_arg, '--dump-env', '--force'], action_desc='Dump build environment')
os.chdir(old_dir)

cmd = "source %s/*.env && python %s '%s'" % (tmp_dir, sys.argv[0], args.package)
def main():
"""Entrypoint of the script"""
examples = textwrap.dedent(f"""
Example usage with EasyBuild (after installing dependency modules):
{sys.argv[0]} --ec TensorFlow-2.3.4.eb tensorflow==2.3.4
Which is the same as:
eb TensorFlow-2.3.4.eb --dump-env && source TensorFlow-2.3.4.env && {sys.argv[0]} tensorflow==2.3.4
Using the '--ec' parameter is recommended as the latter requires manually updating the .env file
after each change to the EasyConfig.
""")
parser = argparse.ArgumentParser(
description='Find dependencies of Python packages by installing it in a temporary virtualenv. ',
epilog='\n'.join(examples),
formatter_class=argparse.RawDescriptionHelpFormatter
)
parser.add_argument('package', metavar='python-pkg-spec',
help='Python package spec, e.g. tensorflow==2.3.4')
parser.add_argument('--ec', metavar='easyconfig', help='EasyConfig to use as the build environment. '
'You need to have dependency modules installed already!')
parser.add_argument('--verbose', help='Verbose output', action='store_true')
args = parser.parse_args()

if args.ec:
if not can_run('eb', '--version'):
print('EasyBuild not found or executable. Make sure it is in your $PATH when using --ec!')
sys.exit(1)
if args.verbose:
cmd += ' --verbose'
print('Restarting script in new build environment')

out = run_cmd(cmd, action_desc='Run in new environment', shell=True, executable='/bin/bash')
print(out)
else:
if not can_run('virtualenv', '--version'):
print('Virtualenv not found or executable. ' +
'Make sure it is installed (e.g. in the currently loaded Python module)!')
sys.exit(1)
if 'PIP_PREFIX' in os.environ:
print("$PIP_PREFIX is set. Unsetting it as it doesn't work well with virtualenv.")
del os.environ['PIP_PREFIX']
print_deps(args.package, args.verbose)
print('Checking with EasyBuild for missing dependencies')
missing_dep_out = run_cmd(['eb', args.ec, '--missing'],
capture_stderr=False,
action_desc='Get missing dependencies'
)
excluded_dep = f'({os.path.basename(args.ec)})'
missing_deps = [dep for dep in missing_dep_out.split('\n')
if dep.startswith('*') and excluded_dep not in dep
]
if missing_deps:
print(f'You need to install all modules on which {args.ec} depends first!')
print('\n\t'.join(['Missing:'] + missing_deps))
sys.exit(1)

# If the --ec argument is a (relative) existing path make it absolute so we can find it after the chdir
ec_arg = os.path.abspath(args.ec) if os.path.exists(args.ec) else args.ec
with temporary_directory() as tmp_dir:
old_dir = os.getcwd()
os.chdir(tmp_dir)
if args.verbose:
print('Running EasyBuild to get build environment')
run_cmd(['eb', ec_arg, '--dump-env', '--force'], action_desc='Dump build environment')
os.chdir(old_dir)

cmd = f"source {tmp_dir}/*.env && python {sys.argv[0]} '{args.package}'"
if args.verbose:
cmd += ' --verbose'
print('Restarting script in new build environment')

out = run_cmd(cmd, action_desc='Run in new environment', shell=True, executable='/bin/bash')
print(out)
else:
if not can_run(sys.executable, '-m', 'venv', '-h'):
print("'venv' module not found. This should be available in Python 3.3+.")
sys.exit(1)
if 'PIP_PREFIX' in os.environ:
print("$PIP_PREFIX is set. Unsetting it as it doesn't work well with virtualenv.")
del os.environ['PIP_PREFIX']
os.environ['PYTHONNOUSERSITE'] = '1'
print_deps(args.package, args.verbose)


if __name__ == "__main__":
main()
Loading