diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..c842c79 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,56 @@ +name: test +on: + pull_request: + push: + branches: + - main + +jobs: + + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1 + + test: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ ubuntu-latest, macos-latest ] + python-version: [ "3.8", "3.10", "3.12" ] + resolver: [ mamba, conda, micromamba ] + env: + METAFLOW_CONDA_DEPENDENCY_RESOLVER: ${{ matrix.resolver }} + METAFLOW_CONDA_TEST: 1 + METAFLOW_DATASTORE_SYSROOT_LOCAL: .metaflow + steps: + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + + - uses: mamba-org/setup-micromamba@f8b8a1e23a26f60a44c853292711bacfd3eac822 # v1.9.0 + with: + micromamba-version: latest + environment-file: dev-env.yml + init-shell: bash + create-args: >- + python=${{ matrix.python-version }} + + - name: install nflx-extension + shell: bash -eo pipefail -l {0} + run: | + which pip + pip install -e . 
--force-reinstall -U + + - name: install bash + if: runner.os == 'macOS' + run: brew install bash + + - name: run test + shell: bash -eo pipefail -l {0} + run: | + set -x + which pytest + mkdir .metaflow + pytest -n 4 tests diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..f8ed033 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,15 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.1.0 + hooks: + - id: check-yaml + - id: check-json + - repo: https://github.com/ambv/black + rev: 24.4.2 + hooks: + - id: black + language_version: python3 + exclude: "^/metaflow_extensions/netflix_ext/vendor/" + args: [-t, py34, -t, py35, -t, py36, -t, py37, -t, py38, -t, py39, -t, py310, -t, py311, -t, py312] + + diff --git a/dev-env.yml b/dev-env.yml new file mode 100644 index 0000000..252da01 --- /dev/null +++ b/dev-env.yml @@ -0,0 +1,16 @@ +name: nflxext-dev +category: dev +channels: + - conda-forge + - nodefaults +dependencies: + - pytest + - pytest-xdist + - pytest-timeout + - wheel + - mamba + - conda + - conda-lock + - conda-package-handling + - pip: + - sh diff --git a/metaflow_extensions/netflix_ext/cmd/debug/debug_script_generator.py b/metaflow_extensions/netflix_ext/cmd/debug/debug_script_generator.py index 70952dc..92fe959 100644 --- a/metaflow_extensions/netflix_ext/cmd/debug/debug_script_generator.py +++ b/metaflow_extensions/netflix_ext/cmd/debug/debug_script_generator.py @@ -364,15 +364,17 @@ def generate_notebook_json( # Create an empty notebook nb = { "cells": [], - "metadata": {} - if kernel_def is None - else { - "kernelspec": { - "name": kernel_def[0], - "display_name": kernel_def[1], - "language": "python", + "metadata": ( + {} + if kernel_def is None + else { + "kernelspec": { + "name": kernel_def[0], + "display_name": kernel_def[1], + "language": "python", + } } - }, + ), "nbformat": 4, "nbformat_minor": 4, } @@ -458,9 +460,11 @@ def generate_notebook_cells( "format": "markdown", 
"content": Constants.JUPYTER_TITLE_MARKDOWN.format( TaskPathSpec=self.task_pathspec, - DebugType="Post-Execution State" - if self.inspect - else "Pre-Execution State", + DebugType=( + "Post-Execution State" + if self.inspect + else "Pre-Execution State" + ), ), } ] diff --git a/metaflow_extensions/netflix_ext/cmd/environment/environment_cmd.py b/metaflow_extensions/netflix_ext/cmd/environment/environment_cmd.py index bdbd144..f2af6c8 100644 --- a/metaflow_extensions/netflix_ext/cmd/environment/environment_cmd.py +++ b/metaflow_extensions/netflix_ext/cmd/environment/environment_cmd.py @@ -21,6 +21,7 @@ from metaflow.plugins import DATASTORES from metaflow.metaflow_config import ( CONDA_ALL_ARCHS, + CONDA_TEST, CONDA_SYS_DEPENDENCIES, DEFAULT_DATASTORE, DEFAULT_METADATA, @@ -359,7 +360,7 @@ def create( ) resolver.resolve_environments(obj.echo) update_envs = [] # type: List[ResolvedEnvironment] - if obj.datastore_type != "local": + if obj.datastore_type != "local" or CONDA_TEST: # We may need to update caches # Note that it is possible that something we needed to resolve, we don't need # to cache (if we resolved to something already cached). @@ -823,9 +824,9 @@ def resolve( sources, new_extras, base_env=base_env if using_str else None, - base_from_full_id=base_env.is_info_accurate - if base_env and using_str - else False, + base_from_full_id=( + base_env.is_info_accurate if base_env and using_str else False + ), local_only=local_only, force=force, force_co_resolve=len(archs) > 1, @@ -881,7 +882,7 @@ def resolve( # If this is not a dry-run, we cache the environments and write out the resolved # information update_envs = [] # type: List[ResolvedEnvironment] - if obj.datastore_type != "local": + if obj.datastore_type != "local" or CONDA_TEST: # We may need to update caches # Note that it is possible that something we needed to resolve, we don't need # to cache (if we resolved to something already cached). 
@@ -1215,9 +1216,7 @@ def _parse_yml_file( to_update = ( conda_deps if mode == "deps" - else pypi_deps - if mode == "pypi_deps" - else sys_deps + else pypi_deps if mode == "pypi_deps" else sys_deps ) splits = YML_SPLIT_LINE.split(line.replace(" ", ""), maxsplit=1) if len(splits) == 1: diff --git a/metaflow_extensions/netflix_ext/cmd/mfextinit_netflixext.py b/metaflow_extensions/netflix_ext/cmd/mfextinit_netflixext.py index b923dca..719090e 100644 --- a/metaflow_extensions/netflix_ext/cmd/mfextinit_netflixext.py +++ b/metaflow_extensions/netflix_ext/cmd/mfextinit_netflixext.py @@ -1,4 +1,4 @@ CMDS_DESC = [ ("debug", ".debug.debug_cmd.cli"), - ("environment", ".environment.environment_cmd.cli") + ("environment", ".environment.environment_cmd.cli"), ] diff --git a/metaflow_extensions/netflix_ext/config/mfextinit_netflixext.py b/metaflow_extensions/netflix_ext/config/mfextinit_netflixext.py index b11ff36..87d89e1 100644 --- a/metaflow_extensions/netflix_ext/config/mfextinit_netflixext.py +++ b/metaflow_extensions/netflix_ext/config/mfextinit_netflixext.py @@ -8,6 +8,9 @@ from metaflow.metaflow_config_funcs import from_conf, get_validate_choice_fn +# Set to true if running the tests with a local datastore +CONDA_TEST = from_conf("CONDA_TEST", False) + CONDA_S3ROOT = from_conf( "CONDA_S3ROOT", os.path.join(DATASTORE_SYSROOT_S3, "conda_env") if DATASTORE_SYSROOT_S3 else None, @@ -36,7 +39,7 @@ ), ) -CONDA_MAGIC_FILE_V2 = "conda_v2.cnd" +CONDA_MAGIC_FILE_V2 = from_conf("CONDA_MAGIC_FILE_V2", "conda_v2.cnd") # Use an alternate dependency resolver for conda packages instead of conda # Mamba promises faster package dependency resolution times, which @@ -65,7 +68,7 @@ CONDA_LOCK_TIMEOUT = from_conf("CONDA_LOCK_TIMEOUT", 300) # Location within CONDA_ROOT of the packages directory -CONDA_PACKAGES_DIRNAME = from_conf("ENV_PACKAGES_DIRNAME", "packages") +CONDA_PACKAGES_DIRNAME = from_conf("CONDA_PACKAGES_DIRNAME", "packages") # Ditto for the envs directory CONDA_ENVS_DIRNAME = 
from_conf("CONDA_ENVS_DIRNAME", "envs") diff --git a/metaflow_extensions/netflix_ext/plugins/conda/conda.py b/metaflow_extensions/netflix_ext/plugins/conda/conda.py index 6405082..8f34a91 100644 --- a/metaflow_extensions/netflix_ext/plugins/conda/conda.py +++ b/metaflow_extensions/netflix_ext/plugins/conda/conda.py @@ -53,6 +53,7 @@ CONDA_PREFERRED_FORMAT, CONDA_REMOTE_INSTALLER, CONDA_REMOTE_INSTALLER_DIRNAME, + CONDA_TEST, CONDA_DEFAULT_PYPI_SOURCE, CONDA_USE_REMOTE_LATEST, ) @@ -139,11 +140,11 @@ def _modified_logger(*args: Any, **kwargs: Any): self._cached_environment = read_conda_manifest(self._local_root) # Initialize storage - if self._datastore_type != "local": + if self._datastore_type != "local" or CONDA_TEST: # Prevent circular dep from metaflow.plugins import DATASTORES - # We will be able to cache things -- currently no caching for local + # We will be able to cache things -- currently no caching for local except in testing storage_impl = [d for d in DATASTORES if d.TYPE == self._datastore_type][0] self._storage = storage_impl( get_conda_root(self._datastore_type) @@ -229,8 +230,10 @@ def virtual_packages(self) -> Dict[str, str]: } elif "virtual packages" in self._info: # Micromamba outputs them differently for some reason + # It also includes a -vX at the end of archspec for example which doesn't + # play nice with conda-lock. Strip it out. 
return { - name: build_str + name: build_str.split("-", 1)[0] for name, build_str in map( lambda x: x.split("=", 1), cast(List[str], self._info["virtual packages"]), @@ -271,7 +274,7 @@ def call_conda( or binary == "micromamba" ) ): - args.extend(["-r", self._info["root_prefix"], "--json"]) + args.extend(["-r", self.root_prefix, "--json"]) debug.conda_exec("Conda call: %s" % str([self._bins[binary]] + args)) return cast( bytes, @@ -284,15 +287,19 @@ def call_conda( except subprocess.CalledProcessError as e: if pretty_print_exception: print( - "Pretty-printed STDOUT:\n%s" % e.output.decode("utf-8") - if e.output - else "No STDOUT", + ( + "Pretty-printed STDOUT:\n%s" % e.output.decode("utf-8") + if e.output + else "No STDOUT" + ), file=sys.stderr, ) print( - "Pretty-printed STDERR:\n%s" % e.stderr.decode("utf-8") - if e.stderr - else "No STDERR", + ( + "Pretty-printed STDERR:\n%s" % e.stderr.decode("utf-8") + if e.stderr + else "No STDERR" + ), file=sys.stderr, ) raise CondaException( @@ -334,15 +341,19 @@ def call_binary( ).strip() except subprocess.CalledProcessError as e: print( - "Pretty-printed STDOUT:\n%s" % e.output.decode("utf-8") - if e.output - else "No STDOUT", + ( + "Pretty-printed STDOUT:\n%s" % e.output.decode("utf-8") + if e.output + else "No STDOUT" + ), file=sys.stderr, ) print( - "Pretty-printed STDERR:\n%s" % e.stderr.decode("utf-8") - if e.stderr - else "No STDERR", + ( + "Pretty-printed STDERR:\n%s" % e.stderr.decode("utf-8") + if e.stderr + else "No STDERR" + ), file=sys.stderr, ) raise CondaException( @@ -863,7 +874,7 @@ def alias_environment(self, env_id: EnvID, aliases: List[str]) -> None: The list of aliases -- note that you can only update mutable aliases or add new ones. 
""" - if self._datastore_type != "local": + if self._datastore_type != "local" or CONDA_TEST: # We first fetch any aliases we have remotely because that way # we will catch any non-mutable changes resolved_aliases = [resolve_env_alias(a) for a in aliases] @@ -965,9 +976,11 @@ def cache_environments( my_arch_id = arch_id() cache_formats = cache_formats or { "pypi": ["_any"], - "conda": [CONDA_PREFERRED_FORMAT] - if CONDA_PREFERRED_FORMAT and CONDA_PREFERRED_FORMAT != "none" - else ["_any"], + "conda": ( + [CONDA_PREFERRED_FORMAT] + if CONDA_PREFERRED_FORMAT and CONDA_PREFERRED_FORMAT != "none" + else ["_any"] + ), } # key: URL @@ -2046,10 +2059,19 @@ def _check_match(dir_name: str) -> Optional[EnvID]: self._remove(os.path.basename(dir_name)) return None - if self._conda_executable_type == "micromamba" or CONDA_LOCAL_PATH is not None: + if ( + self._conda_executable_type == "micromamba" + or CONDA_LOCAL_PATH is not None + or CONDA_TEST + ): + # Micromamba does not record created environments so we look around for them + # in the root env directory. We also do this if had a local installation + # because we don't want to look around at other environments created outside + # of that local installation. Finally, we also do this in test mode for + # similar reasons -- we only want to search the ones we created. # For micromamba OR if we are using a specific conda installation # (so with CONDA_LOCAL_PATH), only search there - env_dir = os.path.join(self._info["root_prefix"], "envs") + env_dir = self._root_env_dir with CondaLock(self.echo, self._env_lock_file(os.path.join(env_dir, "_"))): # Grab a lock *once* on the parent directory so we pick anyname for # the "directory". 
@@ -2059,6 +2081,7 @@ def _check_match(dir_name: str) -> Optional[EnvID]: if possible_env_id: ret.setdefault(possible_env_id, []).append(entry.path) else: + # Else we iterate over all the environments that the installation knows about envs = self._info["envs"] # type: List[str] for env in envs: with CondaLock(self.echo, self._env_lock_file(env)): @@ -2176,18 +2199,19 @@ def _remote_env_fetch_alias( @property def _package_dirs(self) -> List[str]: info = self._info - if self._conda_executable_type == "micromamba": - pkg_dir = os.path.join(info["root_prefix"], "pkgs") - if not os.path.exists(pkg_dir): - os.makedirs(pkg_dir) - return [pkg_dir] + # We rely on the first directory existing. This should be a fairly + # easy check. + if not os.path.exists(info["pkgs_dirs"][0]): + os.makedirs(info["pkgs_dirs"][0]) return info["pkgs_dirs"] @property def _root_env_dir(self) -> str: info = self._info - if self._conda_executable_type == "micromamba": - return os.path.join(info["root_prefix"], "envs") + # We rely on the first directory existing. This should be a fairly + # easy check.
+ if not os.path.exists(info["envs_dirs"][0]): + os.makedirs(info["envs_dirs"][0]) return info["envs_dirs"][0] @property @@ -2200,22 +2224,10 @@ def _info(self) -> Dict[str, Any]: def _info_no_lock(self) -> Dict[str, Any]: if self._cached_info is None: self._cached_info = json.loads(self.call_conda(["info", "--json"])) - # Micromamba is annoying because if there are multiple installations of it - # executing the binary doesn't necessarily point us to the root directory - # we are in so we kind of look for it heuristically if self._conda_executable_type == "micromamba": - # Best info if we don't have something else self._cached_info["root_prefix"] = self._cached_info["base environment"] - cur_dir = os.path.dirname(self._bins[self._conda_executable_type]) - while True: - if os.path.isdir(os.path.join(cur_dir, "pkgs")) and os.path.isdir( - os.path.join(cur_dir, "envs") - ): - self._cached_info["root_prefix"] = cur_dir - break - if cur_dir == "/": - break - cur_dir = os.path.dirname(cur_dir) + self._cached_info["envs_dirs"] = self._cached_info["envs directories"] + self._cached_info["pkgs_dirs"] = self._cached_info["package cache"] return self._cached_info @@ -2382,9 +2394,11 @@ def _create(self, env: ResolvedEnvironment, env_name: str) -> str: args, # Creating with micromamba is faster as it extracts in parallel. Prefer # it if it exists. 
- binary="micromamba" - if self._bins and "micromamba" in self._bins - else "conda", + binary=( + "micromamba" + if self._bins and "micromamba" in self._bins + else "conda" + ), ) if pypi_paths: @@ -2412,15 +2426,19 @@ def _create(self, env: ResolvedEnvironment, env_name: str) -> str: subprocess.check_output(arg_list, stderr=subprocess.STDOUT) except subprocess.CalledProcessError as e: print( - "Pretty-printed STDOUT:\n%s" % e.output.decode("utf-8") - if e.output - else "No STDOUT", + ( + "Pretty-printed STDOUT:\n%s" % e.output.decode("utf-8") + if e.output + else "No STDOUT" + ), file=sys.stderr, ) print( - "Pretty-printed STDERR:\n%s" % e.stderr.decode("utf-8") - if e.stderr - else "No STDERR", + ( + "Pretty-printed STDERR:\n%s" % e.stderr.decode("utf-8") + if e.stderr + else "No STDERR" + ), file=sys.stderr, ) raise CondaException( diff --git a/metaflow_extensions/netflix_ext/plugins/conda/conda_environment.py b/metaflow_extensions/netflix_ext/plugins/conda/conda_environment.py index c1ae868..3b59700 100644 --- a/metaflow_extensions/netflix_ext/plugins/conda/conda_environment.py +++ b/metaflow_extensions/netflix_ext/plugins/conda/conda_environment.py @@ -30,6 +30,7 @@ from metaflow.metaflow_config import ( CONDA_MAGIC_FILE_V2, CONDA_REMOTE_COMMANDS, + CONDA_TEST, get_pinned_conda_libs, ) @@ -139,7 +140,7 @@ def init_environment(self, echo: Callable[..., None]): resolver.resolve_environments(echo) update_envs = [] # type: List[ResolvedEnvironment] - if self._datastore_type != "local": + if self._datastore_type != "local" or CONDA_TEST: # We may need to update caches # Note that it is possible that something we needed to resolve, we don't need # to cache (if we resolved to something already cached). 
diff --git a/metaflow_extensions/netflix_ext/plugins/conda/envsresolver.py b/metaflow_extensions/netflix_ext/plugins/conda/envsresolver.py index 2dcf728..31d5fe0 100644 --- a/metaflow_extensions/netflix_ext/plugins/conda/envsresolver.py +++ b/metaflow_extensions/netflix_ext/plugins/conda/envsresolver.py @@ -189,9 +189,11 @@ def add_environment( "deps": deps, "sources": user_sources, "extras": extras, - "conda_format": [CONDA_PREFERRED_FORMAT] - if CONDA_PREFERRED_FORMAT and CONDA_PREFERRED_FORMAT != "none" - else ["_any"], + "conda_format": ( + [CONDA_PREFERRED_FORMAT] + if CONDA_PREFERRED_FORMAT and CONDA_PREFERRED_FORMAT != "none" + else ["_any"] + ), "base": base_env, "base_accurate": base_env and base_env.is_info_accurate diff --git a/metaflow_extensions/netflix_ext/plugins/conda/resolvers/conda_resolver.py b/metaflow_extensions/netflix_ext/plugins/conda/resolvers/conda_resolver.py index 60516f0..077164e 100644 --- a/metaflow_extensions/netflix_ext/plugins/conda/resolvers/conda_resolver.py +++ b/metaflow_extensions/netflix_ext/plugins/conda/resolvers/conda_resolver.py @@ -63,7 +63,8 @@ def resolve( addl_env = { "CONDA_SUBDIR": architecture, - "CONDA_PKGS_DIRS": mamba_dir, + "CONDA_PKGS_DIRS": os.path.join(mamba_dir, "pkgs"), + "CONDA_ENVS_DIRS": os.path.join(mamba_dir, "envs"), "CONDA_ROOT": self._conda.root_prefix, "CONDA_UNSATISFIABLE_HINTS_CHECK_DEPTH": "0", } diff --git a/metaflow_extensions/netflix_ext/plugins/conda/terminal_menu.py b/metaflow_extensions/netflix_ext/plugins/conda/terminal_menu.py index c0eb6b3..19e8aba 100644 --- a/metaflow_extensions/netflix_ext/plugins/conda/terminal_menu.py +++ b/metaflow_extensions/netflix_ext/plugins/conda/terminal_menu.py @@ -1090,9 +1090,13 @@ def setup_title_or_status_bar_lines( ) self._sort = self.Sort( self._menu_entries, - column_headers - if column_headers - else ["Column %d" % d for d in range(len(self._menu_entries_cols[0]) - 1)], + ( + column_headers + if column_headers + else [ + "Column %d" % d for d in 
range(len(self._menu_entries_cols[0]) - 1) + ] + ), self._menu_entries_cols, ) self._in_sort_query = False @@ -1183,9 +1187,9 @@ def _init_backspace_control_character(self) -> None: name, ctrl_code = match_obj.group(1), match_obj.group(2) if name != "erase": continue - self._name_to_control_character[ - "backspace" - ] = self._get_keycode_for_key("ctrl-" + ctrl_code) + self._name_to_control_character["backspace"] = ( + self._get_keycode_for_key("ctrl-" + ctrl_code) + ) return except subprocess.CalledProcessError: pass @@ -1207,10 +1211,12 @@ def _init_terminal_codes(cls) -> None: return supported_colors = int(cls._query_terminfo_database("colors")) cls._codename_to_terminal_code = { - codename: cls._query_terminfo_database(codename) - if not (codename.startswith("bg_") or codename.startswith("fg_")) - or supported_colors >= 8 - else "" + codename: ( + cls._query_terminfo_database(codename) + if not (codename.startswith("bg_") or codename.startswith("fg_")) + or supported_colors >= 8 + else "" + ) for codename in cls._codenames } cls._codename_to_terminal_code.update(cls._name_to_control_character) @@ -1684,9 +1690,11 @@ def get_preview_string() -> Optional[Iterable[str]]: ) def strip_ansi_codes_except_styling(string: str) -> str: stripped_string = strip_ansi_codes_except_styling.ansi_escape_regex.sub( # type: ignore - lambda match_obj: match_obj.group(0) - if strip_ansi_codes_except_styling.ansi_sgr_regex.match(match_obj.group(0)) # type: ignore - else "", + lambda match_obj: ( + match_obj.group(0) + if strip_ansi_codes_except_styling.ansi_sgr_regex.match(match_obj.group(0)) # type: ignore + else "" + ), string, ) return cast(str, stripped_string) diff --git a/metaflow_extensions/netflix_ext/plugins/environment_cli.py b/metaflow_extensions/netflix_ext/plugins/environment_cli.py index 6019fe7..8baabeb 100644 --- a/metaflow_extensions/netflix_ext/plugins/environment_cli.py +++ b/metaflow_extensions/netflix_ext/plugins/environment_cli.py @@ -6,7 +6,7 @@ from 
metaflow._vendor import click from metaflow.exception import CommandException -from metaflow.metaflow_config import CONDA_PREFERRED_FORMAT +from metaflow.metaflow_config import CONDA_PREFERRED_FORMAT, CONDA_TEST from .conda.conda import Conda from .conda.conda_common_decorator import StepRequirement @@ -203,7 +203,7 @@ def resolve( # If this is not a dry-run, we cache the environments and write out the resolved # information update_envs = [] # type: List[ResolvedEnvironment] - if obj.flow_datastore.TYPE != "local": + if obj.flow_datastore.TYPE != "local" or CONDA_TEST: # We may need to update caches # Note that it is possible that something we needed to resolve, we don't need # to cache (if we resolved to something already cached). @@ -283,7 +283,9 @@ def show(obj: Any, local_only: bool, steps_to_show: Tuple[str]): result[step.name]["env"] = resolved_env result[step.name]["state"].append("resolved") - if obj.flow_datastore.TYPE != "local" and resolved_env.is_cached( + if ( + obj.flow_datastore.TYPE != "local" or CONDA_TEST + ) and resolved_env.is_cached( { "conda": ( [CONDA_PREFERRED_FORMAT] diff --git a/tests/check_env.sh b/tests/check_env.sh new file mode 100755 index 0000000..42800a8 --- /dev/null +++ b/tests/check_env.sh @@ -0,0 +1,270 @@ +#!/usr/bin/env bash + +python_executable="" +python_version="" +file_opt="" +file_name="" +alias_name="" +using_env="" +declare -A check_versions + +my_dir=$(dirname "$0") +my_dir=$(realpath "$my_dir") + +env_dir="$my_dir/environments" +check_dir="$my_dir/checks" + +usage() { + echo "Usage: $0 -e python_executable -p python_version [-r req_file_name] [-f yml_file_name] [-u using_env_name] [-a alias]" +} + +exit_bad_cmd() { + usage + exit 1 +} + +script_output="" + +cleanup() { + if [[ -n "${script_output}" ]]; then + rm "${script_output}" + fi +} + +trap cleanup EXIT + +# A simple function to compare version strings x.y.z +compare_versions() { + local version1=$1 + local version2=$2 + + if [[ "$version1" == "$version2" ]]; 
then + echo 0 + return + fi + + local ver1_arr + local ver2_arr + IFS='.' read -ra ver1_arr <<<"$version1" + IFS='.' read -ra ver2_arr <<<"$version2" + + local max_components=$((${#ver1_arr[@]} > ${#ver2_arr[@]} ? ${#ver1_arr[@]} : ${#ver2_arr[@]})) + + for ((i = 0; i < max_components; i++)); do + local component1=${ver1_arr[$i]:-0} + local component2=${ver2_arr[$i]:-0} + # Ignore pre/post/dev markers + if [[ "$component1" == "dev*" || "$component1" == "pre*" || "$component1" == "post*" || + "$component2" == "dev*" || "$component2" == "pre*" || "$component2" == "post*" ]]; then + continue + fi + if ((component1 < component2)); then + echo 1 + return + elif ((component1 > component2)); then + echo 2 + return + fi + done + echo 0 +} + +while getopts "e:p:r:f:u:a:" o; do + case "${o}" in + e) + if [[ -z "$python_executable" ]]; then + python_executable=$OPTARG + else + echo "Error: Cannot specify multiple -e options" + exit_bad_cmd + fi + ;; + + p) + if [[ -z "$python_version" ]]; then + python_version=$OPTARG + else + echo "Error: Cannot specify multiple -p options" + exit_bad_cmd + fi + ;; + r) + if [[ -z "$file_name" ]]; then + file_name=$OPTARG + file_opt="-r" + else + echo "Error: Can only specify one of -r or -f and only once" + exit_bad_cmd + fi + ;; + f) + if [[ -z "$file_name" ]]; then + file_name=$OPTARG + file_opt="-f" + else + echo "Error: Can only specify one of -r or -f and only once" + exit_bad_cmd + fi + ;; + u) + if [[ -z "$using_env" ]]; then + using_env=$OPTARG + else + echo "Error: Cannot specify multiple -u options" + exit_bad_cmd + fi + ;; + a) + if [[ -z "$alias_name" ]]; then + alias_name=$OPTARG + else + echo "Error: Cannot specify multiple -a options" + exit_bad_cmd + fi + ;; + :) + echo "Error: -${OPTARG} requires an argument." + exit_bad_cmd + ;; + \?) 
+ echo "Cannot parse command line" + exit_bad_cmd + ;; + esac +done + +if [[ -z "$python_executable" ]]; then + echo "Error: Need to specify python executable" + exit_bad_cmd +fi + +# Get all the checks we need to perform +if [[ -e "$check_dir/$file_name.check" ]]; then + while IFS=',' read -r pkg min max req_py_version; do + # Skip empty lines + if [[ -z "$pkg" ]]; then + continue + fi + if [[ -n "$req_py_version" && "$req_py_version" != "$python_version" ]]; then + continue + fi + if [[ -v check_versions["$pkg"] ]]; then + echo "Error: Cannot specify a check on $pkg more than once" + exit_bad_cmd + fi + if [[ -z "$min" ]]; then + min="0" + fi + if [[ -z "$max" ]]; then + max="1000000" + fi + check_versions["$pkg"]="$min,$max" + done <<<"$(cat $check_dir/$file_name.check)" +fi + +export METAFLOW_DEBUG_CONDA=1 +# Form the command line for resolution +script_output=$(mktemp) +cmd="$python_executable -m metaflow.cmd.main_cli environment --quiet --quiet-file-output ${script_output} resolve " + +if [[ -z "$python_version" ]]; then + echo "Error: requires -p " + exit_bad_cmd +else + if [[ "$python_version" == "file" ]]; then + cmd="$cmd $file_opt $env_dir/$file_name" + else + cmd="$cmd --python $python_version $file_opt $env_dir/$file_name" + fi +fi + +if [[ -n "$using_env" ]]; then + cmd="$cmd --using $using_env" +fi + +if [[ -n "$alias_name" ]]; then + cmd="$cmd --alias $alias_name" +fi + +echo "Resolving environment using $cmd" + +if $cmd; then + env_id=$( + sed -n '2p' "${script_output}" | cut -d' ' -f1-2 | tr ' ' : + ) + echo "Successfully resolved environment and got ${env_id}" +else + echo "Error: Failed to resolve using $cmd" + exit 2 +fi + +# Create the environment +cmd="$python_executable -m metaflow.cmd.main_cli environment --quiet --quiet-file-output ${script_output} create --force" +if [[ -n "$alias_name" ]]; then + cmd="$cmd $alias_name" +else + cmd="$cmd $env_id" +fi + +echo "Creating environment using $cmd" +if $cmd; then + python_binary=$(IFS=' ' read 
-r bin <"${script_output}" && echo "$bin") + echo "Successfully created environment with binary $python_binary" +else + echo "Error: Failed to create environment using $cmd" + exit 2 +fi + +# Verify using pip freeze to see if all the packages we want are actually there +freeze_output=$("$python_binary" -m pip list --format freeze) + +declare -A all_pkgs +while read -r line; do + split_line=$(echo "$line" | sed -e 's/\([^=]*\)==\(.*\)/\1 \2/') + read -r pkg_name pkg_version <<<"$split_line" + all_pkgs["$pkg_name"]="$pkg_version" +done <<<"$freeze_output" + +declare -a all_errors +for key in "${!check_versions[@]}"; do + version_check_str="${check_versions[$key]}" + min_version="${version_check_str%%,*}" + max_version="${version_check_str#*,}" + if [[ "$min_version" == "-1" ]]; then + if [[ -v all_pkgs["$key"] ]]; then + all_errors+=("Expected package $key to be absent but found") + fi + else + if [[ ! -v all_pkgs["$key"] ]]; then + all_errors+=("Expected package $key but not found") + else + cur_version="${all_pkgs["$key"]}" + smaller_cmp=$(compare_versions "$min_version" "$cur_version") + larger_cmp=$(compare_versions "$cur_version" "$max_version") + echo "For $key got min:$min_version; max:$max_version; cur:$cur_version" + if [[ "$smaller_cmp" -gt 1 ]]; then + all_errors+=("Expected $key to have minimum version $min_version but found $cur_version") + fi + if [[ "$larger_cmp" -gt 1 ]]; then + all_errors+=("Expected $key to have maximum version $max_version but found $cur_version") + fi + fi + fi +done + +# Print some helpful information +echo "** Pip freeze output **" +echo "$freeze_output" +echo "" +echo "" +echo "** Environment info output **" +env_output=$("$python_executable" -m metaflow.cmd.main_cli environment show "$env_id") +echo "$env_output" + +if [[ "${#all_errors[@]}" -gt 0 ]]; then + echo "Found errors: " + for err in "${all_errors[@]}"; do + echo " $err" + done + exit 2 +fi diff --git a/tests/checks/channel-conda.yml.check
b/tests/checks/channel-conda.yml.check new file mode 100644 index 0000000..c7faefd --- /dev/null +++ b/tests/checks/channel-conda.yml.check @@ -0,0 +1 @@ +comet-ml,3, diff --git a/tests/checks/conda-pkg-corner-case.txt.check b/tests/checks/conda-pkg-corner-case.txt.check new file mode 100644 index 0000000..74e7926 --- /dev/null +++ b/tests/checks/conda-pkg-corner-case.txt.check @@ -0,0 +1 @@ +transnetv2,1.0.0,1.0.0 diff --git a/tests/checks/git-repo-corner-case.txt.check b/tests/checks/git-repo-corner-case.txt.check new file mode 100644 index 0000000..72c7563 --- /dev/null +++ b/tests/checks/git-repo-corner-case.txt.check @@ -0,0 +1 @@ +requests-html,0.10.0,0.10.0 diff --git a/tests/checks/itsdangerous.txt.check b/tests/checks/itsdangerous.txt.check new file mode 100644 index 0000000..fab8675 --- /dev/null +++ b/tests/checks/itsdangerous.txt.check @@ -0,0 +1,2 @@ +requests,, +itsdangerous,, diff --git a/tests/checks/itsdangerous.yml.check b/tests/checks/itsdangerous.yml.check new file mode 100644 index 0000000..fab8675 --- /dev/null +++ b/tests/checks/itsdangerous.yml.check @@ -0,0 +1,2 @@ +requests,, +itsdangerous,, diff --git a/tests/checks/only-src.txt.check b/tests/checks/only-src.txt.check new file mode 100644 index 0000000..cf464b6 --- /dev/null +++ b/tests/checks/only-src.txt.check @@ -0,0 +1,2 @@ +matplotlib,, +outlier_detector,0.0.3,0.0.3 diff --git a/tests/checks/only-src.yml.check b/tests/checks/only-src.yml.check new file mode 100644 index 0000000..cf464b6 --- /dev/null +++ b/tests/checks/only-src.yml.check @@ -0,0 +1,2 @@ +matplotlib,, +outlier_detector,0.0.3,0.0.3 diff --git a/tests/checks/other-builds-corner-case.txt.check b/tests/checks/other-builds-corner-case.txt.check new file mode 100644 index 0000000..ca5fdda --- /dev/null +++ b/tests/checks/other-builds-corner-case.txt.check @@ -0,0 +1,2 @@ +foo,1.0.0,1.0.0 +outlier_detector,0.0.3,0.0.3 diff --git a/tests/checks/pip-version.txt.check b/tests/checks/pip-version.txt.check new file mode 100644 
index 0000000..e6c160b --- /dev/null +++ b/tests/checks/pip-version.txt.check @@ -0,0 +1,4 @@ +importlib-resources,1,,3.7.* +importlib_resources,1,,3.8.* +importlib_resources,-1,,3.10.* +jsonschema,, diff --git a/tests/checks/pip-version.yml.check b/tests/checks/pip-version.yml.check new file mode 100644 index 0000000..e6c160b --- /dev/null +++ b/tests/checks/pip-version.yml.check @@ -0,0 +1,4 @@ +importlib-resources,1,,3.7.* +importlib_resources,1,,3.8.* +importlib_resources,-1,,3.10.* +jsonschema,, diff --git a/tests/checks/simple-conda.yml.check b/tests/checks/simple-conda.yml.check new file mode 100644 index 0000000..a46450a --- /dev/null +++ b/tests/checks/simple-conda.yml.check @@ -0,0 +1,3 @@ +pandas,1, +pip,, +requests,2.21, diff --git a/tests/checks/simple-pip.txt.check b/tests/checks/simple-pip.txt.check new file mode 100644 index 0000000..a46450a --- /dev/null +++ b/tests/checks/simple-pip.txt.check @@ -0,0 +1,3 @@ +pandas,1, +pip,, +requests,2.21, diff --git a/tests/environments/channel-conda.yml b/tests/environments/channel-conda.yml new file mode 100644 index 0000000..d66c3ec --- /dev/null +++ b/tests/environments/channel-conda.yml @@ -0,0 +1,4 @@ +dependencies: + - comet_ml::comet_ml = 3.32.0 + - pip: + - itsdangerous diff --git a/tests/environments/conda-pkg-corner-case.txt b/tests/environments/conda-pkg-corner-case.txt new file mode 100644 index 0000000..167532e --- /dev/null +++ b/tests/environments/conda-pkg-corner-case.txt @@ -0,0 +1,4 @@ +# A conda package in a pure pip env +--conda-pkg git-lfs +# Needs LFS to build +transnetv2 @ git+https://github.com/soCzech/TransNetV2.git#main diff --git a/tests/environments/git-repo-corner-case.txt b/tests/environments/git-repo-corner-case.txt new file mode 100644 index 0000000..5789124 --- /dev/null +++ b/tests/environments/git-repo-corner-case.txt @@ -0,0 +1,2 @@ +# GIT repo +requests-html @ git+https://github.com/psf/requests-html@v0.10.0 diff --git a/tests/environments/itsdangerous.txt 
b/tests/environments/itsdangerous.txt new file mode 100644 index 0000000..e163955 --- /dev/null +++ b/tests/environments/itsdangerous.txt @@ -0,0 +1 @@ +itsdangerous diff --git a/tests/environments/itsdangerous.yml b/tests/environments/itsdangerous.yml new file mode 100644 index 0000000..b1b8cd8 --- /dev/null +++ b/tests/environments/itsdangerous.yml @@ -0,0 +1,2 @@ +dependencies: + - itsdangerous diff --git a/tests/environments/only-src.txt b/tests/environments/only-src.txt new file mode 100644 index 0000000..7a54a39 --- /dev/null +++ b/tests/environments/only-src.txt @@ -0,0 +1,13 @@ +matplotlib +plotly +statsmodels +pmdarima +pydotplus +ipython +cerberus +gql +aiohttp +reportlab +graphing +# This one has only a source package available +outlier-detector==0.0.3 diff --git a/tests/environments/only-src.yml b/tests/environments/only-src.yml new file mode 100644 index 0000000..ccd08f1 --- /dev/null +++ b/tests/environments/only-src.yml @@ -0,0 +1,14 @@ +dependencies: + - matplotlib + - plotly + - statsmodels + - pmdarima + - pydotplus + - ipython + - cerberus + - gql + - aiohttp + - reportlab + - pip: + - graphing + - outlier-detector = 0.0.3 diff --git a/tests/environments/other-builds-corner-case.txt b/tests/environments/other-builds-corner-case.txt new file mode 100644 index 0000000..007c59b --- /dev/null +++ b/tests/environments/other-builds-corner-case.txt @@ -0,0 +1,4 @@ +# Source only distribution +outlier-detector==0.0.3 +# Local package +foo @ file:///tmp/build_foo_pkg diff --git a/tests/environments/pip-version.txt b/tests/environments/pip-version.txt new file mode 100644 index 0000000..7b8f015 --- /dev/null +++ b/tests/environments/pip-version.txt @@ -0,0 +1 @@ +jsonschema \ No newline at end of file diff --git a/tests/environments/pip-version.yml b/tests/environments/pip-version.yml new file mode 100644 index 0000000..d6481cc --- /dev/null +++ b/tests/environments/pip-version.yml @@ -0,0 +1,4 @@ +dependencies: + - pandas = >=1.0.0 + - pip: + - jsonschema 
= <= 4.21.1 diff --git a/tests/environments/simple-conda.yml b/tests/environments/simple-conda.yml new file mode 100644 index 0000000..4a6e0a7 --- /dev/null +++ b/tests/environments/simple-conda.yml @@ -0,0 +1,2 @@ +dependencies: + - pandas=>=1.0.0 diff --git a/tests/environments/simple-pip.txt b/tests/environments/simple-pip.txt new file mode 100644 index 0000000..4e15bfa --- /dev/null +++ b/tests/environments/simple-pip.txt @@ -0,0 +1,4 @@ +# This is a comment that should be ignored +# Checking if --pre works +--pre +pandas>=1.0.0 diff --git a/tests/foo_pkg/.gitignore b/tests/foo_pkg/.gitignore new file mode 100644 index 0000000..e81fe67 --- /dev/null +++ b/tests/foo_pkg/.gitignore @@ -0,0 +1,2 @@ +*.egg-info/ +build/ diff --git a/tests/foo_pkg/foo_pkg/__init__.py b/tests/foo_pkg/foo_pkg/__init__.py new file mode 100644 index 0000000..f301245 --- /dev/null +++ b/tests/foo_pkg/foo_pkg/__init__.py @@ -0,0 +1 @@ +print("Hello World!") diff --git a/tests/foo_pkg/setup.py b/tests/foo_pkg/setup.py new file mode 100644 index 0000000..3f130e1 --- /dev/null +++ b/tests/foo_pkg/setup.py @@ -0,0 +1,10 @@ +from setuptools import setup + +setup( + name="foo", + version="1.0.0", + author="Romain", + description='A simple Python package that prints "Hello, World!"', + packages=["foo_pkg"], + install_requires=[], +) diff --git a/tests/test_env_create.py b/tests/test_env_create.py new file mode 100644 index 0000000..27740b7 --- /dev/null +++ b/tests/test_env_create.py @@ -0,0 +1,144 @@ +import os +import sys +import uuid +import pytest +import shutil + +import sh + + +my_dir = os.path.dirname(os.path.abspath(__file__)) + +all_tests = [] + +trans_table = str.maketrans(".-", "__") + +try: + os.symlink(os.path.join(my_dir, "foo_pkg"), "/tmp/build_foo_pkg") +except FileExistsError: + pass + +with os.scandir(os.path.join(my_dir, "environments")) as it: + for entry in it: + if entry.is_file() and ( + entry.name.endswith(".yml") or entry.name.endswith(".txt") + ): + if 
entry.name.endswith(".yml"): + flag = "-f" + else: + flag = "-r" + if entry.name.startswith("no_python_"): + python_versions = ["file"] + elif entry.name.startswith("pip-version"): + python_versions = ["3.8.*", "3.10.*"] + else: + python_versions = ["3.8.*"] + for pv in python_versions: + all_tests.extend( + [ + (pv, flag, entry.name, None), + ( + pv, + flag, + entry.name, + "metaflow/tests/env_resolution/%s" + % entry.name.translate(trans_table), + ), + ] + ) + + +@pytest.mark.parametrize( + argnames=["python_version", "file_type", "file_name", "alias"], argvalues=all_tests +) +def test_resolve_and_check_env(capsys, python_version, file_type, file_name, alias): + cwd = os.getcwd() + os.chdir(my_dir) + try: + conda_rand = str(uuid.uuid4()) + + env_dict = dict(os.environ) + env_dict["METAFLOW_CONDA_ENVS_DIRNAME"] = "testing/envs_%s" % conda_rand + env_dict["METAFLOW_CONDA_PACKAGES_DIRNAME"] = "testing/packages_%s" % conda_rand + env_dict["METAFLOW_CONDA_MAGIC_FILE_V2"] = "condav2-%s.cnd" % conda_rand + env_dict["METAFLOW_CONDA_LOCK_TIMEOUT"] = ( + "7200" # Increase to make sure we resolve everything + ) + env_dict["METAFLOW_DEBUG_CONDA"] = "1" + env_dict["CONDA_ENVS_DIRS"] = "/tmp/mfcondaenvs-%s" % conda_rand + env_dict["CONDA_PKGS_DIRS"] = "/tmp/mfcondapkgs-%s" % conda_rand + check_command = sh.Command("./check_env.sh").bake(["-e", sys.executable]) + metaflow_command = sh.Command(sys.executable).bake( + ["-m", "metaflow.cmd.main_cli"] + ) + if alias: + check_command( + "-p", + python_version, + file_type, + file_name, + "-a", + alias, + _env=env_dict, + _truncate_exc=False, + ) + # We try using the environment by adding a tiny package. + if file_name.endswith(".yml"): + other_file = "./environments/itsdangerous.yml" + else: + other_file = "./environments/itsdangerous.txt" + if file_name.startswith("itsdangerous"): + with pytest.raises(sh.ErrorReturnCode): + # This should fail because the environment already exists. 
+ metaflow_command( + "environment", + "resolve", + "--using", + alias, + file_type, + other_file, + _env=env_dict, + _truncate_exc=False, + ) + else: + metaflow_command( + "environment", + "resolve", + "--using", + alias, + file_type, + other_file, + _env=env_dict, + _truncate_exc=False, + ) + else: + check_command( + "-p", + python_version, + file_type, + file_name, + _env=env_dict, + _truncate_exc=False, + ) + finally: + # Runners run out of space so clear out all the packages and environments we created/downloaded + shutil.rmtree( + os.path.join( + os.environ["METAFLOW_DATASTORE_SYSROOT_LOCAL"], + "conda_env", + "testing", + "envs_%s" % conda_rand, + ) + ) + shutil.rmtree( + os.path.join( + os.environ["METAFLOW_DATASTORE_SYSROOT_LOCAL"], + "conda_env", + "testing", + "packages_%s" % conda_rand, + ) + ) + shutil.rmtree(env_dict["CONDA_ENVS_DIRS"]) + shutil.rmtree(env_dict["CONDA_PKGS_DIRS"]) + + os.chdir(cwd)