diff --git a/datalad_next/annexremotes/tests/test_archivist.py b/datalad_next/annexremotes/tests/test_archivist.py index 23a1c5e4..f4157db6 100644 --- a/datalad_next/annexremotes/tests/test_archivist.py +++ b/datalad_next/annexremotes/tests/test_archivist.py @@ -7,7 +7,7 @@ from .. import UnsupportedRequest from ..archivist import ArchivistRemote from datalad_next.datasets import Dataset -from datalad_next.runners import CommandError +from datalad_core.runners import CommandError from datalad_next.tests import assert_result_count diff --git a/datalad_next/constraints/git.py b/datalad_next/constraints/git.py index 46657c83..2ba16e6d 100644 --- a/datalad_next/constraints/git.py +++ b/datalad_next/constraints/git.py @@ -1,7 +1,7 @@ """Constraints for Git-related concepts and parameters""" from __future__ import annotations -from datalad_next.runners import ( +from datalad_core.runners import ( CommandError, call_git, call_git_oneline, diff --git a/datalad_next/gitremotes/datalad_annex.py b/datalad_next/gitremotes/datalad_annex.py index 5c267c18..42ce084f 100755 --- a/datalad_next/gitremotes/datalad_annex.py +++ b/datalad_next/gitremotes/datalad_annex.py @@ -212,7 +212,7 @@ LegacyGitRepo as GitRepo, ) from datalad_next.exceptions import CapturedException -from datalad_next.runners import ( +from datalad_core.runners import ( CommandError, call_git, call_git_oneline, diff --git a/datalad_next/iter_collections/annexworktree.py b/datalad_next/iter_collections/annexworktree.py index 4e950bb9..e534156a 100644 --- a/datalad_next/iter_collections/annexworktree.py +++ b/datalad_next/iter_collections/annexworktree.py @@ -29,7 +29,7 @@ from datalad_next.consts import on_windows from datalad_next.repo_utils import has_initialized_annex -from datalad_next.runners import iter_git_subproc +from datalad_core.runners import iter_git_subproc from .gitworktree import ( GitWorktreeItem, diff --git a/datalad_next/iter_collections/gitdiff.py b/datalad_next/iter_collections/gitdiff.py index d4e11abb..f5be0069 100644 --- a/datalad_next/iter_collections/gitdiff.py +++ b/datalad_next/iter_collections/gitdiff.py @@ -25,13 +25,11 @@ from datalad_next.consts import PRE_INIT_COMMIT_SHA from datasalad.gitpathspec import GitPathSpecs -from datalad_next.runners import ( +from datalad_core.runners import ( CommandError, - iter_git_subproc, -) -from datalad_next.runners import ( call_git, call_git_oneline, + iter_git_subproc, ) from .gittree import ( diff --git a/datalad_next/iter_collections/gitstatus.py b/datalad_next/iter_collections/gitstatus.py index 4abafb37..ea10733f 100644 --- a/datalad_next/iter_collections/gitstatus.py +++ b/datalad_next/iter_collections/gitstatus.py @@ -12,7 +12,7 @@ ) from datalad_next.consts import PRE_INIT_COMMIT_SHA -from datalad_next.runners import ( +from datalad_core.runners import ( call_git_lines, ) from datalad_next.repo_utils import ( diff --git a/datalad_next/iter_collections/gittree.py b/datalad_next/iter_collections/gittree.py index 83a85582..85eb98eb 100644 --- a/datalad_next/iter_collections/gittree.py +++ b/datalad_next/iter_collections/gittree.py @@ -21,7 +21,7 @@ itemize, ) -from datalad_next.runners import iter_git_subproc +from datalad_core.runners import iter_git_subproc from .utils import PathBasedItem diff --git a/datalad_next/iter_collections/gitworktree.py b/datalad_next/iter_collections/gitworktree.py index ede18c5a..893b7681 100644 --- a/datalad_next/iter_collections/gitworktree.py +++ b/datalad_next/iter_collections/gitworktree.py @@ -23,7 +23,7 @@ itemize, ) -from datalad_next.runners import iter_git_subproc +from datalad_core.runners import iter_git_subproc from datasalad.gitpathspec import GitPathSpecs from .utils import ( FileSystemItem, diff --git a/datalad_next/iter_collections/tests/test_itergitstatus.py b/datalad_next/iter_collections/tests/test_itergitstatus.py index fca05642..fc165c55 100644 --- a/datalad_next/iter_collections/tests/test_itergitstatus.py +++ b/datalad_next/iter_collections/tests/test_itergitstatus.py @@ -2,7 +2,7 @@ import pytest from datalad_next.datasets import Dataset -from datalad_next.runners import ( +from datalad_core.runners import ( call_git_success, ) diff --git a/datalad_next/patches/replace_sshremoteio.py b/datalad_next/patches/replace_sshremoteio.py index 6e47b388..1a73ca83 100644 --- a/datalad_next/patches/replace_sshremoteio.py +++ b/datalad_next/patches/replace_sshremoteio.py @@ -50,7 +50,7 @@ from datalad_next.exceptions import CapturedException from datalad_next.patches import apply_patch -from datalad_next.runners import CommandError +from datalad_core.runners import CommandError from datalad_next.shell import ( FixedLengthResponseGeneratorPosix, shell, diff --git a/datalad_next/repo_utils/annex.py b/datalad_next/repo_utils/annex.py index f08f969a..07616d11 100644 --- a/datalad_next/repo_utils/annex.py +++ b/datalad_next/repo_utils/annex.py @@ -1,6 +1,6 @@ from pathlib import Path -from datalad_next.runners import call_git_success +from datalad_core.runners import call_git_success def has_initialized_annex( diff --git a/datalad_next/repo_utils/tests/test_head.py b/datalad_next/repo_utils/tests/test_head.py index 5530f447..9163ce1c 100644 --- a/datalad_next/repo_utils/tests/test_head.py +++ b/datalad_next/repo_utils/tests/test_head.py @@ -1,6 +1,6 @@ import pytest -from datalad_next.runners import call_git +from datalad_core.runners import call_git from .. import get_worktree_head diff --git a/datalad_next/repo_utils/worktree.py b/datalad_next/repo_utils/worktree.py index e5fb7426..2222fdf3 100644 --- a/datalad_next/repo_utils/worktree.py +++ b/datalad_next/repo_utils/worktree.py @@ -3,7 +3,7 @@ from pathlib import Path from datalad_next.exceptions import CapturedException -from datalad_next.runners import ( +from datalad_core.runners import ( CommandError, call_git_lines, ) diff --git a/datalad_next/runners/__init__.py b/datalad_next/runners/__init__.py index 6e6ab831..c2658b29 100644 --- a/datalad_next/runners/__init__.py +++ b/datalad_next/runners/__init__.py @@ -1,5 +1,10 @@ """Execution of subprocesses +.. deprecated:: 1.6 + This module is deprecated. It has been partially migrated to the + `datalad-core library `__. Imports + should be adjusted to ``datalad_core.runners``. + This module provides all relevant components for subprocess execution. The main work horse is :func:`~datalad_next.runners.iter_subproc`, a context manager that enables interaction with a subprocess in the form of an iterable @@ -63,59 +68,95 @@ StdOutErrCapture """ -from .iter_subproc import ( - iter_subproc, -) -from .git import ( - call_git, - call_git_lines, - call_git_oneline, - call_git_success, - iter_git_subproc, +__all__ = [ + 'call_git', + 'call_git_lines', + 'call_git_oneline', + 'call_git_success', + 'iter_git_subproc', + 'iter_subproc', + 'CommandError', + 'GitRunner', + 'KillOutput', + 'NoCapture', + 'Protocol', + 'Runner', + 'StdErrCapture', + 'StdOutCapture', + 'StdOutErrCapture', + 'STDERR_FILENO', + 'STDOUT_FILENO', + 'ThreadedRunner', + 'LineSplitter', + 'GeneratorMixIn', + 'NoCaptureGeneratorProtocol', + 'StdOutCaptureGeneratorProtocol', + 'DEVNULL', +] + +import warnings + +# TODO: REMOVE FOR V2.0 +from subprocess import ( + DEVNULL, ) # runners -# TODO REMOVE FOR V2.0 -from datalad.runner import ( - GitRunner, - Runner, -) -# TODO REMOVE FOR V2.0 -from datalad.runner.nonasyncrunner import ThreadedRunner +# TODO: REMOVE FOR V2.0 # protocols -# TODO REMOVE FOR V2.0 +# TODO: REMOVE FOR V2.0 from datalad.runner import ( + GitRunner, KillOutput, NoCapture, Protocol, - StdOutCapture, + Runner, StdErrCapture, + StdOutCapture, StdOutErrCapture, ) -# TODO REMOVE FOR V2.0 -from datalad.runner.protocol import GeneratorMixIn -# TODO REMOVE FOR V2.0 -from .protocols import ( - NoCaptureGeneratorProtocol, - StdOutCaptureGeneratorProtocol, -) -# exceptions -# The following import supports legacy code that uses `CommandError` from this -# module. If you are writing new code, please use `CommandError` from -# `datalad.support.exceptions`. We intend to remove this import in the future. -from datalad_next.exceptions import CommandError +# TODO: REMOVE FOR V2.0 # utilities -# TODO REMOVE FOR V2.0 +# TODO: REMOVE FOR V2.0 from datalad.runner.nonasyncrunner import ( - STDOUT_FILENO, STDERR_FILENO, + STDOUT_FILENO, + ThreadedRunner, ) -# TODO REMOVE FOR V2.0 + +# TODO: REMOVE FOR V2.0 +from datalad.runner.protocol import GeneratorMixIn + +# TODO: REMOVE FOR V2.0 from datalad.runner.utils import ( LineSplitter, ) -# TODO REMOVE FOR V2.0 -from subprocess import ( - DEVNULL, +from datalad_core.runners import ( + call_git, + call_git_lines, + call_git_oneline, + call_git_success, + iter_git_subproc, + iter_subproc, +) + +# exceptions +# The following import supports legacy code that uses `CommandError` from this +# module. If you are writing new code, please use `CommandError` from +# `datalad_core.runners`. We intend to remove this import in the future. +from datalad_next.exceptions import CommandError + +# TODO: REMOVE FOR V2.0 +from .protocols import ( + NoCaptureGeneratorProtocol, + StdOutCaptureGeneratorProtocol, +) + +warnings.warn( + '`datalad_next.runners` has been partially migrated to the ' + 'datalad-core library, ' + 'check docs, and adjust imports to `datalad_core.runners`', + DeprecationWarning, + stacklevel=1, ) diff --git a/datalad_next/runners/git.py b/datalad_next/runners/git.py deleted file mode 100644 index 9dcdf2a7..00000000 --- a/datalad_next/runners/git.py +++ /dev/null @@ -1,212 +0,0 @@ -from __future__ import annotations - -import os -from pathlib import Path -import subprocess - -from datalad_next.exceptions import CapturedException - -from .iter_subproc import ( - CommandError, - iter_subproc, -) - - -def _call_git( - args: list[str], - *, - capture_output: bool = False, - cwd: Path | None = None, - check: bool = False, - text: bool | None = None, - input: str | bytes | None = None, - force_c_locale: bool = False, -) -> subprocess.CompletedProcess: - """Wrapper around ``subprocess.run`` for calling Git command - - ``args`` is a list of argument for the Git command. This list must not - contain the Git executable itself. It will be prepended (unconditionally) - to the arguments before passing them on. - - If ``force_c_locale`` is ``True`` the environment of the Git process - is altered to ensure output according to the C locale. This is useful - when output has to be processed in a locale invariant fashion. - - All other argument are pass on to ``subprocess.run()`` verbatim. - """ - env = None - if force_c_locale: - env = dict(os.environ, LC_ALL='C') - - # make configurable - git_executable = 'git' - cmd = [git_executable, *args] - try: - return subprocess.run( - cmd, - capture_output=capture_output, - cwd=cwd, - check=check, - text=text, - input=input, - env=env, - ) - except subprocess.CalledProcessError as e: - # TODO we could support post-error forensics, but some client - # might call this knowing that it could fail, and may not - # appreciate the slow-down. Add option `expect_fail=False`? - # - # normalize exception to datalad-wide standard - raise CommandError( - cmd=cmd, - code=e.returncode, - stdout=e.stdout, - stderr=e.stderr, - cwd=cwd, - ) from e - - -def call_git( - args: list[str], - *, - cwd: Path | None = None, - force_c_locale: bool = False, -) -> None: - """Call Git with no output capture, raises on non-zero exit. - - If ``cwd`` is not None, the function changes the working directory to - ``cwd`` before executing the command. - - If ``force_c_locale`` is ``True`` the environment of the Git process - is altered to ensure output according to the C locale. This is useful - when output has to be processed in a locale invariant fashion. - """ - _call_git( - args, - capture_output=False, - cwd=cwd, - check=True, - force_c_locale=force_c_locale, - ) - - -def call_git_success( - args: list[str], - *, - cwd: Path | None = None, - capture_output: bool = False, -) -> bool: - """Call Git and report success or failure of the command - - ``args`` is a list of arguments for the Git command. This list must not - contain the Git executable itself. It will be prepended (unconditionally) - to the arguments before passing them on. - - If ``cwd`` is not None, the function changes the working directory to - ``cwd`` before executing the command. - - If ``capture_output`` is ``True``, process output is captured, but not - returned. By default process output is not captured. - """ - try: - _call_git( - args, - capture_output=capture_output, - cwd=cwd, - check=True, - ) - except CommandError as e: - CapturedException(e) - return False - return True - - -def call_git_lines( - args: list[str], - *, - cwd: Path | None = None, - input: str | None = None, - force_c_locale: bool = False, -) -> list[str]: - """Call Git for any (small) number of lines of output - - ``args`` is a list of arguments for the Git command. This list must not - contain the Git executable itself. It will be prepended (unconditionally) - to the arguments before passing them on. - - If ``cwd`` is not None, the function changes the working directory to - ``cwd`` before executing the command. - - If ``input`` is not None, the argument becomes the subprocess’s stdin. - This is intended for small-scale inputs. For call that require processing - large inputs, ``iter_git_subproc()`` is to be preferred. - - If ``force_c_locale`` is ``True`` the environment of the Git process - is altered to ensure output according to the C locale. This is useful - when output has to be processed in a locale invariant fashion. - - Raises - ------ - CommandError if the call exits with a non-zero status. - """ - res = _call_git( - args, - capture_output=True, - cwd=cwd, - check=True, - text=True, - input=input, - force_c_locale=force_c_locale, - ) - return res.stdout.splitlines() - - -def call_git_oneline( - args: list[str], - *, - cwd: Path | None = None, - input: str | None = None, - force_c_locale: bool = False, -) -> str: - """Call Git for a single line of output - - If ``cwd`` is not None, the function changes the working directory to - ``cwd`` before executing the command. - - If ``input`` is not None, the argument becomes the subprocess’s stdin. - This is intended for small-scale inputs. For call that require processing - large inputs, ``iter_git_subproc()`` is to be preferred. - - If ``force_c_locale`` is ``True`` the environment of the Git process - is altered to ensure output according to the C locale. This is useful - when output has to be processed in a locale invariant fashion. - - Raises - ------ - CommandError if the call exits with a non-zero status. - AssertionError if there is more than one line of output. - """ - lines = call_git_lines(args, cwd=cwd, input=input, - force_c_locale=force_c_locale) - if len(lines) > 1: - raise AssertionError( - f"Expected Git {args} to return a single line, but got {lines}" - ) - return lines[0] - - -def iter_git_subproc( - args: list[str], - **kwargs -): - """``iter_subproc()`` wrapper for calling Git commands - - All argument semantics are identical to those of ``iter_subproc()``, - except that ``args`` must not contain the Git binary, but need to be - exclusively arguments to it. The respective `git` command/binary is - automatically added internally. - """ - cmd = ['git'] - cmd.extend(args) - - return iter_subproc(cmd, **kwargs) diff --git a/datalad_next/runners/iter_subproc.py b/datalad_next/runners/iter_subproc.py deleted file mode 100644 index 24154773..00000000 --- a/datalad_next/runners/iter_subproc.py +++ /dev/null @@ -1,122 +0,0 @@ -from __future__ import annotations - -from pathlib import Path -from typing import ( - Iterable, - List, -) -from datasalad.runners import CommandError as SaladCommandError -from datasalad.iterable_subprocess import iterable_subprocess - -from datalad_next.exceptions import CommandError -from datalad_next.consts import COPY_BUFSIZE - -__all__ = ['iter_subproc'] - - -def iter_subproc( - args: List[str], - *, - input: Iterable[bytes] | None = None, - chunk_size: int = COPY_BUFSIZE, - cwd: Path | None = None, - bufsize: int = -1, -): - """Context manager to communicate with a subprocess using iterables - - .. deprecated:: 1.6 - - Use ``datasalad.runners.iter_proc`` instead. Renamed ``input`` argument - to ``inputs``, and raises datalad's ``CommandError``. - - This offers a higher level interface to subprocesses than Python's - built-in ``subprocess`` module. It allows a subprocess to be naturally - placed in a chain of iterables as part of a data processing pipeline. - It is also helpful when data won't fit in memory and has to be streamed. - - This is a convenience wrapper around ``datalad_next.iterable_subprocess``, - which itself is a slightly modified (for use on Windows) fork of - https://github.com/uktrade/iterable-subprocess, written by - Michal Charemza. - - This function provides a context manager. - On entering the context, the subprocess is started, the thread to read - from standard error is started, the thread to populate subprocess - input is started. - When running, the standard input thread iterates over the input, - passing chunks to the process, while the standard error thread - fetches the error output, and while the main thread iterates over - the process's output from client code in the context. - - On context exit, the main thread closes the process's standard output, - waits for the standard input thread to exit, waits for the standard error - thread to exit, and wait for the process to exit. If the process exited - with a non-zero return code, a ``CommandError`` is raised, - containing the process's return code. - - If the context is exited due to an exception that was raised in the - context, the main thread terminates the process via ``Popen.terminate()``, - closes the process's standard output, waits for the standard input - thread to exit, waits for the standard error thread to exit, waits - for the process to exit, and re-raises the exception. - - Note, if an exception is raised in the context, this exception will bubble - up to the main thread. That means no ``CommandError`` will - be raised if the subprocess exited with a non-zero return code. - To access the return code in case of an exception inside the context, - use the ``code``-attribute of the ``as``-variable. - This object will always contain the return code of the subprocess. - For example, the following code will raise a ``StopIteration``-exception - in the context (by repeatedly using :func:`next`). The subprocess - will exit with ``2`` due to the illegal option ``-@``, and no - ``CommandError`` is raised. The return code is read from - the variable ``ls_stdout`` - - .. code-block:: python - - >>> from datalad_next.runners import iter_subproc - >>> try: - ... with iter_subproc(['ls', '-@']) as ls_stdout: - ... while True: - ... next(ls_stdout) - ... except Exception as e: - ... print(repr(e), ls_stdout.returncode) - StopIteration() 2 - - - Parameters - ---------- - args: list - Sequence of program arguments to be passed to ``subprocess.Popen``. - input: iterable, optional - If given, chunks of ``bytes`` to be written, iteratively, to the - subprocess's ``stdin``. - chunk_size: int, optional - Size of chunks to read from the subprocess's stdout/stderr in bytes. - cwd: Path - Working directory for the subprocess, passed to ``subprocess.Popen``. - bufsize: int, optional - Buffer size to use for the subprocess's ``stdin``, ``stdout``, and - ``stderr``. See ``subprocess.Popen`` for details. - - Returns - ------- - contextmanager - """ - try: - return iterable_subprocess( - args, - tuple() if input is None else input, - chunk_size=chunk_size, - cwd=cwd, - bufsize=bufsize, - ) - except SaladCommandError as e: - raise CommandError( - cmd=e.cmd, - msg=e.msg, - code=e.returncode, - stdout=e.stdout, - stderr=e.stderr, - cwd=e.cwd, - ) from e diff --git a/datalad_next/tests/fixtures.py b/datalad_next/tests/fixtures.py index 8edc5f22..50bee141 100644 --- a/datalad_next/tests/fixtures.py +++ b/datalad_next/tests/fixtures.py @@ -11,7 +11,7 @@ from urllib.request import urlopen from datalad_next.datasets import Dataset -from datalad_next.runners import ( +from datalad_core.runners import ( call_git_lines, call_git_success, ) diff --git a/datalad_next/url_operations/ssh.py b/datalad_next/url_operations/ssh.py index 47410747..8a2d33c7 100644 --- a/datalad_next/url_operations/ssh.py +++ b/datalad_next/url_operations/ssh.py @@ -31,7 +31,7 @@ from datalad_next.consts import COPY_BUFSIZE from datalad_next.config import ConfigManager -from datalad_next.runners import CommandError +from datalad_core.runners import CommandError from datalad_next.shell import ( FixedLengthResponseGeneratorPosix, ShellCommandExecutor, diff --git a/docs/source/index.rst b/docs/source/index.rst index 435f2bd6..ad9f1944 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -72,11 +72,11 @@ as stable as possible. This includes signatures and return value behavior. As an example:: - from datalad_next.runners import iter_git_subproc + from datalad_next.repo_utils import get_worktree_head imports a part of the public API, but:: - from datalad_next.runners.git import iter_git_subproc + from datalad_next.repo_utils.worktree import get_worktree_head does not.