From 9512391001b917b19edba9a80817ac35fb22a301 Mon Sep 17 00:00:00 2001 From: Emery Berger Date: Sat, 16 Dec 2023 17:09:06 -0800 Subject: [PATCH] Refactoring Python redirection (#741) * Use OS path instead of hard-coded slash. * Factored out Python redirection logic. * Avoid overwriting original Python executable. --- scalene/redirect_python.py | 60 +++++++++++++++++++++++++++++++++++ scalene/scalene_profiler.py | 53 ++++------------------------------- scalene/scalene_statistics.py | 2 +- 3 files changed, 67 insertions(+), 48 deletions(-) create mode 100644 scalene/redirect_python.py diff --git a/scalene/redirect_python.py b/scalene/redirect_python.py new file mode 100644 index 000000000..6f6ba71c1 --- /dev/null +++ b/scalene/redirect_python.py @@ -0,0 +1,60 @@ +import os +import pathlib +import re +import stat +import sys + +def redirect_python(preface: str, cmdline: str, python_alias_dir: pathlib.Path) -> None: + """Write Scalene alias scripts for likely Python names into python_alias_dir. + + Each script runs `{preface} {sys.executable} -m scalene {cmdline}` followed + by its own arguments (a .bat file on win32). The directory is then prepended + to sys.path and PATH, and sys.executable is pointed at the first alias. + """ + # Likely names for the Python interpreter. + base_python_extension = ".exe" if sys.platform == "win32" else "" + all_python_names = [ + "python" + base_python_extension, + "python" + str(sys.version_info.major) + base_python_extension, + "python" + str(sys.version_info.major) + "." + str(sys.version_info.minor) + base_python_extension + ] + # if sys.platform == "win32": + # base_python_name = re.sub(r'\.exe$', '', os.path.basename(sys.executable)) + # else: + # base_python_name = sys.executable + + # Don't show commands on Windows; regular shebang for + # shell scripts on Linux/OS X + shebang = "@echo off" if sys.platform == "win32" else "#!/bin/bash" + # Get all arguments, platform specific + # all_args = "%* & exit 0" if sys.platform == "win32" else '"$@"' + all_args = "%*" if sys.platform == "win32" else '"$@"' + + payload = f"""{shebang} +{preface} {sys.executable} -m scalene {cmdline} {all_args} +""" + + # Now create all the files.
+ for name in all_python_names: + fname = os.path.join(python_alias_dir, name) + if sys.platform == "win32": + fname = re.sub(r'\.exe$', '.bat', fname) + with open(fname, "w") as file: + file.write(payload) + os.chmod(fname, stat.S_IXUSR | stat.S_IRUSR | stat.S_IWUSR) + + # Finally, insert this directory into the path. + sys.path.insert(0, str(python_alias_dir)) + os.environ["PATH"] = ( + str(python_alias_dir) + + os.pathsep + + os.environ["PATH"] + ) + # Force the executable (if anyone invokes it later) to point to one of our aliases. + sys.executable = os.path.join( + python_alias_dir, + all_python_names[0], + ) + if sys.platform == "win32" and sys.executable.endswith(".exe"): + sys.executable = re.sub(r'\.exe$', '.bat', sys.executable) + diff --git a/scalene/scalene_profiler.py b/scalene/scalene_profiler.py index a3642bde3..2b76f21bc 100644 --- a/scalene/scalene_profiler.py +++ b/scalene/scalene_profiler.py @@ -43,6 +43,7 @@ from scalene.get_module_details import _get_module_details from scalene.find_browser import find_browser +from scalene.redirect_python import redirect_python from collections import defaultdict from importlib.abc import SourceLoader @@ -201,19 +202,6 @@ def get_original_lock() -> threading.Lock: """Return the true lock, which we shim in replacement_lock.py.""" return Scalene.__original_lock() - # Likely names for the Python interpreter. - __all_python_names = [ - "python", - "python" + str(sys.version_info.major), - "python" + str(sys.version_info.major) + "." + str(sys.version_info.minor), - os.path.basename(sys.executable), - os.path.basename(sys.executable) + str(sys.version_info.major), - os.path.basename(sys.executable) - + str(sys.version_info.major) - + "." - + str(sys.version_info.minor), - ] - # when did we last receive a signal? __last_signal_time_virtual: float = 0 __last_signal_time_wallclock: float = 0 @@ -664,12 +652,12 @@ def __init__( else: # Parent process.
- Scalene.__python_alias_dir = pathlib.Path( - tempfile.mkdtemp(prefix="scalene") - ) # Create a temporary directory to hold aliases to the Python # executable, so scalene can handle multiple processes; each # one is a shell script that redirects to Scalene. + Scalene.__python_alias_dir = pathlib.Path( + tempfile.mkdtemp(prefix="scalene") + ) Scalene.__pid = 0 cmdline = "" # Pass along commands from the invoking command line. @@ -702,40 +690,11 @@ def __init__( "=".join((k, str(v))) for (k, v) in environ.items() ) - # Don't show commands on Windows; regular shebang for - # shell scripts on Linux/OS X - shebang = "@echo off" if sys.platform == "win32" else "#!/bin/bash" - executable = sys.executable # Add the --pid field so we can propagate it to the child. cmdline += f" --pid={os.getpid()} ---" - # Get all arguments, platform specific - all_args = "%* & exit 0" if sys.platform == "win32" else '"$@"' - payload = f"""{shebang} -{preface} {executable} -m scalene {cmdline} {all_args} -""" - # Now create all the files. - for name in Scalene.__all_python_names: - fname = os.path.join(Scalene.__python_alias_dir, name) - if sys.platform == "win32": - fname = re.sub(r'\.exe$', '.bat', fname) - with open(fname, "w") as file: - file.write(payload) - os.chmod(fname, stat.S_IXUSR | stat.S_IRUSR | stat.S_IWUSR) - # Finally, insert this directory into the path. - sys.path.insert(0, str(Scalene.__python_alias_dir)) - os.environ["PATH"] = ( - str(Scalene.__python_alias_dir) - + os.pathsep - + os.environ["PATH"] - ) - # Force the executable (if anyone invokes it later) to point to one of our aliases. - sys.executable = os.path.join( - Scalene.__python_alias_dir, - Scalene.__all_python_names[0], - ) - if sys.platform == "win32" and sys.executable.endswith(".exe"): - sys.executable = re.sub(r'\.exe$', '.bat', sys.executable) + redirect_python(preface, cmdline, Scalene.__python_alias_dir) + # Register the exit handler to run when the program terminates or we quit. 
atexit.register(Scalene.exit_handler) diff --git a/scalene/scalene_statistics.py b/scalene/scalene_statistics.py index 6feff6db4..0f9668344 100644 --- a/scalene/scalene_statistics.py +++ b/scalene/scalene_statistics.py @@ -395,7 +395,7 @@ def increment_core_utilization( def merge_stats(self, the_dir_name: pathlib.Path) -> None: """Merge all statistics in a given directory.""" the_dir = pathlib.Path(the_dir_name) - for f in list(the_dir.glob("**/scalene*")): + for f in list(the_dir.glob(os.path.join("**", "scalene*"))): # Skip empty files. if os.path.getsize(f) == 0: continue