from enum import Enum
from operator import itemgetter
from pathlib import Path
-from pydantic import BaseModel, Field, NonNegativeFloat, NonNegativeInt, PositiveInt, StrictBool, ValidationError, confloat, model_validator
+from pydantic import BaseModel, Field, NonNegativeFloat, NonNegativeInt, PositiveInt, StrictBool, ValidationError, model_validator
from typing import Any, Callable, Dict, List, Optional

from scalene.scalene_leak_analysis import ScaleneLeakAnalysis
from scalene.scalene_statistics import Filename, LineNumber, ScaleneStatistics
from scalene.scalene_analysis import ScaleneAnalysis

-import numpy as np

class GPUDevice(str, Enum):
    nvidia = "GPU"
@@ -23,26 +22,26 @@ class GPUDevice(str, Enum):

class FunctionDetail(BaseModel):
    line: str
-    lineno: PositiveInt
+    lineno: LineNumber
    memory_samples: List[List[Any]]
    n_avg_mb: NonNegativeFloat
    n_copy_mb_s: NonNegativeFloat
-    n_core_utilization: float = Field(confloat(ge=0, le=1))
-    n_cpu_percent_c: float = Field(confloat(ge=0, le=100))
-    n_cpu_percent_python: float = Field(confloat(ge=0, le=100))
+    n_core_utilization: float = Field(..., ge=0, le=1)
+    n_cpu_percent_c: float = Field(..., ge=0, le=100)
+    n_cpu_percent_python: float = Field(..., ge=0, le=100)
    n_gpu_avg_memory_mb: NonNegativeFloat
    n_gpu_peak_memory_mb: NonNegativeFloat
-    n_gpu_percent: float = Field(confloat(ge=0, le=100))
+    n_gpu_percent: float = Field(..., ge=0, le=100)
    n_growth_mb: NonNegativeFloat
    n_peak_mb: NonNegativeFloat
    n_malloc_mb: NonNegativeFloat
    n_mallocs: NonNegativeInt
-    n_python_fraction: float = Field(confloat(ge=0, le=1))
-    n_sys_percent: float = Field(confloat(ge=0, le=100))
-    n_usage_fraction: float = Field(confloat(ge=0, le=1))
+    n_python_fraction: float = Field(..., ge=0, le=1)
+    n_sys_percent: float = Field(..., ge=0, le=100)
+    n_usage_fraction: float = Field(..., ge=0, le=1)

    @model_validator(mode="after")
-    def check_cpu_percentages(cls, values):
+    def check_cpu_percentages(cls, values: Any) -> Any:
        total_cpu_usage = math.floor(
            values.n_cpu_percent_c
            + values.n_cpu_percent_python
@@ -56,15 +55,15 @@ def check_cpu_percentages(cls, values):


    @model_validator(mode="after")
-    def check_gpu_memory(cls, values):
+    def check_gpu_memory(cls, values: Any) -> Any:
        if values.n_gpu_avg_memory_mb > values.n_gpu_peak_memory_mb:
            raise ValueError(
                "n_gpu_avg_memory_mb must be less than or equal to n_gpu_peak_memory_mb"
            )
        return values

    @model_validator(mode="after")
-    def check_cpu_memory(cls, values):
+    def check_cpu_memory(cls, values: Any) -> Any:
        if values.n_avg_mb > values.n_peak_mb:
            raise ValueError(
                "n_avg_mb must be less than or equal to n_peak_mb"
@@ -159,79 +158,18 @@ def __init__(self) -> None:
        self.gpu = False
        self.gpu_device = ""

-    def rdp(self, points, epsilon):
-        """
-        Ramer-Douglas-Peucker algorithm implementation using NumPy
-        """
-
-        def perpendicular_distance(point, start, end):
-            if np.all(start == end):
-                return np.linalg.norm(point - start)
-            return np.abs(
-                np.cross(end - start, start - point)
-                / np.linalg.norm(end - start)
-            )
-
-        def recursive_rdp(points, start: int, end: int, epsilon: float):
-            dmax = 0.0
-            index = start
-            for i in range(start + 1, end):
-                d = perpendicular_distance(
-                    points[i], points[start], points[end]
-                )
-                if d > dmax:
-                    index = i
-                    dmax = d
-            if dmax > epsilon:
-                results1 = recursive_rdp(points, start, index, epsilon)
-                results2 = recursive_rdp(points, index, end, epsilon)
-                return results1[:-1] + results2
-            else:
-                return [points[start], points[end]]
-
-        points = np.array(points)
-        start = 0
-        end = len(points) - 1
-        return np.array(recursive_rdp(points, start, end, epsilon))
-
    def compress_samples(
        self, samples: List[Any], max_footprint: float
    ) -> Any:
-        # Try to reduce the number of samples with the
-        # Ramer-Douglas-Peucker algorithm, which attempts to
-        # preserve the shape of the graph. If that fails to bring
-        # the number of samples below our maximum, randomly
-        # downsample (epsilon calculation from
-        # https://stackoverflow.com/questions/57052434/can-i-guess-the-appropriate-epsilon-for-rdp-ramer-douglas-peucker)
        if len(samples) <= self.max_sparkline_samples:
            return samples

-        if True:
-            # FIXME: bypassing RDP for now
-            # return samples[:self.max_sparkline_samples]
-
-            new_samples = sorted(
-                random.sample(
-                    list(map(tuple, samples)), self.max_sparkline_samples
-                )
+        new_samples = sorted(
+            random.sample(
+                list(map(tuple, samples)), self.max_sparkline_samples
            )
-            return new_samples
-
-        else:
-            epsilon = (len(samples) / (3 * self.max_sparkline_samples)) * 2
-
-            # Use NumPy for RDP algorithm
-            new_samples = self.rdp(np.array(samples), epsilon)
-
-            if len(new_samples) > self.max_sparkline_samples:
-                new_samples = sorted(
-                    random.sample(
-                        list(map(tuple, new_samples)),
-                        self.max_sparkline_samples,
-                    )
-                )
-
-            return new_samples
+        )
+        return new_samples

    # Profile output methods
    def output_profile_line(
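
With rdp() and its "if True:" bypass removed, compress_samples now always reduces an over-long series by uniform random downsampling, re-sorted so the sparkline stays in chronological order (the samples read as (elapsed time, footprint) pairs). A standalone sketch of that behavior; the downsample name and max_points parameter are illustrative stand-ins for the method and self.max_sparkline_samples:

# Illustrative downsampler mirroring the sorted(random.sample(...)) pattern above.
import random
from typing import Any, List

def downsample(samples: List[List[Any]], max_points: int) -> List[Any]:
    if len(samples) <= max_points:
        return samples  # below the cap: keep every sample untouched
    # The tuple conversion mirrors the code above; sorting the picked pairs,
    # which lead with a timestamp, restores chronological order.
    return sorted(random.sample(list(map(tuple, samples)), max_points))

footprint = [[t, float(t % 7)] for t in range(10_000)]  # (time, MB) pairs
sparkline = downsample(footprint, 100)
assert len(sparkline) == 100 and sparkline == sorted(sparkline)
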
@@ -355,24 +293,24 @@ def output_profile_line(
        )

        payload = {
-            "lineno": line_no,
            "line": line,
+            "lineno": line_no,
+            "memory_samples": stats.per_line_footprint_samples[fname][line_no],
+            "n_avg_mb": n_avg_mb,
+            "n_copy_mb_s": n_copy_mb_s,
            "n_core_utilization": mean_core_util,
            "n_cpu_percent_c": n_cpu_percent_c,
            "n_cpu_percent_python": n_cpu_percent_python,
-            "n_sys_percent": n_sys_percent,
-            "n_gpu_percent": n_gpu_percent,
            "n_gpu_avg_memory_mb": n_gpu_mem_samples.mean(),
            "n_gpu_peak_memory_mb": n_gpu_mem_samples.peak(),
-            "n_peak_mb": n_peak_mb,
+            "n_gpu_percent": n_gpu_percent,
            "n_growth_mb": n_peak_mb,  # For backwards compatibility
-            "n_avg_mb": n_avg_mb,
-            "n_mallocs": n_mallocs,
+            "n_peak_mb": n_peak_mb,
            "n_malloc_mb": n_malloc_mb,
-            "n_usage_fraction": n_usage_fraction,
+            "n_mallocs": n_mallocs,
            "n_python_fraction": n_python_fraction,
-            "n_copy_mb_s": n_copy_mb_s,
-            "memory_samples": stats.per_line_footprint_samples[fname][line_no],
+            "n_sys_percent": n_sys_percent,
+            "n_usage_fraction": n_usage_fraction,
        }
        try:
            FunctionDetail(**payload)
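
The hunk ends inside the try:; constructing FunctionDetail(**payload) is what exercises the Field bounds and check_* validators defined above, and the surrounding code presumably catches pydantic's ValidationError (the except clause falls outside this diff). A hedged sketch of that validate-before-emit pattern, with a hypothetical helper:

# Hypothetical wrapper; the real exception handling is not shown in this hunk.
from pydantic import ValidationError

def payload_is_valid(payload: dict) -> bool:
    try:
        FunctionDetail(**payload)  # runs Field bounds plus the model validators
        return True
    except ValidationError as err:
        # A real caller might log the error and skip the malformed line instead.
        print(f"invalid profile line payload: {err}")
        return False
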