Merge pull request #6146 from vyasr/branch-25.02-merge-branch-24.12

Forward-merge branch-24.12 into branch-25.02
rapidsai · Nov 26, 2024 · 639ef42 · 639ef42
2 parents c83261b + 028b7ff
commit 639ef42
Show file tree

Hide file tree

Showing 47 changed files with 4,085 additions and 35 deletions.
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
@@ -10,6 +10,7 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
+  # Please keep pr-builder as the top job here
   pr-builder:
     needs:
       - changed-files
@@ -23,6 +24,7 @@ jobs:
       - conda-python-tests-dask
       - conda-notebook-tests
       - docs-build
+      - telemetry-setup
       - wheel-build-cuml
       - wheel-tests-cuml
       - devcontainer
@@ -31,8 +33,17 @@ jobs:
     if: always()
     with:
       needs: ${{ toJSON(needs) }}
+  # Telemetry bootstrap job. `continue-on-error` keeps a telemetry failure
+  # from failing the PR workflow.
+  telemetry-setup:
+    runs-on: ubuntu-latest
+    continue-on-error: true
+    env:
+      # Service name passed to the telemetry action through the environment.
+      OTEL_SERVICE_NAME: "pr-cuml"
+    steps:
+      - name: Telemetry setup
+        # NOTE(review): pinned to the mutable `main` branch; consider a tag or SHA.
+        uses: rapidsai/shared-actions/telemetry-dispatch-stash-base-env-vars@main
   changed-files:
     secrets: inherit
+    needs: telemetry-setup
     uses: rapidsai/shared-workflows/.github/workflows/[email protected]
     with:
       files_yaml: |
@@ -66,11 +77,12 @@ jobs:
           - '!thirdparty/LICENSES/**'
   checks:
     secrets: inherit
+    needs: telemetry-setup
     uses: rapidsai/shared-workflows/.github/workflows/[email protected]
     with:
       enable_check_generated_files: false
       ignored_pr_jobs: >-
-        optional-job-conda-python-tests-cudf-pandas-integration
+        optional-job-conda-python-tests-cudf-pandas-integration telemetry-summarize
   clang-tidy:
     needs: checks
     secrets: inherit
@@ -101,7 +113,6 @@ jobs:
     with:
       build_type: pull-request
       enable_check_symbols: true
-      symbol_exclusions: raft_cutlass
   conda-python-build:
     needs: conda-cpp-build
     secrets: inherit
@@ -173,6 +184,7 @@ jobs:
       build_type: pull-request
       script: ci/test_wheel.sh
   devcontainer:
+    needs: telemetry-setup
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/[email protected]
     with:
@@ -183,3 +195,18 @@ jobs:
         sccache -z;
         build-all --verbose;
         sccache -s;
+
+  # Telemetry summary job. Runs after pr-builder regardless of its outcome
+  # (`if: always()`), and `continue-on-error` keeps a telemetry failure from
+  # failing the PR workflow.
+  telemetry-summarize:
+    runs-on: ubuntu-latest
+    needs: pr-builder
+    if: always()
+    continue-on-error: true
+    steps:
+      - name: Load stashed telemetry env vars
+        uses: rapidsai/shared-actions/telemetry-dispatch-load-base-env-vars@main
+        with:
+          load_service_name: true
+      - name: Telemetry summarize
+        uses: rapidsai/shared-actions/telemetry-dispatch-write-summary@main
+        with:
+          # CA certificate, client certificate and client key, joined with ';'.
+          cert_concat: "${{ secrets.OTEL_EXPORTER_OTLP_CA_CERTIFICATE }};${{ secrets.OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE }};${{ secrets.OTEL_EXPORTER_OTLP_CLIENT_KEY }}"
diff --git a/ci/run_cuml_singlegpu_accel_pytests.sh b/ci/run_cuml_singlegpu_accel_pytests.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+# Support invoking run_cuml_singlegpu_accel_pytests.sh outside the script directory
+cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/cuml/cuml/tests/experimental/accel
+
+# Run pytest on the current directory with the cuml.experimental.accel plugin
+# loaded early (-p) and the pytest cache cleared; all script arguments are
+# forwarded to pytest.
+python -m pytest -p cuml.experimental.accel --cache-clear "$@" .
diff --git a/ci/test_python_singlegpu.sh b/ci/test_python_singlegpu.sh
@@ -21,6 +21,16 @@ rapids-logger "pytest cuml single GPU"
   --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cuml-coverage.xml" \
   --cov-report=term
 
+rapids-logger "pytest cuml accelerator"
+# The --cov-config path is relative to the directory the helper script cds into.
+./ci/run_cuml_singlegpu_accel_pytests.sh \
+  --numprocesses=8 \
+  --dist=worksteal \
+  --junitxml="${RAPIDS_TESTS_DIR}/junit-cuml-accel.xml" \
+  --cov-config=../../../../.coveragerc \
+  --cov=cuml \
+  --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cuml-accel-coverage.xml" \
+  --cov-report=term
+
 rapids-logger "memory leak pytests"
 
 ./ci/run_cuml_singlegpu_memleak_pytests.sh \

diff --git a/python/cuml/cuml/cluster/dbscan.pyx b/python/cuml/cuml/cluster/dbscan.pyx
@@ -225,6 +225,19 @@ class DBSCAN(UniversalBase,
     core_sample_indices_ = CumlArrayDescriptor(order="C")
     labels_ = CumlArrayDescriptor(order="C")
 
+    _hyperparam_interop_translator = {
+        "metric": {
+            "manhattan": "NotImplemented",
+            "chebyshev": "NotImplemented",
+            "minkowski": "NotImplemented",
+        },
+        "algorithm": {
+            "auto": "brute",
+            "ball_tree": "NotImplemented",
+            "kd_tree": "NotImplemented",
+        },
+    }
+
     @device_interop_preparation
     def __init__(self, *,
                  eps=0.5,
@@ -263,7 +276,7 @@ class DBSCAN(UniversalBase,
         opg that is set to `False` for SG, `True` for OPG (multi-GPU)
         """
         if out_dtype not in ["int32", np.int32, "int64", np.int64]:
-            raise ValueError("Invalid value for out_dtype. "
+            raise ValueError(f"Invalid value for out_dtype: {out_dtype}. "
                              "Valid values are {'int32', 'int64', "
                              "np.int32, np.int64}")
 
@@ -422,7 +435,7 @@ class DBSCAN(UniversalBase,
 
     @generate_docstring(skip_parameters_heading=True)
     @enable_device_interop
-    def fit(self, X, out_dtype="int32", sample_weight=None,
+    def fit(self, X, y=None, out_dtype="int32", sample_weight=None,
             convert_dtype=True) -> "DBSCAN":
         """
         Perform DBSCAN clustering from features.
@@ -447,7 +460,7 @@ class DBSCAN(UniversalBase,
                                        'description': 'Cluster labels',
                                        'shape': '(n_samples, 1)'})
     @enable_device_interop
-    def fit_predict(self, X, out_dtype="int32", sample_weight=None) -> CumlArray:
+    def fit_predict(self, X, y=None, out_dtype="int32", sample_weight=None) -> CumlArray:
         """
         Performs clustering on X and returns cluster labels.
 
@@ -463,7 +476,7 @@ class DBSCAN(UniversalBase,
             negative weight may inhibit its eps-neighbor from being core.
             default: None (which is equivalent to weight 1 for all samples).
         """
-        self.fit(X, out_dtype, sample_weight)
+        self.fit(X, out_dtype=out_dtype, sample_weight=sample_weight)
         return self.labels_
 
     @classmethod

diff --git a/python/cuml/cuml/cluster/hdbscan/hdbscan.pyx b/python/cuml/cuml/cluster/hdbscan/hdbscan.pyx
@@ -485,6 +485,19 @@ class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):
     mst_dst_ = CumlArrayDescriptor()
     mst_weights_ = CumlArrayDescriptor()
 
+    _hyperparam_interop_translator = {
+        "metric": {
+            "manhattan": "NotImplemented",
+            "chebyshev": "NotImplemented",
+            "minkowski": "NotImplemented",
+        },
+        "algorithm": {
+            "auto": "brute",
+            "ball_tree": "NotImplemented",
+            "kd_tree": "NotImplemented",
+        },
+    }
+
     @device_interop_preparation
     def __init__(self, *,
                  min_cluster_size=5,

diff --git a/python/cuml/cuml/cluster/kmeans.pyx b/python/cuml/cuml/cluster/kmeans.pyx
@@ -564,7 +564,7 @@ class KMeans(UniversalBase,
                                        'description': 'Cluster indexes',
                                        'shape': '(n_samples, 1)'})
     @enable_device_interop
-    def predict(self, X, convert_dtype=True, sample_weight=None,
+    def predict(self, X, y=None, convert_dtype=True, sample_weight=None,
                 normalize_weights=True) -> CumlArray:
         """
         Predict the closest cluster each sample in X belongs to.
@@ -583,7 +583,7 @@ class KMeans(UniversalBase,
                                        'description': 'Transformed data',
                                        'shape': '(n_samples, n_clusters)'})
     @enable_device_interop
-    def transform(self, X, convert_dtype=True) -> CumlArray:
+    def transform(self, X, y=None, convert_dtype=True) -> CumlArray:
         """
         Transform X to a cluster-distance space.
 
@@ -687,7 +687,7 @@ class KMeans(UniversalBase,
                                        'description': 'Transformed data',
                                        'shape': '(n_samples, n_clusters)'})
     @enable_device_interop
-    def fit_transform(self, X, convert_dtype=False,
+    def fit_transform(self, X, y=None, convert_dtype=False,
                       sample_weight=None) -> CumlArray:
         """
         Compute clustering and transform X to cluster-distance space.

diff --git a/python/cuml/cuml/decomposition/pca.pyx b/python/cuml/cuml/decomposition/pca.pyx
@@ -280,6 +280,16 @@ class PCA(UniversalBase,
     noise_variance_ = CumlArrayDescriptor(order='F')
     trans_input_ = CumlArrayDescriptor(order='F')
 
+    _hyperparam_interop_translator = {
+        "svd_solver": {
+            "arpack": "full",
+            "randomized": "full"
+        },
+        "iterated_power": {
+            "auto": 15,
+        },
+    }
+
     @device_interop_preparation
     def __init__(self, *, copy=True, handle=None, iterated_power=15,
                  n_components=None, random_state=None, svd_solver='auto',

diff --git a/python/cuml/cuml/decomposition/tsvd.pyx b/python/cuml/cuml/decomposition/tsvd.pyx
@@ -240,6 +240,13 @@ class TruncatedSVD(UniversalBase,
     explained_variance_ratio_ = CumlArrayDescriptor(order='F')
     singular_values_ = CumlArrayDescriptor(order='F')
 
+    _hyperparam_interop_translator = {
+        "algorithm": {
+            "randomized": "full",
+            "arpack": "full",
+        },
+    }
+
     @device_interop_preparation
     def __init__(self, *, algorithm='full', handle=None, n_components=1,
                  n_iter=15, random_state=None, tol=1e-7,

diff --git a/python/cuml/cuml/experimental/accel/__init__.py b/python/cuml/cuml/experimental/accel/__init__.py
@@ -0,0 +1,68 @@
+#
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+import importlib
+
+from .magics import load_ipython_extension
+
+from cuml.internals import logger
+from cuml.internals.global_settings import GlobalSettings
+from cuml.internals.memory_utils import set_global_output_type
+
+__all__ = ["load_ipython_extension", "install"]
+
+
+def _install_for_library(library_name):
+    # Import the ``_wrappers.<library_name>`` submodule of this package.
+    # Any exception raised by that import propagates to the caller, so the
+    # ``True`` below is only reached when the import completed.
+    importlib.import_module(f"._wrappers.{library_name}", __name__)
+    return True
+
+
+def install():
+    """Enable cuML Accelerator Mode."""
+    # Switch the cuML logger to info level with a message-only pattern so the
+    # status lines below are emitted.
+    logger.set_level(logger.level_info)
+    logger.set_pattern("%v")
+
+    logger.info("cuML: Installing experimental accelerator...")
+    # Import the wrapper module for each supported library.
+    loader_sklearn = _install_for_library(library_name="sklearn")
+    loader_umap = _install_for_library(library_name="umap")
+    loader_hdbscan = _install_for_library(library_name="hdbscan")
+
+    # NOTE(review): _install_for_library returns True whenever it returns at
+    # all, so this flag is always True here and the failure branch below is
+    # not reachable with the helper as written.
+    GlobalSettings().accelerator_loaded = all(
+        [loader_sklearn, loader_umap, loader_hdbscan]
+    )
+
+    GlobalSettings().accelerator_active = True
+
+    if GlobalSettings().accelerator_loaded:
+        logger.info(
+            "cuML: experimental accelerator successfully initialized..."
+        )
+    else:
+        logger.info("cuML: experimental accelerator failed to initialize...")
+
+    # Set the global output type to "numpy".
+    set_global_output_type("numpy")
+
+
+def pytest_load_initial_conftests(early_config, parser, args):
+    # https://docs.pytest.org/en/7.1.x/reference/\
+    # reference.html#pytest.hookspec.pytest_load_initial_conftests
+    try:
+        install()
+    except RuntimeError:
+        raise RuntimeError(
+            "An existing plugin has already loaded sklearn. Interposing failed."
+        )
diff --git a/python/cuml/cuml/experimental/accel/__main__.py b/python/cuml/cuml/experimental/accel/__main__.py
@@ -0,0 +1,70 @@
+#
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import click
+import code
+import os
+import runpy
+import sys
+
+from . import install
+
+
+@click.command()
+@click.option("-m", "module", required=False, help="Module to run")
+@click.option(
+    "--strict",
+    is_flag=True,
+    default=False,
+    help="Turn strict mode for hyperparameters on.",
+)
+@click.argument("args", nargs=-1)
+def main(module, strict, args):
+
+    if strict:
+        os.environ["CUML_ACCEL_STRICT_MODE"] = "ON"
+
+    install()
+
+    if module:
+        (module,) = module
+        # run the module passing the remaining arguments
+        # as if it were run with python -m <module> <args>
+        sys.argv[:] = [module] + args  # not thread safe?
+        runpy.run_module(module, run_name="__main__")
+    elif len(args) >= 1:
+        # Remove ourself from argv and continue
+        sys.argv[:] = args
+        runpy.run_path(args[0], run_name="__main__")
+    else:
+        if sys.stdin.isatty():
+            banner = f"Python {sys.version} on {sys.platform}"
+            site_import = not sys.flags.no_site
+            if site_import:
+                cprt = 'Type "help", "copyright", "credits" or "license" for more information.'
+                banner += "\n" + cprt
+        else:
+            # Don't show prompts or banners if stdin is not a TTY
+            sys.ps1 = ""
+            sys.ps2 = ""
+            banner = ""
+
+        # Launch an interactive interpreter
+        code.interact(banner=banner, exitmsg="")
+
+
+if __name__ == "__main__":
+    main()