Skip to content

Commit

Permalink
Merge pull request #6146 from vyasr/branch-25.02-merge-branch-24.12
Browse files Browse the repository at this point in the history
Forward-merge branch-24.12 into branch-25.02
  • Loading branch information
AyodeAwe authored Nov 26, 2024
2 parents c83261b + 028b7ff commit 639ef42
Show file tree
Hide file tree
Showing 47 changed files with 4,085 additions and 35 deletions.
31 changes: 29 additions & 2 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ concurrency:
cancel-in-progress: true

jobs:
# Please keep pr-builder as the top job here
pr-builder:
needs:
- changed-files
Expand All @@ -23,6 +24,7 @@ jobs:
- conda-python-tests-dask
- conda-notebook-tests
- docs-build
- telemetry-setup
- wheel-build-cuml
- wheel-tests-cuml
- devcontainer
Expand All @@ -31,8 +33,17 @@ jobs:
if: always()
with:
needs: ${{ toJSON(needs) }}
telemetry-setup:
runs-on: ubuntu-latest
continue-on-error: true
env:
OTEL_SERVICE_NAME: "pr-cuml"
steps:
- name: Telemetry setup
uses: rapidsai/shared-actions/telemetry-dispatch-stash-base-env-vars@main
changed-files:
secrets: inherit
needs: telemetry-setup
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
files_yaml: |
Expand Down Expand Up @@ -66,11 +77,12 @@ jobs:
- '!thirdparty/LICENSES/**'
checks:
secrets: inherit
needs: telemetry-setup
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
enable_check_generated_files: false
ignored_pr_jobs: >-
optional-job-conda-python-tests-cudf-pandas-integration
optional-job-conda-python-tests-cudf-pandas-integration telemetry-summarize
clang-tidy:
needs: checks
secrets: inherit
Expand Down Expand Up @@ -101,7 +113,6 @@ jobs:
with:
build_type: pull-request
enable_check_symbols: true
symbol_exclusions: raft_cutlass
conda-python-build:
needs: conda-cpp-build
secrets: inherit
Expand Down Expand Up @@ -173,6 +184,7 @@ jobs:
build_type: pull-request
script: ci/test_wheel.sh
devcontainer:
needs: telemetry-setup
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
Expand All @@ -183,3 +195,18 @@ jobs:
sccache -z;
build-all --verbose;
sccache -s;
telemetry-summarize:
runs-on: ubuntu-latest
needs: pr-builder
if: always()
continue-on-error: true
steps:
- name: Load stashed telemetry env vars
uses: rapidsai/shared-actions/telemetry-dispatch-load-base-env-vars@main
with:
load_service_name: true
- name: Telemetry summarize
uses: rapidsai/shared-actions/telemetry-dispatch-write-summary@main
with:
cert_concat: "${{ secrets.OTEL_EXPORTER_OTLP_CA_CERTIFICATE }};${{ secrets.OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE }};${{ secrets.OTEL_EXPORTER_OTLP_CLIENT_KEY }}"
7 changes: 7 additions & 0 deletions ci/run_cuml_singlegpu_accel_pytests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
# Copyright (c) 2024, NVIDIA CORPORATION.

# Support invoking run_cuml_singlegpu_accel_pytests.sh outside the script directory.
# Abort if the test directory cannot be entered; otherwise pytest would collect
# "." from whatever directory the caller happened to be in.
cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/cuml/cuml/tests/experimental/accel || exit 1

# Run the accelerator tests in this directory; any extra arguments given to
# this script are forwarded to pytest unchanged.
python -m pytest -p cuml.experimental.accel --cache-clear "$@" .
10 changes: 10 additions & 0 deletions ci/test_python_singlegpu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,16 @@ rapids-logger "pytest cuml single GPU"
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cuml-coverage.xml" \
--cov-report=term

rapids-logger "pytest cuml accelerator"
./ci/run_cuml_singlegpu_accel_pytests.sh \
--numprocesses=8 \
--dist=worksteal \
--junitxml="${RAPIDS_TESTS_DIR}/junit-cuml-accel.xml" \
--cov-config=../../../../.coveragerc \
--cov=cuml \
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cuml-accel-coverage.xml" \
--cov-report=term

rapids-logger "memory leak pytests"

./ci/run_cuml_singlegpu_memleak_pytests.sh \
Expand Down
21 changes: 17 additions & 4 deletions python/cuml/cuml/cluster/dbscan.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,19 @@ class DBSCAN(UniversalBase,
core_sample_indices_ = CumlArrayDescriptor(order="C")
labels_ = CumlArrayDescriptor(order="C")

_hyperparam_interop_translator = {
"metric": {
"manhattan": "NotImplemented",
"chebyshev": "NotImplemented",
"minkowski": "NotImplemented",
},
"algorithm": {
"auto": "brute",
"ball_tree": "NotImplemented",
"kd_tree": "NotImplemented",
},
}

@device_interop_preparation
def __init__(self, *,
eps=0.5,
Expand Down Expand Up @@ -263,7 +276,7 @@ class DBSCAN(UniversalBase,
opg that is set to `False` for SG, `True` for OPG (multi-GPU)
"""
if out_dtype not in ["int32", np.int32, "int64", np.int64]:
raise ValueError("Invalid value for out_dtype. "
raise ValueError(f"Invalid value for out_dtype: {out_dtype}. "
"Valid values are {'int32', 'int64', "
"np.int32, np.int64}")

Expand Down Expand Up @@ -422,7 +435,7 @@ class DBSCAN(UniversalBase,

@generate_docstring(skip_parameters_heading=True)
@enable_device_interop
def fit(self, X, out_dtype="int32", sample_weight=None,
def fit(self, X, y=None, out_dtype="int32", sample_weight=None,
convert_dtype=True) -> "DBSCAN":
"""
Perform DBSCAN clustering from features.
Expand All @@ -447,7 +460,7 @@ class DBSCAN(UniversalBase,
'description': 'Cluster labels',
'shape': '(n_samples, 1)'})
@enable_device_interop
def fit_predict(self, X, out_dtype="int32", sample_weight=None) -> CumlArray:
def fit_predict(self, X, y=None, out_dtype="int32", sample_weight=None) -> CumlArray:
"""
Performs clustering on X and returns cluster labels.

Expand All @@ -463,7 +476,7 @@ class DBSCAN(UniversalBase,
negative weight may inhibit its eps-neighbor from being core.
default: None (which is equivalent to weight 1 for all samples).
"""
self.fit(X, out_dtype, sample_weight)
self.fit(X, out_dtype=out_dtype, sample_weight=sample_weight)
return self.labels_

@classmethod
Expand Down
13 changes: 13 additions & 0 deletions python/cuml/cuml/cluster/hdbscan/hdbscan.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,19 @@ class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):
mst_dst_ = CumlArrayDescriptor()
mst_weights_ = CumlArrayDescriptor()

_hyperparam_interop_translator = {
"metric": {
"manhattan": "NotImplemented",
"chebyshev": "NotImplemented",
"minkowski": "NotImplemented",
},
"algorithm": {
"auto": "brute",
"ball_tree": "NotImplemented",
"kd_tree": "NotImplemented",
},
}

@device_interop_preparation
def __init__(self, *,
min_cluster_size=5,
Expand Down
6 changes: 3 additions & 3 deletions python/cuml/cuml/cluster/kmeans.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -564,7 +564,7 @@ class KMeans(UniversalBase,
'description': 'Cluster indexes',
'shape': '(n_samples, 1)'})
@enable_device_interop
def predict(self, X, convert_dtype=True, sample_weight=None,
def predict(self, X, y=None, convert_dtype=True, sample_weight=None,
normalize_weights=True) -> CumlArray:
"""
Predict the closest cluster each sample in X belongs to.
Expand All @@ -583,7 +583,7 @@ class KMeans(UniversalBase,
'description': 'Transformed data',
'shape': '(n_samples, n_clusters)'})
@enable_device_interop
def transform(self, X, convert_dtype=True) -> CumlArray:
def transform(self, X, y=None, convert_dtype=True) -> CumlArray:
"""
Transform X to a cluster-distance space.

Expand Down Expand Up @@ -687,7 +687,7 @@ class KMeans(UniversalBase,
'description': 'Transformed data',
'shape': '(n_samples, n_clusters)'})
@enable_device_interop
def fit_transform(self, X, convert_dtype=False,
def fit_transform(self, X, y=None, convert_dtype=False,
sample_weight=None) -> CumlArray:
"""
Compute clustering and transform X to cluster-distance space.
Expand Down
10 changes: 10 additions & 0 deletions python/cuml/cuml/decomposition/pca.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,16 @@ class PCA(UniversalBase,
noise_variance_ = CumlArrayDescriptor(order='F')
trans_input_ = CumlArrayDescriptor(order='F')

_hyperparam_interop_translator = {
"svd_solver": {
"arpack": "full",
"randomized": "full"
},
"iterated_power": {
"auto": 15,
},
}

@device_interop_preparation
def __init__(self, *, copy=True, handle=None, iterated_power=15,
n_components=None, random_state=None, svd_solver='auto',
Expand Down
7 changes: 7 additions & 0 deletions python/cuml/cuml/decomposition/tsvd.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,13 @@ class TruncatedSVD(UniversalBase,
explained_variance_ratio_ = CumlArrayDescriptor(order='F')
singular_values_ = CumlArrayDescriptor(order='F')

_hyperparam_interop_translator = {
"algorithm": {
"randomized": "full",
"arpack": "full",
},
}

@device_interop_preparation
def __init__(self, *, algorithm='full', handle=None, n_components=1,
n_iter=15, random_state=None, tol=1e-7,
Expand Down
68 changes: 68 additions & 0 deletions python/cuml/cuml/experimental/accel/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#
# Copyright (c) 2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


import importlib

from .magics import load_ipython_extension

from cuml.internals import logger
from cuml.internals.global_settings import GlobalSettings
from cuml.internals.memory_utils import set_global_output_type

__all__ = ["load_ipython_extension", "install"]


def _install_for_library(library_name):
    """Import the ``_wrappers.<library_name>`` submodule.

    The module name is resolved relative to ``__name__``. Import errors are
    not caught here; ``True`` is returned once the import has completed.
    """
    wrapper_module = "._wrappers.{}".format(library_name)
    importlib.import_module(wrapper_module, package=__name__)
    return True


def install():
    """Enable cuML Accelerator Mode.

    Configures the cuML logger, imports the wrapper modules for sklearn,
    umap and hdbscan (in that order), records the outcome in
    ``GlobalSettings`` and finally sets the global output type to "numpy".
    """
    # Configure the logger before any accelerator message is emitted.
    logger.set_level(logger.level_info)
    logger.set_pattern("%v")

    logger.info("cuML: Installing experimental accelerator...")
    load_results = [
        _install_for_library(library_name=target)
        for target in ("sklearn", "umap", "hdbscan")
    ]

    GlobalSettings().accelerator_loaded = all(load_results)
    GlobalSettings().accelerator_active = True

    if not GlobalSettings().accelerator_loaded:
        logger.info("cuML: experimental accelerator failed to initialize...")
    else:
        logger.info(
            "cuML: experimental accelerator successfully initialized..."
        )

    set_global_output_type("numpy")


def pytest_load_initial_conftests(early_config, parser, args):
    """Pytest hook that installs the accelerator before tests are collected.

    The three parameters are part of the pytest hook signature and are not
    used here.

    Raises
    ------
    RuntimeError
        If ``install()`` raises ``RuntimeError``. The original exception is
        attached as ``__cause__`` so the underlying failure stays visible.
    """
    # Hook reference:
    # https://docs.pytest.org/en/7.1.x/reference/reference.html#pytest.hookspec.pytest_load_initial_conftests
    try:
        install()
    except RuntimeError as err:
        raise RuntimeError(
            "An existing plugin has already loaded sklearn. Interposing failed."
        ) from err
70 changes: 70 additions & 0 deletions python/cuml/cuml/experimental/accel/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#
# Copyright (c) 2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import click
import code
import os
import runpy
import sys

from . import install


@click.command()
@click.option("-m", "module", required=False, help="Module to run")
@click.option(
    "--strict",
    is_flag=True,
    default=False,
    help="Turn strict mode for hyperparameters on.",
)
@click.argument("args", nargs=-1)
def main(module, strict, args):
    """Run a module, a script, or an interactive session with the accelerator.

    Parameters
    ----------
    module : str or None
        Value of ``-m``. When given, the module is executed as ``__main__``
        with ``args`` as its command-line arguments.
    strict : bool
        When set, ``CUML_ACCEL_STRICT_MODE`` is exported as ``"ON"`` before
        the accelerator is installed.
    args : tuple of str
        Remaining positional arguments. Without ``-m`` the first one is the
        path of the script to run; with no arguments at all an interactive
        interpreter is started.
    """
    if strict:
        os.environ["CUML_ACCEL_STRICT_MODE"] = "ON"

    install()

    if module:
        # ``module`` is already a plain string (single-valued click option)
        # and ``args`` is a tuple, so build the new argv by unpacking rather
        # than by list + tuple concatenation.
        # Run the module passing the remaining arguments as if it were run
        # with ``python -m <module> <args>``.
        sys.argv[:] = [module, *args]  # not thread safe?
        runpy.run_module(module, run_name="__main__")
    elif args:
        # Remove ourself from argv and continue
        sys.argv[:] = args
        runpy.run_path(args[0], run_name="__main__")
    else:
        if sys.stdin.isatty():
            banner = f"Python {sys.version} on {sys.platform}"
            site_import = not sys.flags.no_site
            if site_import:
                cprt = 'Type "help", "copyright", "credits" or "license" for more information.'
                banner += "\n" + cprt
        else:
            # Don't show prompts or banners if stdin is not a TTY
            sys.ps1 = ""
            sys.ps2 = ""
            banner = ""

        # Launch an interactive interpreter
        code.interact(banner=banner, exitmsg="")


if __name__ == "__main__":
    main()
Loading

0 comments on commit 639ef42

Please sign in to comment.