NVIDIA · rwgk · Dec 12, 2024 · Dec 20, 2024 · Dec 20, 2024 · Dec 20, 2024
@@ -11,6 +11,12 @@ fail_if_no_gpu
 readonly prefix="${BUILD_DIR}/python/"
 export PYTHONPATH="${prefix}:${PYTHONPATH:-}"
 
+pushd ../python/cuda_cccl >/dev/null
+
+run_command "⚙️  Pip install cuda_cccl" pip install --force-reinstall --upgrade --target "${prefix}" .
+
+popd >/dev/null
+
 pushd ../python/cuda_cooperative >/dev/null
 
 run_command "⚙️  Pip install cuda_cooperative" pip install --force-reinstall --upgrade --target "${prefix}" .[test]
@@ -20,11 +26,7 @@ popd >/dev/null
 
 pushd ../python/cuda_parallel >/dev/null
 
-# Temporarily install the package twice to populate include directory as part of the first installation
-# and to let manifest discover these includes during the second installation. Do not forget to remove the
-# second installation after https://github.com/NVIDIA/cccl/issues/2281 is addressed.
-run_command "⚙️  Pip install cuda_parallel once" pip install --force-reinstall --upgrade --target "${prefix}" .[test]
-run_command "⚙️  Pip install cuda_parallel twice" pip install --force-reinstall --upgrade --target "${prefix}" .[test]
+run_command "⚙️  Pip install cuda_parallel" pip install --force-reinstall --upgrade --target "${prefix}" .[test]
 run_command "🚀  Pytest cuda_parallel" python -m pytest -v ./tests
 
 popd >/dev/null

@@ -37,6 +37,7 @@ CUB_CMAKE_VERSION_FILE="lib/cmake/cub/cub-config-version.cmake"
 LIBCUDACXX_CMAKE_VERSION_FILE="lib/cmake/libcudacxx/libcudacxx-config-version.cmake"
 THRUST_CMAKE_VERSION_FILE="lib/cmake/thrust/thrust-config-version.cmake"
 CUDAX_CMAKE_VERSION_FILE="lib/cmake/cudax/cudax-config-version.cmake"
+CUDA_CCCL_VERSION_FILE="python/cuda_cccl/cuda/cccl/_version.py"
 CUDA_COOPERATIVE_VERSION_FILE="python/cuda_cooperative/cuda/cooperative/_version.py"
 CUDA_PARALLEL_VERSION_FILE="python/cuda_parallel/cuda/parallel/_version.py"
 
@@ -110,6 +111,7 @@ update_file "$CUDAX_CMAKE_VERSION_FILE" "set(cudax_VERSION_MAJOR \([0-9]\+\))" "
 update_file "$CUDAX_CMAKE_VERSION_FILE" "set(cudax_VERSION_MINOR \([0-9]\+\))" "set(cudax_VERSION_MINOR $minor)"
 update_file "$CUDAX_CMAKE_VERSION_FILE" "set(cudax_VERSION_PATCH \([0-9]\+\))" "set(cudax_VERSION_PATCH $patch)"
 
+update_file "$CUDA_CCCL_VERSION_FILE" "^__version__ = \"\([0-9.]\+\)\"" "__version__ = \"$major.$minor.$patch\""
 update_file "$CUDA_COOPERATIVE_VERSION_FILE" "^__version__ = \"\([0-9.]\+\)\"" "__version__ = \"$pymajor.$pyminor.$major.$minor.$patch\""
 update_file "$CUDA_PARALLEL_VERSION_FILE" "^__version__ = \"\([0-9.]\+\)\"" "__version__ = \"$pymajor.$pyminor.$major.$minor.$patch\""
 

@@ -347,6 +347,7 @@ autodoc.mock_imports = [
     "numba",
     "pynvjitlink",
     "cuda.bindings",
+    "cuda.cccl",
     "llvmlite",
     "numpy",
 ]

@@ -0,0 +1,3 @@
+LICENSE
+cuda/cccl/include
+*egg-info
@@ -0,0 +1 @@
+recursive-include cuda/cccl/include *
@@ -0,0 +1,3 @@
+## Note
+
+This package is currently FOR INTERNAL USE ONLY and is not meant to be used or installed explicitly.
@@ -0,0 +1,7 @@
+# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+#
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+from cuda.cccl._version import __version__
+
+__all__ = ["__version__"]
@@ -0,0 +1,7 @@
+# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+#
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# This file is generated by ci/update_version.sh
+# Do not edit this file manually.
+__version__ = "2.8.0"
@@ -0,0 +1,60 @@
+# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+#
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+from dataclasses import dataclass
+from functools import lru_cache
+import os
+import shutil
+from typing import Optional
+
+
+def _get_cuda_path() -> Optional[str]:
+    cuda_path = os.environ.get("CUDA_PATH")
+    if cuda_path and os.path.exists(cuda_path):
+        return cuda_path
+
+    nvcc_path = shutil.which("nvcc")
+    if nvcc_path is not None:
+        return os.path.dirname(os.path.dirname(nvcc_path))
+
+    default_path = "/usr/local/cuda"
+    if os.path.exists(default_path):
+        return default_path
+
+    return None
+
+
+@dataclass
+class IncludePaths:
+    cuda: Optional[str]
+    libcudacxx: Optional[str]
+    cub: Optional[str]
+    thrust: Optional[str]
+
+    def as_tuple(self):
+        # Note: higher-level ... lower-level order:
+        return (self.thrust, self.cub, self.libcudacxx, self.cuda)
+
+
+@lru_cache()
+def get_include_paths() -> IncludePaths:
+    # TODO: once docs env supports Python >= 3.9, we
+    # can move this to a module-level import.
+    from importlib.resources import as_file, files
+
+    cuda_incl = None
+    cuda_path = _get_cuda_path()
+    if cuda_path is not None:
+        cuda_incl = os.path.join(cuda_path, "include")
+
+    with as_file(files("cuda.cccl.include")) as f:
+        cccl_incl = str(f)
+    assert os.path.exists(cccl_incl)
+
+    return IncludePaths(
+        cuda=cuda_incl,
+        libcudacxx=os.path.join(cccl_incl, "libcudacxx"),
+        cub=cccl_incl,
+        thrust=cccl_incl,
+    )
@@ -0,0 +1,42 @@
+# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+#
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+[build-system]
+requires = [
+    "setuptools>=61.0.0",
+    "wheel",
+    "packaging",
+]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "cuda-cccl"
+description = "Experimental Package with CCCL headers to support JIT compilation"
+authors = [
+    { name = "NVIDIA Corporation" },
+]
+license = { file = "LICENSE" }
+classifiers = [
+    "Programming Language :: Python :: 3 :: Only",
+    "Environment :: GPU :: NVIDIA CUDA",
+    "License :: OSI Approved :: Apache Software License",
+]
+requires-python = ">=3.9"
+dynamic = ["version", "readme"]
+
+[project.urls]
+Homepage = "https://github.com/NVIDIA/cccl"
+Documentation = "https://github.com/NVIDIA/cccl/tree/main/python/cuda_cccl"
+Source = "https://github.com/NVIDIA/cccl/tree/main/python/cuda_cccl"
+Tracker = "https://github.com/NVIDIA/cccl/issues"
+
+[tool.setuptools.dynamic]
+version = { attr = "cuda.cccl._version.__version__" }
+readme = { file = ["README.md"], content-type = "text/markdown" }
+
+[tool.setuptools.package-data]
+"cuda" = ["cccl/include/**/*"]
+
+[tool.setuptools.exclude-package-data]
+"cuda" = ["cccl/include/__init__.py"]
@@ -0,0 +1,40 @@
+# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+#
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+from setuptools import setup, find_namespace_packages
+import os
+import shutil
+
+# Absolute path of the directory that contains this setup script.
+PROJECT_PATH = os.path.abspath(os.path.dirname(__file__))
+# Two directory levels above PROJECT_PATH; the code below reads the LICENSE
+# file and the cub/libcudacxx/thrust header trees from underneath it.
+CCCL_PATH = os.path.abspath(os.path.join(PROJECT_PATH, "..", ".."))
+
+
+def copy_license():
+    src = os.path.abspath(os.path.join(CCCL_PATH, "LICENSE"))
+    dst = os.path.join(PROJECT_PATH, "LICENSE")
+    shutil.copy(src, dst)
+
+
+def copy_cccl_headers_to_cuda_cccl_include():
+    cccl_headers = [["cub", "cub"], ["libcudacxx", "include"], ["thrust", "thrust"]]
+    inc_path = os.path.join(PROJECT_PATH, "cuda", "cccl", "include")
+    os.makedirs(inc_path, exist_ok=True)
+    for proj_dir, header_dir in cccl_headers:
+        src_path = os.path.abspath(os.path.join(CCCL_PATH, proj_dir, header_dir))
+        dst_path = os.path.join(inc_path, proj_dir)
+        if os.path.exists(dst_path):
+            shutil.rmtree(dst_path)
+        shutil.copytree(src_path, dst_path)
+    init_py_path = os.path.join(inc_path, "__init__.py")
+    with open(init_py_path, "w") as f:
+        f.write("# Intentionally empty.\n")
+
+
+# Both copies run every time this script is executed and are placed before
+# setup(); presumably so that the LICENSE file and the header tree already
+# exist when setup() gathers the package contents - confirm before reordering.
+copy_license()
+copy_cccl_headers_to_cuda_cccl_include()
+
+setup(
+    # Pick up every namespace package matching "cuda.*".
+    packages=find_namespace_packages(include=["cuda.*"]),
+    include_package_data=True,
+)
@@ -1,3 +1,2 @@
-cuda/_include
 env
 *egg-info
@@ -2,12 +2,9 @@
 #
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-import os
-import shutil
 from cuda.bindings import nvrtc
 from cuda.cooperative.experimental._caching import disk_cache
 from cuda.cooperative.experimental._common import check_in, version
-import importlib.resources as pkg_resources
 import functools
 
 
@@ -19,22 +16,6 @@ def CHECK_NVRTC(err, prog):
         raise RuntimeError(f"NVRTC error: {log.decode('ascii')}")
 
 
-def get_cuda_path():
-    cuda_path = os.environ.get("CUDA_PATH", "")
-    if os.path.exists(cuda_path):
-        return cuda_path
-
-    nvcc_path = shutil.which("nvcc")
-    if nvcc_path is not None:
-        return os.path.dirname(os.path.dirname(nvcc_path))
-
-    default_path = "/usr/local/cuda"
-    if os.path.exists(default_path):
-        return default_path
-
-    return None
-
-
 # cpp is the C++ source code
 # cc = 800 for Ampere, 900 Hopper, etc
 # rdc is true or false
@@ -46,24 +27,14 @@ def compile_impl(cpp, cc, rdc, code, nvrtc_path, nvrtc_version):
     check_in("rdc", rdc, [True, False])
     check_in("code", code, ["lto", "ptx"])
 
-    with pkg_resources.path("cuda", "_include") as include_path:
-        # Using `.parent` for compatibility with pip install --editable:
-        include_path = pkg_resources.files("cuda.cooperative").parent.joinpath(
-            "_include"
-        )
-        cub_path = include_path
-        thrust_path = include_path
-        libcudacxx_path = os.path.join(include_path, "libcudacxx")
-        cuda_include_path = os.path.join(get_cuda_path(), "include")
-
-    opts = [
-        b"--std=c++17",
-        bytes(f"--include-path={cub_path}", encoding="ascii"),
-        bytes(f"--include-path={thrust_path}", encoding="ascii"),
-        bytes(f"--include-path={libcudacxx_path}", encoding="ascii"),
-        bytes(f"--include-path={cuda_include_path}", encoding="ascii"),
-        bytes(f"--gpu-architecture=compute_{cc}", encoding="ascii"),
-    ]
+    opts = [b"--std=c++17"]
+
+    from cuda.cccl.include_paths import get_include_paths
+
+    for path in get_include_paths().as_tuple():
+        if path:
+            opts += [f"--include-path={path}".encode("ascii")]
+    opts += [f"--gpu-architecture=compute_{cc}".encode("ascii")]
     if rdc:
         opts += [b"--relocatable-device-code=true"]
 

@@ -3,9 +3,8 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 import os
-import shutil
 
-from setuptools import Command, setup, find_namespace_packages
+from setuptools import setup, find_namespace_packages
 from setuptools.command.build_py import build_py
 from wheel.bdist_wheel import bdist_wheel
 
@@ -27,35 +26,14 @@
 
 class CustomBuildCommand(build_py):
     def run(self):
-        self.run_command("package_cccl")
         build_py.run(self)
 
 
 class CustomWheelBuild(bdist_wheel):
     def run(self):
-        self.run_command("package_cccl")
         super().run()
 
 
-class PackageCCCLCommand(Command):
-    description = "Generate additional files"
-    user_options = []
-
-    def initialize_options(self):
-        pass
-
-    def finalize_options(self):
-        pass
-
-    def run(self):
-        for proj_dir, header_dir in cccl_headers:
-            src_path = os.path.abspath(os.path.join(cccl_path, proj_dir, header_dir))
-            dst_path = os.path.join(project_path, "cuda", "_include", proj_dir)
-            if os.path.exists(dst_path):
-                shutil.rmtree(dst_path)
-            shutil.copytree(src_path, dst_path)
-
-
 setup(
     name="cuda-cooperative",
     version=ver,
@@ -70,6 +48,7 @@ def run(self):
     packages=find_namespace_packages(include=["cuda.*"]),
     python_requires=">=3.9",
     install_requires=[
+        f"cuda-cccl @ file://{cccl_path}/python/cuda_cccl",
         "numba>=0.60.0",
         "pynvjitlink-cu12>=0.2.4",
         "cuda-python",
@@ -82,7 +61,6 @@ def run(self):
         ]
     },
     cmdclass={
-        "package_cccl": PackageCCCLCommand,
         "build_py": CustomBuildCommand,
         "bdist_wheel": CustomWheelBuild,
     },

@@ -1,4 +1,3 @@
-cuda/_include
 env
 *egg-info
 *so
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		## Note

		This package is currently FOR INTERNAL USE ONLY and is not meant to be used or installed explicitly.