NVIDIA · rwgk · Dec 12, 2024 · Dec 20, 2024 · Dec 20, 2024 · Dec 20, 2024
@@ -11,6 +11,12 @@ fail_if_no_gpu
 readonly prefix="${BUILD_DIR}/python/"
 export PYTHONPATH="${prefix}:${PYTHONPATH:-}"
 
+pushd ../python/cuda_cccl >/dev/null
+
+run_command "⚙️  Pip install cuda_cccl" pip install --force-reinstall --upgrade --target "${prefix}" .
+
+popd >/dev/null
+
 pushd ../python/cuda_cooperative >/dev/null
 
 run_command "⚙️  Pip install cuda_cooperative" pip install --force-reinstall --upgrade --target "${prefix}" .[test]
@@ -20,11 +26,7 @@ popd >/dev/null
 
 pushd ../python/cuda_parallel >/dev/null
 
-# Temporarily install the package twice to populate include directory as part of the first installation
-# and to let manifest discover these includes during the second installation. Do not forget to remove the
-# second installation after https://github.com/NVIDIA/cccl/issues/2281 is addressed.
-run_command "⚙️  Pip install cuda_parallel once" pip install --force-reinstall --upgrade --target "${prefix}" .[test]
-run_command "⚙️  Pip install cuda_parallel twice" pip install --force-reinstall --upgrade --target "${prefix}" .[test]
+run_command "⚙️  Pip install cuda_parallel" pip install --force-reinstall --upgrade --target "${prefix}" .[test]
 run_command "🚀  Pytest cuda_parallel" python -m pytest -v ./tests
 
 popd >/dev/null

@@ -37,6 +37,7 @@ CUB_CMAKE_VERSION_FILE="lib/cmake/cub/cub-config-version.cmake"
 LIBCUDACXX_CMAKE_VERSION_FILE="lib/cmake/libcudacxx/libcudacxx-config-version.cmake"
 THRUST_CMAKE_VERSION_FILE="lib/cmake/thrust/thrust-config-version.cmake"
 CUDAX_CMAKE_VERSION_FILE="lib/cmake/cudax/cudax-config-version.cmake"
+CUDA_CCCL_VERSION_FILE="python/cuda_cccl/cuda/cccl/_version.py"
 CUDA_COOPERATIVE_VERSION_FILE="python/cuda_cooperative/cuda/cooperative/_version.py"
 CUDA_PARALLEL_VERSION_FILE="python/cuda_parallel/cuda/parallel/_version.py"
 
@@ -110,6 +111,7 @@ update_file "$CUDAX_CMAKE_VERSION_FILE" "set(cudax_VERSION_MAJOR \([0-9]\+\))" "
 update_file "$CUDAX_CMAKE_VERSION_FILE" "set(cudax_VERSION_MINOR \([0-9]\+\))" "set(cudax_VERSION_MINOR $minor)"
 update_file "$CUDAX_CMAKE_VERSION_FILE" "set(cudax_VERSION_PATCH \([0-9]\+\))" "set(cudax_VERSION_PATCH $patch)"
 
+# cuda_cccl uses the same <pymajor>.<pyminor>.<major>.<minor>.<patch> scheme as the other Python packages.
+update_file "$CUDA_CCCL_VERSION_FILE" "^__version__ = \"\([0-9.]\+\)\"" "__version__ = \"$pymajor.$pyminor.$major.$minor.$patch\""
 update_file "$CUDA_COOPERATIVE_VERSION_FILE" "^__version__ = \"\([0-9.]\+\)\"" "__version__ = \"$pymajor.$pyminor.$major.$minor.$patch\""
 update_file "$CUDA_PARALLEL_VERSION_FILE" "^__version__ = \"\([0-9.]\+\)\"" "__version__ = \"$pymajor.$pyminor.$major.$minor.$patch\""
 

@@ -0,0 +1,2 @@
+cuda/_include
+*egg-info
@@ -0,0 +1,11 @@
+# `cuda.cccl`: Experimental CUDA Core Compute Library Python module with CCCL headers
+
+## Documentation
+
+Please visit the documentation here: https://nvidia.github.io/cccl/python.html.
+
+## Local development
+
+```bash
+pip3 install .
+```
@@ -0,0 +1,7 @@
+# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+#
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# This file is generated by ci/update_version.sh
+# Do not edit this file manually.
+__version__ = "0.1.2.8.0"
@@ -0,0 +1,7 @@
+# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+#
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+[build-system]
+requires = ["packaging", "setuptools>=61.0.0", "wheel"]
+build-backend = "setuptools.build_meta"
@@ -0,0 +1,57 @@
+# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+#
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+import os
+import shutil
+
+from setuptools import setup, find_namespace_packages
+
+
+# Absolute directory that contains this setup.py.
+project_path = os.path.abspath(os.path.dirname(__file__))
+# Two directory levels above project_path; used as the root for locating the
+# CCCL header directories listed in cccl_headers.
+cccl_path = os.path.abspath(os.path.join(project_path, "..", ".."))
+# [project directory, header subdirectory] pairs, resolved relative to cccl_path.
+cccl_headers = [["cub", "cub"], ["libcudacxx", "include"], ["thrust", "thrust"]]
+# Execute cuda/cccl/_version.py in this module's namespace so that it assigns
+# __version__; the value is kept as `ver` and the temporary name is deleted.
+__version__ = None
+with open(os.path.join(project_path, "cuda", "cccl", "_version.py")) as f:
+    exec(f.read())
+assert __version__ is not None
+ver = __version__
+del __version__
+
+
+# NOTE(review): README.md is opened relative to the current working directory,
+# not project_path — confirm builds are always started from this directory.
+with open("README.md") as f:
+    long_description = f.read()
+
+
+def copy_cccl_headers_to_cuda_include():
+    """Copy the CCCL header trees into ``cuda/_include`` inside this project.
+
+    For each ``[proj_dir, header_dir]`` pair in ``cccl_headers``, the directory
+    ``<cccl_path>/<proj_dir>/<header_dir>`` is copied to
+    ``<project_path>/cuda/_include/<proj_dir>``. An existing destination
+    directory is removed first, so every run starts from a fresh copy.
+    Finally a placeholder ``__init__.py`` is written into ``cuda/_include``.
+    """
+    inc_path = os.path.join(project_path, "cuda", "_include")
+    for proj_dir, header_dir in cccl_headers:
+        src_path = os.path.abspath(os.path.join(cccl_path, proj_dir, header_dir))
+        dst_path = os.path.join(inc_path, proj_dir)
+        # shutil.copytree (default arguments) fails if the destination already
+        # exists, so clear any previous copy first.
+        if os.path.exists(dst_path):
+            shutil.rmtree(dst_path)
+        shutil.copytree(src_path, dst_path)
+    # NOTE(review): presumably the marker file makes ``cuda._include`` resolve
+    # as a regular package for the resource lookups in cuda.cooperative and
+    # cuda.parallel — confirm.
+    init_py_path = os.path.join(inc_path, "__init__.py")
+    with open(init_py_path, "w") as f:
+        print("# Intentionally empty.", file=f)
+
+
+# Runs every time setup.py is executed, before setup() evaluates
+# find_namespace_packages(...) below.
+copy_cccl_headers_to_cuda_include()
+
+setup(
+    name="cuda-cccl",
+    version=ver,
+    description="Experimental Package with CCCL headers to support JIT compilation",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    author="NVIDIA Corporation",
+    classifiers=[
+        "Programming Language :: Python :: 3 :: Only",
+        "Environment :: GPU :: NVIDIA CUDA",
+    ],
+    packages=find_namespace_packages(include=["cuda.*"]),
+    python_requires=">=3.9",
+    # NOTE(review): the copied headers are non-Python files; presumably this
+    # flag is what pulls them into the distribution — confirm a MANIFEST.in or
+    # equivalent covers cuda/_include.
+    include_package_data=True,
+    license="Apache-2.0 with LLVM exception",
+    # NOTE(review): this path points outside the project directory — confirm
+    # the setuptools version in use accepts it.
+    license_files=("../../LICENSE",),
+)
@@ -1,3 +1,2 @@
-cuda/_include
 env
 *egg-info
diff --git a/python/cuda_cooperative/README.md b/python/cuda_cooperative/README.md
@@ -6,7 +6,16 @@ Please visit the documentation here: https://nvidia.github.io/cccl/python.html.
 
 ## Local development
 
+First-time installation (run from the repository's `python/` directory):
+
+```bash
+pip3 install ./cuda_cccl
+pip3 install ./cuda_cooperative[test]
+pytest -v ./cuda_cooperative/tests/
+```
+
+For faster iterative development:
+
 ```bash
-pip3 install -e .[test]
-pytest -v ./tests/
+pip3 install -e ./cuda_cooperative[test]
 ```
@@ -47,10 +47,6 @@ def compile_impl(cpp, cc, rdc, code, nvrtc_path, nvrtc_version):
     check_in("code", code, ["lto", "ptx"])
 
     with pkg_resources.path("cuda", "_include") as include_path:
-        # Using `.parent` for compatibility with pip install --editable:
-        include_path = pkg_resources.files("cuda.cooperative").parent.joinpath(
-            "_include"
-        )
         cub_path = include_path
         thrust_path = include_path
         libcudacxx_path = os.path.join(include_path, "libcudacxx")

@@ -3,9 +3,8 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 import os
-import shutil
 
-from setuptools import Command, setup, find_namespace_packages
+from setuptools import setup, find_namespace_packages
 from setuptools.command.build_py import build_py
 from wheel.bdist_wheel import bdist_wheel
 
@@ -27,35 +26,14 @@
 
 class CustomBuildCommand(build_py):
+    # NOTE(review): with the "package_cccl" step removed, this override only
+    # delegates to build_py.run — presumably it can be dropped; confirm.
     def run(self):
-        self.run_command("package_cccl")
         build_py.run(self)
 
 
 class CustomWheelBuild(bdist_wheel):
+    # NOTE(review): with the "package_cccl" step removed, this override only
+    # delegates to the parent run() — presumably it can be dropped; confirm.
     def run(self):
-        self.run_command("package_cccl")
         super().run()
 
 
-class PackageCCCLCommand(Command):
-    description = "Generate additional files"
-    user_options = []
-
-    def initialize_options(self):
-        pass
-
-    def finalize_options(self):
-        pass
-
-    def run(self):
-        for proj_dir, header_dir in cccl_headers:
-            src_path = os.path.abspath(os.path.join(cccl_path, proj_dir, header_dir))
-            dst_path = os.path.join(project_path, "cuda", "_include", proj_dir)
-            if os.path.exists(dst_path):
-                shutil.rmtree(dst_path)
-            shutil.copytree(src_path, dst_path)
-
-
 setup(
     name="cuda-cooperative",
     version=ver,
@@ -70,6 +48,7 @@ def run(self):
     packages=find_namespace_packages(include=["cuda.*"]),
     python_requires=">=3.9",
     install_requires=[
+        f"cuda-cccl @ file://{cccl_path}/python/cuda_cccl",
         "numba>=0.60.0",
         "pynvjitlink-cu12>=0.2.4",
         "cuda-python",
@@ -82,7 +61,6 @@ def run(self):
         ]
     },
     cmdclass={
-        "package_cccl": PackageCCCLCommand,
         "build_py": CustomBuildCommand,
         "bdist_wheel": CustomWheelBuild,
     },

@@ -1,4 +1,3 @@
-cuda/_include
 env
 *egg-info
 *so
diff --git a/python/cuda_parallel/README.md b/python/cuda_parallel/README.md
@@ -6,7 +6,16 @@ Please visit the documentation here: https://nvidia.github.io/cccl/python.html.
 
 ## Local development
 
+First-time installation (run from the repository's `python/` directory):
+
+```bash
+pip3 install ./cuda_cccl
+pip3 install ./cuda_parallel[test]
+pytest -v ./cuda_parallel/tests/
+```
+
+For faster iterative development:
+
 ```bash
-pip3 install -e .[test]
-pytest -v ./tests/
+pip3 install -e ./cuda_parallel[test]
 ```
@@ -52,13 +52,13 @@ def get_bindings() -> ctypes.CDLL:
 
 @lru_cache()
 def get_paths() -> List[bytes]:
-    with as_file(files("cuda.parallel")) as f:
-        # Using `.parent` for compatibility with pip install --editable:
-        cub_include_path = str(f.parent / "_include")
+    with as_file(files("cuda._include")) as f:
+        cub_include_path = str(f)
     thrust_include_path = cub_include_path
     libcudacxx_include_path = str(os.path.join(cub_include_path, "libcudacxx"))
     cuda_include_path = None
-    if cuda_path := _get_cuda_path():
+    cuda_path = _get_cuda_path()
+    if cuda_path:
         cuda_include_path = str(os.path.join(cuda_path, "include"))
     paths = [
         f"-I{path}".encode()

@@ -3,13 +3,10 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 import os
-import shutil
 import subprocess
 
-from setuptools import Command, Extension, setup, find_namespace_packages
-from setuptools.command.build_py import build_py
+from setuptools import Extension, setup, find_namespace_packages
 from setuptools.command.build_ext import build_ext
-from wheel.bdist_wheel import bdist_wheel
 
 
 project_path = os.path.abspath(os.path.dirname(__file__))
@@ -27,38 +24,6 @@
     long_description = f.read()
 
 
-class CustomBuildCommand(build_py):
-    def run(self):
-        self.run_command("package_cccl")
-        build_py.run(self)
-
-
-class CustomWheelBuild(bdist_wheel):
-    def run(self):
-        self.run_command("package_cccl")
-        super().run()
-
-
-class PackageCCCLCommand(Command):
-    description = "Generate additional files"
-    user_options = []
-
-    def initialize_options(self):
-        pass
-
-    def finalize_options(self):
-        pass
-
-    def run(self):
-        for proj_dir, header_dir in cccl_headers:
-            src_path = os.path.abspath(os.path.join(cccl_path, proj_dir, header_dir))
-            # TODO Extract cccl headers into a standalone package
-            dst_path = os.path.join(project_path, "cuda", "_include", proj_dir)
-            if os.path.exists(dst_path):
-                shutil.rmtree(dst_path)
-            shutil.copytree(src_path, dst_path)
-
-
 class CMakeExtension(Extension):
     def __init__(self, name):
         super().__init__(name, sources=[])
@@ -100,7 +65,12 @@ def build_extension(self, ext):
     ],
     packages=find_namespace_packages(include=["cuda.*"]),
     python_requires=">=3.9",
-    install_requires=["numba>=0.60.0", "cuda-python", "jinja2"],
+    install_requires=[
+        f"cuda-cccl @ file://{cccl_path}/python/cuda_cccl",
+        "numba>=0.60.0",
+        "cuda-python",
+        "jinja2",
+    ],
     extras_require={
         "test": [
             "pytest",
@@ -109,9 +79,6 @@ def build_extension(self, ext):
         ]
     },
     cmdclass={
-        "package_cccl": PackageCCCLCommand,
-        "build_py": CustomBuildCommand,
-        "bdist_wheel": CustomWheelBuild,
         "build_ext": BuildCMakeExtension,
     },
     ext_modules=[CMakeExtension("cuda.parallel.experimental.cccl.c")],