Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
e04346e
Add GitHub Actions workflow for building nvdiffrast
Aero-Ex Nov 14, 2025
3063d0a
Change Python version to 3.11
Aero-Ex Nov 14, 2025
f951467
Refactor Windows build workflow for nvdiffrast
Aero-Ex Nov 14, 2025
f4880d2
Update workflow to include NumPy 1.26.4 installation
Aero-Ex Nov 14, 2025
23d3eae
Add binary wheel build support for Windows (CUDA 11.8)
Aero-Ex Nov 14, 2025
52f9792
Fix Unicode encoding errors in build scripts
Aero-Ex Nov 14, 2025
840e70c
Remove old source wheel workflow
Aero-Ex Nov 14, 2025
47b7f3d
Update workflow to use Windows Server 2022
Aero-Ex Nov 14, 2025
a1f7ce2
Fix CUDA toolkit installation issues
Aero-Ex Nov 14, 2025
656126e
Make MSVC verification non-fatal
Aero-Ex Nov 14, 2025
267c97f
Force verification step to exit with success
Aero-Ex Nov 14, 2025
cd129f7
Make verification step truly optional
Aero-Ex Nov 14, 2025
88fa066
Add -allow-unsupported-compiler flag for CUDA 11.8 + VS2022
Aero-Ex Nov 14, 2025
ef555f7
Add STL version mismatch bypass flag
Aero-Ex Nov 14, 2025
b5230f7
Add comprehensive build safety measures
Aero-Ex Nov 14, 2025
b0355ce
Add NVDR_TORCH define to C++ compiler flags
Aero-Ex Nov 14, 2025
07193cb
Fix PowerShell error handling in build step
Aero-Ex Nov 14, 2025
4bf58ee
Add MSVC environment activation before build
Aero-Ex Nov 14, 2025
231524c
Add missing CUDA libraries to toolkit installation
Aero-Ex Nov 14, 2025
bc99cf4
Install full CUDA toolkit instead of sub-packages
Aero-Ex Nov 14, 2025
9260580
Enable CUDA separate compilation to fix linking errors
claude Nov 14, 2025
1105de0
Fix CUDA build by renaming conflicting source files
claude Nov 14, 2025
593b0e1
Fix uninitialized variable bug in RasterImpl_host.cpp
claude Nov 14, 2025
901e971
Fix DLL load test by importing torch first
claude Nov 14, 2025
a61929c
Upload wheel artifact before running tests
claude Nov 14, 2025
4614b80
Update build_binary_wheel.yml
Aero-Ex Nov 18, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 172 additions & 0 deletions .github/workflows/build_binary_wheel.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
# CI workflow that builds a pre-compiled nvdiffrast wheel on a Windows runner.
name: Build nvdiffrast Binary Wheel (Windows CUDA 12.4)

# Triggers: pushes to main/master, every pull request, and manual dispatch.
on:
push:
branches: [main, master]
pull_request:
workflow_dispatch: # Allow manual trigger

# Single job; all steps below run sequentially on the same runner.
jobs:
build_windows_binary:
runs-on: windows-2022 # Has MSVC pre-installed

steps:
# Fetch the repository sources into the runner workspace.
- name: Checkout repository
uses: actions/checkout@v4

# Select the Python 3.11 interpreter used by the remaining steps.
- name: Set up Python 3.11
uses: actions/setup-python@v5
with:
python-version: '3.11'

# Install CUDA toolkit 12.4.0 via the network installer, with both caches disabled.
# NOTE(review): the action reference below looks mangled ("[email protected]" is the
# placeholder left by e-mail obfuscation); presumably it was `Jimver/cuda-toolkit@<tag>` —
# restore the exact tag before using this file.
- name: Install CUDA Toolkit 12.4
uses: Jimver/[email protected]
with:
cuda: '12.4.0'
method: 'network'
use-github-cache: false
use-local-cache: false

# Informational step: prints tool versions and never blocks the build.
- name: Verify CUDA and Python
  run: |
    # Keep going when a probe fails so every section is still printed.
    $ErrorActionPreference = 'Continue'

    Write-Host "=== Python Version ==="
    python --version

    Write-Host "`n=== NVCC Version ==="
    nvcc --version

    Write-Host "`n=== CUDA Environment ==="
    Write-Host "CUDA_PATH: $env:CUDA_PATH"

    Write-Host "`n=== MSVC Compiler (optional check) ==="
    # Only the exit code of where.exe matters; its output is discarded.
    where.exe cl.exe 2>&1 | Out-Null
    if ($LASTEXITCODE -eq 0) {
      Write-Host "Found in PATH"
    } else {
      Write-Host "Not in PATH (will be auto-detected during build)"
    }

    Write-Host "`n[OK] Verification complete"
    # A missing cl.exe is an expected outcome here; do not let where.exe's
    # non-zero exit code become the result of this optional step.
    exit 0
  shell: powershell
  continue-on-error: true

# Install the packaging/build tooling needed by the wheel build step.
- name: Install build dependencies
  run: |
    # Upgrade pip through the interpreter: on Windows the pip.exe launcher
    # cannot overwrite itself while it is running.
    python -m pip install --upgrade pip
    # Only the last command's exit code reaches GitHub, so check this one explicitly.
    if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
    python -m pip install wheel setuptools ninja
  shell: powershell

# Install torch 2.6.0 (plus torchvision/torchaudio); the extra index hosts the cu124 wheels.
- name: Install PyTorch 2.6.0 (CUDA 12.4)
run: |
pip install torch==2.6.0 torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu124
shell: powershell

# Pin NumPy to 1.26.4 in the build environment.
- name: Install NumPy 1.26.4
run: |
pip install numpy==1.26.4
shell: powershell

# Print torch's version, its CUDA availability flag, and the CUDA version it reports.
# NOTE(review): hosted runners presumably have no GPU, so "CUDA available" may print False
# without indicating a problem — confirm.
- name: Verify PyTorch CUDA
run: |
python -c "import torch; print(f'PyTorch: {torch.__version__}'); print(f'CUDA available: {torch.cuda.is_available()}'); print(f'CUDA version: {torch.version.cuda}')"
shell: powershell

# Activate the MSVC developer environment, build the wheel with setup.py, and
# fail the step if any stage (toolchain lookup, build, wheel output) goes wrong.
- name: Build binary wheel
  run: |
    # The build's stderr is redirected below; under 'Stop', Windows PowerShell
    # would treat redirected stderr lines as errors, so failures are detected
    # through explicit exit-code checks instead.
    $ErrorActionPreference = 'Continue'

    Write-Host "=== Setting up MSVC environment ==="
    # Find and activate Visual Studio Developer environment
    $vswhere = "${env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe"
    $vsPath = & $vswhere -latest -property installationPath
    if ([string]::IsNullOrWhiteSpace($vsPath)) {
      Write-Host "`n=== BUILD FAILED (vswhere did not report a Visual Studio installation) ==="
      exit 1
    }
    Write-Host "Visual Studio path: $vsPath"

    Import-Module "$vsPath\Common7\Tools\Microsoft.VisualStudio.DevShell.dll"
    Enter-VsDevShell -VsInstallPath $vsPath -SkipAutomaticLocation -DevCmdArguments "-arch=x64 -host_arch=x64"

    Write-Host "Verifying cl.exe is now in PATH:"
    where.exe cl.exe
    if ($LASTEXITCODE -ne 0) {
      # DISTUTILS_USE_SDK=1 is set for this step, so the compiler must already be on PATH.
      Write-Host "`n=== BUILD FAILED (cl.exe not found after activating the developer shell) ==="
      exit 1
    }

    Write-Host "`n=== Building binary wheel with CUDA extensions ==="
    # All build variables come from the step-level env: block below.
    Write-Host "Build environment:"
    Write-Host " CUDA_PATH: $env:CUDA_PATH"
    Write-Host " BUILD_BINARY_WHEEL: $env:BUILD_BINARY_WHEEL"
    Write-Host " TORCH_CUDA_ARCH_LIST: $env:TORCH_CUDA_ARCH_LIST"
    Write-Host " MAX_JOBS: $env:MAX_JOBS"
    Write-Host ""

    # Capture the full log first, then print it, so the output is shown
    # whether or not the build succeeded.
    python setup.py bdist_wheel > build.log 2>&1
    $buildExitCode = $LASTEXITCODE

    Write-Host "`n=== Build Output ==="
    Get-Content build.log

    if ($buildExitCode -ne 0) {
      Write-Host "`n=== BUILD FAILED (exit code: $buildExitCode) ==="
      exit $buildExitCode
    }

    Write-Host "`n=== Built wheel ==="
    $wheels = @(Get-ChildItem dist\*.whl -ErrorAction SilentlyContinue)
    if ($wheels.Count -eq 0) {
      # A zero exit code without a wheel would otherwise surface only in later steps.
      Write-Host "`n=== BUILD FAILED (no wheel was produced in dist) ==="
      exit 1
    }
    foreach ($wheel in $wheels) {
      Write-Host " Wheel: $($wheel.Name)"
      Write-Host " Size: $([math]::Round($wheel.Length / 1MB, 2)) MB"
    }
  shell: powershell
  env:
    # Switches setup.py into binary-wheel mode (see BUILD_README.md).
    BUILD_BINARY_WHEEL: "1"
    # Compute capabilities to compile kernels for.
    TORCH_CUDA_ARCH_LIST: "6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0"
    # NOTE(review): presumably makes the Python build tooling use the already
    # activated MSVC environment instead of searching for one — confirm.
    DISTUTILS_USE_SDK: "1"
    # NOTE(review): presumably caps parallel compile jobs to limit runner memory use — confirm.
    MAX_JOBS: "2"

# Publish the built wheel as a workflow artifact (kept for 30 days).
- name: Upload wheel artifact
  uses: actions/upload-artifact@v4
  with:
    name: nvdiffrast-binary-wheel-py311-cu124-win_amd64
    path: dist/*.whl
    # The action only warns by default when nothing matches; a missing wheel must fail the job.
    if-no-files-found: error
    retention-days: 30

# Install the built wheel into a fresh virtual environment and check that the
# package and its pre-compiled plugin can be imported.
- name: Test wheel installation
  run: |
    # GitHub propagates only the exit code of the LAST native command in a
    # PowerShell step, so each command is checked explicitly.
    function Assert-LastExit([string]$what) {
      if ($LASTEXITCODE -ne 0) {
        Write-Host "[FAIL] $what (exit code: $LASTEXITCODE)"
        exit $LASTEXITCODE
      }
    }

    Write-Host "=== Creating test environment ==="
    python -m venv test_env
    Assert-LastExit "creating the virtual environment"
    # From here on, python/pip resolve to the virtual environment.
    .\test_env\Scripts\Activate.ps1

    Write-Host "`n=== Installing PyTorch in test env ==="
    pip install torch==2.6.0 --extra-index-url https://download.pytorch.org/whl/cu124
    Assert-LastExit "installing PyTorch"

    Write-Host "`n=== Installing built wheel ==="
    $wheel = Get-ChildItem dist\*.whl | Select-Object -First 1
    if (-not $wheel) {
      Write-Host "[FAIL] no wheel found in dist"
      exit 1
    }
    pip install $wheel.FullName
    Assert-LastExit "installing the built wheel"

    Write-Host "`n=== Testing import ==="
    python -c "import nvdiffrast.torch as dr; print('[OK] nvdiffrast imported successfully')"
    Assert-LastExit "importing nvdiffrast.torch"

    Write-Host "`n=== Testing CUDA plugin load ==="
    python -c "from nvdiffrast.torch import ops; plugin = ops._get_plugin(); print('[OK] CUDA plugin loaded successfully')"
    Assert-LastExit "loading the CUDA plugin"

    Write-Host "`n=== Verifying pre-compiled plugin ==="
    # torch is imported first so its DLLs are loaded before the extension module.
    python -c "import sys; import torch; import nvdiffrast_plugin; print('[OK] Pre-compiled plugin found:', nvdiffrast_plugin.__file__)"
    Assert-LastExit "importing the pre-compiled plugin"
  shell: powershell

# Final report: banner, the wheel(s) found in dist with their size, and download instructions.
- name: Display build summary
  run: |
    # Banner
    $banner = @(
      "`n==========================================",
      "✅ BUILD SUCCESSFUL!",
      "==========================================",
      "`nBuilt wheel:"
    )
    foreach ($line in $banner) { Write-Host $line }

    # One entry per wheel; @() guarantees zero iterations when dist holds no wheel.
    @(Get-ChildItem dist\*.whl) | ForEach-Object {
      Write-Host " 📦 $($_.Name)"
      Write-Host " Size: $([math]::Round($_.Length / 1MB, 2)) MB"
    }

    # Download instructions and closing note
    $footer = @(
      "`n📥 To download:",
      " 1. Go to Actions tab in GitHub",
      " 2. Click on this workflow run",
      " 3. Download artifact from 'Artifacts' section",
      "`n✨ This is a TRUE BINARY WHEEL (cp311-cp311-win_amd64)",
      " No MSVC Build Tools required for installation!"
    )
    foreach ($line in $footer) { Write-Host $line }
  shell: powershell
128 changes: 128 additions & 0 deletions BUILD_README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
# nvdiffrast Binary Wheel Build

This fork adds support for building **true binary wheels** for Windows with pre-compiled CUDA extensions.

## What's Different?

| Aspect | Original nvdiffrast | This Fork |
|--------|-------------------|-----------|
| Wheel Type | Source wheel (`py3-none-any`) | Binary wheel (`cp311-cp311-win_amd64`) |
| Installation Requires MSVC? | ✅ YES (~7GB) | ❌ NO |
| Compilation | JIT at runtime (1-2 min first use) | Pre-compiled (instant load) |
| Setup | Complex | Simple `pip install` |

## Changes Made

### 1. Modified `setup.py`
- Added environment variable `BUILD_BINARY_WHEEL` to trigger binary build mode
- Uses `torch.utils.cpp_extension.CUDAExtension` to compile CUDA code during wheel build
- Falls back to original source wheel behavior if `BUILD_BINARY_WHEEL != 1`

### 2. Modified `nvdiffrast/torch/ops.py`
- Added pre-compiled extension loader at the beginning of `_get_plugin()`
- Tries to import `nvdiffrast_plugin` (the pre-compiled extension) first
- Falls back to JIT compilation if pre-compiled version not found
- Backwards compatible with source wheels

### 3. Added GitHub Actions Workflow
- `.github/workflows/build_binary_wheel.yml`
- Automatically builds binary wheels on push
- Installs the CUDA 12.4 toolkit in CI
- Tests the built wheel
- Uploads as artifact

## Building Binary Wheels

### Using GitHub Actions (Recommended)

1. Push changes to your fork
2. GitHub Actions automatically builds the wheel
3. Download from Actions > Artifacts

### Manual Build (Requires MSVC + CUDA 12.4)

```powershell
# Install dependencies
pip install wheel setuptools ninja
pip install torch==2.6.0 --extra-index-url https://download.pytorch.org/whl/cu124
pip install numpy==1.26.4

# Build binary wheel
$env:BUILD_BINARY_WHEEL = "1"
$env:TORCH_CUDA_ARCH_LIST = "6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0"
python setup.py bdist_wheel

# Wheel will be in dist/ folder
```

## Installing Binary Wheel

```bash
# Install PyTorch first (required dependency)
pip install torch==2.6.0 --extra-index-url https://download.pytorch.org/whl/cu124

# Install the binary wheel
pip install nvdiffrast-0.3.4-cp311-cp311-win_amd64.whl

# Test it
python -c "import nvdiffrast.torch as dr; print('Success!')"
```

## Requirements

### Build Requirements (CI only)
- Python 3.11
- PyTorch 2.6.0 (CUDA 12.4)
- CUDA Toolkit 12.4
- MSVC compiler
- NumPy 1.26.4

### Runtime Requirements (End Users)
- Python 3.11
- PyTorch 2.6.0 (CUDA 12.4)
- NVIDIA GPU with CUDA support
- CUDA 12.4 runtime libraries (shipped with the PyTorch cu124 wheel) and an NVIDIA driver recent enough for CUDA 12.4
- **NO MSVC Build Tools required** ✅

## Supported GPU Architectures

The wheel includes pre-compiled code for these compute capabilities:
- 6.0, 6.1 (GTX 10-series)
- 7.0, 7.5 (RTX 20-series, Tesla V100/T4)
- 8.0 (A100)
- 8.6 (RTX 30-series)
- 8.9 (RTX 40-series)
- 9.0 (H100 / Hopper)

## Backwards Compatibility

The modified `setup.py` is **fully backwards compatible**:
- Without `BUILD_BINARY_WHEEL=1`: Builds original source wheel
- With `BUILD_BINARY_WHEEL=1`: Builds binary wheel with pre-compiled extensions

The modified `ops.py` is also backwards compatible:
- If pre-compiled extension exists: Uses it
- If not: Falls back to JIT compilation (original behavior)

## Troubleshooting

### "Could not locate MSVC installation" during build
- Use GitHub Actions (has MSVC pre-installed)
- Or install Visual Studio Build Tools locally

### "CUDA error: no kernel image is available"
- Your GPU architecture wasn't included in `TORCH_CUDA_ARCH_LIST`
- Rebuild with your GPU's compute capability

### Import error at runtime
- Ensure PyTorch 2.6.0 (CUDA 12.4) is installed
- Verify CUDA runtime is available (check GPU drivers)

## Credits

- Original nvdiffrast: https://github.com/NVlabs/nvdiffrast
- Binary wheel modifications: Aero-Ex

## License

Same as original nvdiffrast (NVIDIA Source Code License)
Original file line number Diff line number Diff line change
Expand Up @@ -324,12 +324,13 @@ void RasterImpl::launchStages(bool instanceMode, bool peel, cudaStream_t stream)
p.activeTiles = m_activeTiles.getPtr();
p.tileFirstSeg = m_tileFirstSeg.getPtr();

p.strideX = m_bufferSizePixels.x;
p.strideY = m_bufferSizePixels.y;

size_t byteOffset = ((size_t)m_offsetPixels.x + (size_t)m_offsetPixels.y * (size_t)p.strideX) * sizeof(U32);
p.colorBuffer = m_colorBuffer.getPtr(byteOffset);
p.depthBuffer = m_depthBuffer.getPtr(byteOffset);
p.peelBuffer = (m_renderModeFlags & CudaRaster::RenderModeFlag_EnableDepthPeeling) ? m_peelBuffer.getPtr(byteOffset) : 0;
p.strideX = m_bufferSizePixels.x;
p.strideY = m_bufferSizePixels.y;

memcpy(&p.imageParamsFirst, imageParams, min(m_numImages, CR_EMBED_IMAGE_PARAMS) * sizeof(CRImageParams));
p.imageParamsExtra = (CRImageParams*)m_crImageParamsExtra.getPtr();
Expand Down
File renamed without changes.
15 changes: 15 additions & 0 deletions nvdiffrast/torch/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,21 @@ def _get_plugin(gl=False):
if _cached_plugin.get(gl, None) is not None:
return _cached_plugin[gl]

# Try to load pre-compiled extension first (for binary wheels)
try:
if gl:
import nvdiffrast_plugin_gl
_cached_plugin[gl] = nvdiffrast_plugin_gl
else:
import nvdiffrast_plugin
_cached_plugin[gl] = nvdiffrast_plugin

logging.getLogger('nvdiffrast').info("[OK] Loaded pre-compiled nvdiffrast plugin")
return _cached_plugin[gl]
except ImportError:
# Pre-compiled extension not found, fall back to JIT compilation
logging.getLogger('nvdiffrast').info("Pre-compiled plugin not found, using JIT compilation")

# Make sure we can find the necessary compiler and libary binaries.
if os.name == 'nt':
lib_dir = os.path.dirname(__file__) + r"\..\lib"
Expand Down
Loading