diff --git a/.gitignore b/.gitignore
index 8c29d0c23166..83974d79ac2f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -58,7 +58,6 @@ tests/unit/saved_checkpoint/
 *_hip.cpp
 *_hip.h
 *.hip
-*.cuh
 *hip_layers.h
 
 # virtual env directory for format
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 9a7bb1c9b371..70212adf3478 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -42,6 +42,15 @@ repos:
       exclude: ^(deepspeed/comm/|docs/|benchmarks/|scripts/check-torchdist.py|deepspeed/moe/sharded_moe.py|deepspeed/runtime/comm/coalesced_collectives.py|deepspeed/elasticity/elastic_agent.py|deepspeed/launcher/launch.py|tests/unit/comm/test_dist.py)
       # Specific deepspeed/ files are excluded for now until we wrap ProcessGroup in deepspeed.comm
 
+- repo: local
+  hooks:
+    - id: check-cuh-ignore
+      name: check-cuh-ignore
+      entry: ./scripts/check_cuh_ignore.py
+      language: python
+      files: ^(\.gitignore|csrc/adam/multi_tensor_apply\.cuh)$
+      pass_filenames: false
+
 - repo: local
   hooks:
     - id: check-license
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index b03a498144a4..2b332370037f 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -15,6 +15,10 @@ can also run these manually:
 ```bash
 pre-commit run --files $(git diff --name-only master)
 ```
+The pre-commit suite also validates CUDA header tracking rules. Avoid adding a global
+`*.cuh` ignore rule, because core headers such as `csrc/adam/multi_tensor_apply.cuh`
+must remain tracked for fused optimizer builds.
+
 If a formatting test fails, it will fix the modified code in place and abort
 the `git commit`. After looking over the changes, you can `git add <modified files>`
 and then repeat the previous `git commit` command.
diff --git a/scripts/check_cuh_ignore.py b/scripts/check_cuh_ignore.py
new file mode 100755
index 000000000000..181a2f01f6dc
--- /dev/null
+++ b/scripts/check_cuh_ignore.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python3
+# Copyright (c) Microsoft Corporation.
+# SPDX-License-Identifier: Apache-2.0
+"""Pre-commit guard that keeps required CUDA headers tracked by git.
+
+Fails when ``.gitignore`` contains a blanket ``*.cuh`` rule, or when a header
+listed in ``REQUIRED_HEADERS`` is missing from disk or from the git index.
+"""
+
+from __future__ import annotations
+
+import subprocess
+import sys
+from pathlib import Path
+
+# Headers that must exist and stay tracked, relative to the repository root.
+REQUIRED_HEADERS = ("csrc/adam/multi_tensor_apply.cuh", )
+# .gitignore rules that would hide every CUDA header.
+FORBIDDEN_PATTERNS = ("*.cuh", )
+
+
+def _normalize_pattern(pattern: str) -> str:
+    """Strip leading ``**/`` components, which do not change what a rule matches.
+
+    gitignore treats ``**/foo`` exactly like ``foo``, so ``**/*.cuh`` has to be
+    rejected just like ``*.cuh``.
+    """
+    while pattern.startswith("**/"):
+        pattern = pattern[len("**/"):]
+    return pattern
+
+
+def _load_patterns(gitignore_path: Path) -> set[str]:
+    """Return the non-blank, non-comment lines of ``gitignore_path``.
+
+    Surrounding whitespace is stripped. A missing file yields an empty set so
+    the check reports rule violations instead of crashing with a traceback.
+    """
+    try:
+        text = gitignore_path.read_text(encoding="utf-8")
+    except FileNotFoundError:
+        return set()
+    stripped = (raw_line.strip() for raw_line in text.splitlines())
+    return {line for line in stripped if line and not line.startswith("#")}
+
+
+def _is_git_tracked(repo_root: Path, relative_path: str) -> bool:
+    """Return True when ``git ls-files`` run in ``repo_root`` knows ``relative_path``."""
+    proc = subprocess.run(
+        ["git", "ls-files", "--error-unmatch", relative_path],
+        cwd=repo_root,
+        stdout=subprocess.DEVNULL,
+        stderr=subprocess.DEVNULL,
+        check=False,
+    )
+    # --error-unmatch makes git exit non-zero for paths missing from the index.
+    return proc.returncode == 0
+
+
+def validate_repo(repo_root: Path) -> list[str]:
+    """Check ``repo_root`` and return one message per violation (empty when clean)."""
+    errors: list[str] = []
+    gitignore_path = repo_root / ".gitignore"
+
+    # Sorted so the report order is stable regardless of set iteration order.
+    for pattern in sorted(_load_patterns(gitignore_path)):
+        if _normalize_pattern(pattern) in FORBIDDEN_PATTERNS:
+            errors.append(
+                f"Forbidden .gitignore pattern '{pattern}' found in {gitignore_path}. "
+                "Do not ignore all CUDA headers globally."
+            )
+
+    for header in REQUIRED_HEADERS:
+        if not (repo_root / header).is_file():
+            errors.append(f"Required CUDA header missing: {header}")
+            continue
+        if not _is_git_tracked(repo_root, header):
+            errors.append(f"Required CUDA header is not tracked by git: {header}")
+
+    return errors
+
+
+def main() -> int:
+    """Validate the repository containing this script; return a process exit code."""
+    repo_root = Path(__file__).resolve().parents[1]
+    errors = validate_repo(repo_root)
+    for error in errors:
+        print(error, file=sys.stderr)
+    return 1 if errors else 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/tests/unit/test_check_cuh_ignore.py b/tests/unit/test_check_cuh_ignore.py
new file mode 100644
index 000000000000..17a9d680acc2
--- /dev/null
+++ b/tests/unit/test_check_cuh_ignore.py
@@ -0,0 +1,60 @@
+# Copyright (c) Microsoft Corporation.
+# SPDX-License-Identifier: Apache-2.0
+
+import importlib.util
+import tempfile
+import unittest
+from pathlib import Path
+from unittest import mock
+
+# The script lives outside any package, so load it straight from its path.
+SCRIPT_PATH = Path(__file__).resolve().parents[2] / "scripts" / "check_cuh_ignore.py"
+_SPEC = importlib.util.spec_from_file_location("check_cuh_ignore", SCRIPT_PATH)
+check_cuh_ignore = importlib.util.module_from_spec(_SPEC)
+_SPEC.loader.exec_module(check_cuh_ignore)
+
+
+class TestCheckCuhIgnore(unittest.TestCase):
+
+    def _write_required_header(self, repo_root: Path):
+        required_header = repo_root / "csrc" / "adam" / "multi_tensor_apply.cuh"
+        required_header.parent.mkdir(parents=True, exist_ok=True)
+        required_header.write_text("// test header\n", encoding="utf-8")
+
+    def _validate(self, gitignore_text, *, write_header=True, tracked=True):
+        """Run validate_repo on a scratch repo with ``git ls-files`` mocked out."""
+        with tempfile.TemporaryDirectory() as temp_dir:
+            repo_root = Path(temp_dir)
+            if gitignore_text is not None:
+                (repo_root / ".gitignore").write_text(gitignore_text, encoding="utf-8")
+            if write_header:
+                self._write_required_header(repo_root)
+
+            with mock.patch.object(check_cuh_ignore, "_is_git_tracked", return_value=tracked):
+                return check_cuh_ignore.validate_repo(repo_root)
+
+    def test_validate_repo_rejects_global_cuh_ignore(self):
+        errors = self._validate("*.cuh\n")
+        self.assertTrue(any("Forbidden .gitignore pattern '*.cuh'" in error for error in errors))
+
+    def test_validate_repo_rejects_double_star_cuh_ignore(self):
+        errors = self._validate("**/*.cuh\n")
+        self.assertTrue(any("Forbidden .gitignore pattern '**/*.cuh'" in error for error in errors))
+
+    def test_validate_repo_accepts_tracked_required_header(self):
+        self.assertEqual(self._validate("*.hip\n"), [])
+
+    def test_validate_repo_accepts_missing_gitignore(self):
+        self.assertEqual(self._validate(None), [])
+
+    def test_validate_repo_reports_missing_required_header(self):
+        errors = self._validate("*.hip\n", write_header=False)
+        self.assertEqual(errors, ["Required CUDA header missing: csrc/adam/multi_tensor_apply.cuh"])
+
+    def test_validate_repo_reports_untracked_required_header(self):
+        errors = self._validate("*.hip\n", tracked=False)
+        self.assertEqual(errors, ["Required CUDA header is not tracked by git: csrc/adam/multi_tensor_apply.cuh"])
+
+
+if __name__ == "__main__":
+    unittest.main()