Skip to content

Commit 709309e

Browse files
trxcllnt authored and sylvestre committed
Never cache the outer CUDA compilation (because nvcc -E can't be trusted). Always decompose via nvcc --dryrun, then cache and report the host compiler call as a CUDA compilation
1 parent 6bc8154 commit 709309e

File tree

4 files changed

+85
-48
lines changed

4 files changed

+85
-48
lines changed

src/compiler/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -32,5 +32,6 @@ mod tasking_vx;
3232
#[macro_use]
3333
mod counted_array;
3434

35+
pub use crate::compiler::c::CCompilerKind;
3536
pub use crate::compiler::compiler::*;
3637
pub use crate::compiler::preprocessor_cache::PreprocessorCacheEntry;

src/compiler/nvcc.rs

+31-9
Original file line numberDiff line numberDiff line change
@@ -461,7 +461,20 @@ pub fn generate_compile_commands(
461461
output_file_name: output.file_name().unwrap().to_owned(),
462462
};
463463

464-
Ok((command, None, Cacheable::Yes))
464+
Ok((
465+
command,
466+
None,
467+
// Never assume the outer `nvcc` call is cacheable. We must decompose the nvcc call into
468+
// its constituent subcommands with `--dryrun` and only cache the final build product.
469+
//
470+
// Always decomposing `nvcc --dryrun` is the only way to ensure caching nvcc invocations
471+
// is fully sound, because the `nvcc -E` preprocessor output is not sufficient to detect
472+
// all source code changes.
473+
//
474+
// Specifically, `nvcc -E` always defines __CUDA_ARCH__, which means changes to host-only
475+
// code guarded by an `#ifndef __CUDA_ARCH__` will _not_ be captured in `nvcc -E` output.
476+
Cacheable::No,
477+
))
465478
}
466479

467480
#[derive(Clone, Debug)]
@@ -811,19 +824,28 @@ where
811824
)
812825
}
813826
} else {
814-
// Returns Cacheable::Yes to indicate we _do_ want to run this host
815-
// compiler call through sccache (because it may be distributed),
816-
// but we _do not_ want to cache its output. The output file will
817-
// be cached as the result of the outer `nvcc` command. Caching
818-
// here would store the same object twice under two different hashes,
819-
// unnecessarily bloating the cache size.
827+
// Cache the host compiler calls, since we've marked the outer `nvcc` call
828+
// as non-cacheable. This ensures `sccache nvcc ...` _always_ decomposes the
829+
// nvcc call into its constituent subcommands with `--dryrun`, but only caches
830+
// the final build product once.
831+
//
832+
// Always decomposing `nvcc --dryrun` is the only way to ensure caching nvcc invocations
833+
// is fully sound, because the `nvcc -E` preprocessor output is not sufficient to detect
834+
// all source code changes.
835+
//
836+
// Specifically, `nvcc -E` always defines __CUDA_ARCH__, which means changes to host-only
837+
// code guarded by an `#ifndef __CUDA_ARCH__` will _not_ be captured in `nvcc -E` output.
820838
(
821839
env_vars
822840
.iter()
823841
.chain(
824842
[
825-
// Do not cache host compiler calls
826-
("SCCACHE_NO_CACHE".into(), "true".into()),
843+
// HACK: This compilation will look like a C/C++ compilation,
844+
// but we want to report it in the stats as a CUDA compilation.
845+
// The SccacheService API doesn't have a great way to specify this
846+
// case, so we set a special envvar here that it can read when the
847+
// compilation is finished.
848+
("__SCCACHE_THIS_IS_A_CUDA_COMPILATION__".into(), "".into()),
827849
]
828850
.iter(),
829851
)

src/server.rs

+16-2
Original file line numberDiff line numberDiff line change
@@ -1305,8 +1305,22 @@ where
13051305

13061306
let out_pretty = hasher.output_pretty().into_owned();
13071307
let color_mode = hasher.color_mode();
1308-
let kind = compiler.kind();
1309-
let lang = hasher.language();
1308+
1309+
let (kind, lang) = {
1310+
// HACK: See note in src/compiler/nvcc.rs
1311+
if env_vars
1312+
.iter()
1313+
.any(|(k, _)| k == "__SCCACHE_THIS_IS_A_CUDA_COMPILATION__")
1314+
{
1315+
(
1316+
CompilerKind::C(crate::compiler::CCompilerKind::Nvcc),
1317+
Language::Cuda,
1318+
)
1319+
} else {
1320+
(compiler.kind(), hasher.language())
1321+
}
1322+
};
1323+
13101324
let me = self.clone();
13111325

13121326
self.rt

tests/system.rs

+37-37
Original file line numberDiff line numberDiff line change
@@ -703,12 +703,12 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
703703
trace!("compile A request stats");
704704
get_stats(|info| {
705705
assert_eq!(2, info.stats.compile_requests);
706-
assert_eq!(5, info.stats.requests_executed);
707-
assert_eq!(1, info.stats.cache_hits.all());
706+
assert_eq!(8, info.stats.requests_executed);
707+
assert_eq!(3, info.stats.cache_hits.all());
708708
assert_eq!(3, info.stats.cache_misses.all());
709709
assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
710-
assert!(info.stats.cache_hits.get("PTX").is_none());
711-
assert!(info.stats.cache_hits.get("CUBIN").is_none());
710+
assert_eq!(&1, info.stats.cache_hits.get("PTX").unwrap());
711+
assert_eq!(&1, info.stats.cache_hits.get("CUBIN").unwrap());
712712
assert_eq!(&1, info.stats.cache_misses.get("CUDA").unwrap());
713713
assert_eq!(&1, info.stats.cache_misses.get("PTX").unwrap());
714714
assert_eq!(&1, info.stats.cache_misses.get("CUBIN").unwrap());
@@ -717,8 +717,8 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
717717
let adv_ptx_key = adv_key_kind("ptx", compiler.name);
718718
let adv_cubin_key = adv_key_kind("cubin", compiler.name);
719719
assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
720-
assert!(info.stats.cache_hits.get_adv(&adv_ptx_key).is_none());
721-
assert!(info.stats.cache_hits.get_adv(&adv_cubin_key).is_none());
720+
assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
721+
assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
722722
assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
723723
assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
724724
assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
@@ -747,12 +747,12 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
747747
trace!("compile B request stats");
748748
get_stats(|info| {
749749
assert_eq!(3, info.stats.compile_requests);
750-
assert_eq!(9, info.stats.requests_executed);
751-
assert_eq!(2, info.stats.cache_hits.all());
750+
assert_eq!(12, info.stats.requests_executed);
751+
assert_eq!(4, info.stats.cache_hits.all());
752752
assert_eq!(5, info.stats.cache_misses.all());
753753
assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
754-
assert!(info.stats.cache_hits.get("PTX").is_none());
755-
assert_eq!(&1, info.stats.cache_hits.get("CUBIN").unwrap());
754+
assert_eq!(&1, info.stats.cache_hits.get("PTX").unwrap());
755+
assert_eq!(&2, info.stats.cache_hits.get("CUBIN").unwrap());
756756
assert_eq!(&2, info.stats.cache_misses.get("CUDA").unwrap());
757757
assert_eq!(&2, info.stats.cache_misses.get("PTX").unwrap());
758758
assert_eq!(&1, info.stats.cache_misses.get("CUBIN").unwrap());
@@ -761,8 +761,8 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
761761
let adv_ptx_key = adv_key_kind("ptx", compiler.name);
762762
let adv_cubin_key = adv_key_kind("cubin", compiler.name);
763763
assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
764-
assert!(info.stats.cache_hits.get_adv(&adv_ptx_key).is_none());
765-
assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
764+
assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
765+
assert_eq!(&2, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
766766
assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
767767
assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
768768
assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
@@ -789,23 +789,23 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
789789
trace!("compile ptx request stats");
790790
get_stats(|info| {
791791
assert_eq!(4, info.stats.compile_requests);
792-
assert_eq!(11, info.stats.requests_executed);
793-
assert_eq!(3, info.stats.cache_hits.all());
794-
assert_eq!(6, info.stats.cache_misses.all());
792+
assert_eq!(14, info.stats.requests_executed);
793+
assert_eq!(5, info.stats.cache_hits.all());
794+
assert_eq!(5, info.stats.cache_misses.all());
795795
assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
796-
assert_eq!(&1, info.stats.cache_hits.get("PTX").unwrap());
797-
assert_eq!(&1, info.stats.cache_hits.get("CUBIN").unwrap());
798-
assert_eq!(&3, info.stats.cache_misses.get("CUDA").unwrap());
796+
assert_eq!(&2, info.stats.cache_hits.get("PTX").unwrap());
797+
assert_eq!(&2, info.stats.cache_hits.get("CUBIN").unwrap());
798+
assert_eq!(&2, info.stats.cache_misses.get("CUDA").unwrap());
799799
assert_eq!(&2, info.stats.cache_misses.get("PTX").unwrap());
800800
assert_eq!(&1, info.stats.cache_misses.get("CUBIN").unwrap());
801801
assert!(info.stats.cache_misses.get("C/C++").is_none());
802802
let adv_cuda_key = adv_key_kind("cuda", compiler.name);
803803
let adv_ptx_key = adv_key_kind("ptx", compiler.name);
804804
let adv_cubin_key = adv_key_kind("cubin", compiler.name);
805805
assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
806-
assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
807-
assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
808-
assert_eq!(&3, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
806+
assert_eq!(&2, info.stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
807+
assert_eq!(&2, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
808+
assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
809809
assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
810810
assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
811811
});
@@ -831,23 +831,23 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
831831
trace!("compile cubin request stats");
832832
get_stats(|info| {
833833
assert_eq!(5, info.stats.compile_requests);
834-
assert_eq!(14, info.stats.requests_executed);
835-
assert_eq!(5, info.stats.cache_hits.all());
836-
assert_eq!(7, info.stats.cache_misses.all());
834+
assert_eq!(17, info.stats.requests_executed);
835+
assert_eq!(7, info.stats.cache_hits.all());
836+
assert_eq!(5, info.stats.cache_misses.all());
837837
assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
838-
assert_eq!(&2, info.stats.cache_hits.get("PTX").unwrap());
839-
assert_eq!(&2, info.stats.cache_hits.get("CUBIN").unwrap());
840-
assert_eq!(&4, info.stats.cache_misses.get("CUDA").unwrap());
838+
assert_eq!(&3, info.stats.cache_hits.get("PTX").unwrap());
839+
assert_eq!(&3, info.stats.cache_hits.get("CUBIN").unwrap());
840+
assert_eq!(&2, info.stats.cache_misses.get("CUDA").unwrap());
841841
assert_eq!(&2, info.stats.cache_misses.get("PTX").unwrap());
842842
assert_eq!(&1, info.stats.cache_misses.get("CUBIN").unwrap());
843843
assert!(info.stats.cache_misses.get("C/C++").is_none());
844844
let adv_cuda_key = adv_key_kind("cuda", compiler.name);
845845
let adv_ptx_key = adv_key_kind("ptx", compiler.name);
846846
let adv_cubin_key = adv_key_kind("cubin", compiler.name);
847847
assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_cuda_key).unwrap());
848-
assert_eq!(&2, info.stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
849-
assert_eq!(&2, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
850-
assert_eq!(&4, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
848+
assert_eq!(&3, info.stats.cache_hits.get_adv(&adv_ptx_key).unwrap());
849+
assert_eq!(&3, info.stats.cache_hits.get_adv(&adv_cubin_key).unwrap());
850+
assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_cuda_key).unwrap());
851851
assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_ptx_key).unwrap());
852852
assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_cubin_key).unwrap());
853853
});
@@ -914,14 +914,14 @@ fn test_nvcc_proper_lang_stat_tracking(compiler: Compiler, tempdir: &Path) {
914914
trace!("request stats");
915915
get_stats(|info| {
916916
assert_eq!(4, info.stats.compile_requests);
917-
assert_eq!(8, info.stats.requests_executed);
918-
assert_eq!(3, info.stats.cache_hits.all());
917+
assert_eq!(12, info.stats.requests_executed);
918+
assert_eq!(5, info.stats.cache_hits.all());
919919
assert_eq!(3, info.stats.cache_misses.all());
920-
assert_eq!(&1, info.stats.cache_hits.get("C/C++").unwrap());
921-
assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
922-
assert_eq!(&1, info.stats.cache_hits.get("CUBIN").unwrap());
923-
assert_eq!(&1, info.stats.cache_misses.get("C/C++").unwrap());
924-
assert_eq!(&1, info.stats.cache_misses.get("CUDA").unwrap());
920+
assert!(info.stats.cache_hits.get("C/C++").is_none());
921+
assert_eq!(&2, info.stats.cache_hits.get("CUDA").unwrap());
922+
assert_eq!(&2, info.stats.cache_hits.get("CUBIN").unwrap());
923+
assert!(info.stats.cache_misses.get("C/C++").is_none());
924+
assert_eq!(&2, info.stats.cache_misses.get("CUDA").unwrap());
925925
assert_eq!(&1, info.stats.cache_misses.get("PTX").unwrap());
926926
});
927927
}

0 commit comments

Comments
 (0)