From 8f65d2e208a65a5fd789daf65c6c3ba916ea2ebf Mon Sep 17 00:00:00 2001 From: "Karl L. Brennan" Date: Tue, 15 Oct 2024 22:14:01 +0200 Subject: [PATCH] Rationalize compute capability arguments in makefiles Removes superfluous {x.y | y > 0} args, adds comments to CC 6+ lines, and removes trailing space on CC 3.0 line. Also uncomments CC 5.0 line in win64 and linux makefiles, as current CUDA 12.6 still supports CC 5.x (Maxwell). --- src/Makefile | 19 ++++++------------- src/Makefile.win | 21 ++++++++++----------- src/Makefile.win32 | 6 +++++- 3 files changed, 21 insertions(+), 25 deletions(-) diff --git a/src/Makefile b/src/Makefile index ccf4bb6..b0bd8a1 100644 --- a/src/Makefile +++ b/src/Makefile @@ -15,20 +15,13 @@ NVCCFLAGS = $(CUDA_INCLUDE) --ptxas-options=-v # generate code for various compute capabilities # NVCCFLAGS += --generate-code arch=compute_11,code=sm_11 # CC 1.1, 1.2 and 1.3 GPUs will use this code (1.0 is not possible for mfaktc) # NVCCFLAGS += --generate-code arch=compute_20,code=sm_20 # CC 2.x GPUs will use this code, one code fits all! -# NVCCFLAGS += --generate-code arch=compute_30,code=sm_30 # all CC 3.x GPUs _COULD_ use this code +# NVCCFLAGS += --generate-code arch=compute_30,code=sm_30 # all CC 3.x GPUs _COULD_ use this code # NVCCFLAGS += --generate-code arch=compute_35,code=sm_35 # but CC 3.5 (3.2?) 
_CAN_ use funnel shift which is useful for mfaktc -# NVCCFLAGS += --generate-code arch=compute_50,code=sm_50 # CC 5.x GPUs will use this code -NVCCFLAGS += --generate-code arch=compute_60,code=sm_60 -NVCCFLAGS += --generate-code arch=compute_61,code=sm_61 -NVCCFLAGS += --generate-code arch=compute_62,code=sm_62 -NVCCFLAGS += --generate-code arch=compute_70,code=sm_70 -NVCCFLAGS += --generate-code arch=compute_72,code=sm_72 -NVCCFLAGS += --generate-code arch=compute_75,code=sm_75 -NVCCFLAGS += --generate-code arch=compute_80,code=sm_80 -NVCCFLAGS += --generate-code arch=compute_86,code=sm_86 -NVCCFLAGS += --generate-code arch=compute_87,code=sm_87 -NVCCFLAGS += --generate-code arch=compute_89,code=sm_89 -NVCCFLAGS += --generate-code arch=compute_90,code=sm_90 +NVCCFLAGS += --generate-code arch=compute_50,code=sm_50 # CC 5.x GPUs will use this code +NVCCFLAGS += --generate-code arch=compute_60,code=sm_60 # CC 6.x GPUs will use this code +NVCCFLAGS += --generate-code arch=compute_70,code=sm_70 # CC 7.x GPUs will use this code +NVCCFLAGS += --generate-code arch=compute_80,code=sm_80 # CC 8.x GPUs will use this code +NVCCFLAGS += --generate-code arch=compute_90,code=sm_90 # CC 9.x GPUs will use this code # pass some options to the C host compiler (e.g. 
gcc on Linux) NVCCFLAGS += --compiler-options=-Wall diff --git a/src/Makefile.win b/src/Makefile.win index c83c304..e68d6ba 100644 --- a/src/Makefile.win +++ b/src/Makefile.win @@ -8,17 +8,16 @@ CUFLAGS = -DWIN64 -Xcompiler /EHsc,/W3,/nologo,/Ox $(NVCCFLAGS) ############################################################ -NVCCFLAGS += --generate-code arch=compute_60,code=sm_60 -NVCCFLAGS += --generate-code arch=compute_61,code=sm_61 -NVCCFLAGS += --generate-code arch=compute_62,code=sm_62 -NVCCFLAGS += --generate-code arch=compute_70,code=sm_70 -NVCCFLAGS += --generate-code arch=compute_72,code=sm_72 -NVCCFLAGS += --generate-code arch=compute_75,code=sm_75 -NVCCFLAGS += --generate-code arch=compute_80,code=sm_80 -NVCCFLAGS += --generate-code arch=compute_86,code=sm_86 -NVCCFLAGS += --generate-code arch=compute_87,code=sm_87 -NVCCFLAGS += --generate-code arch=compute_89,code=sm_89 -NVCCFLAGS += --generate-code arch=compute_90,code=sm_90 +# generate code for various compute capabilities +# NVCCFLAGS += --generate-code arch=compute_11,code=sm_11 # CC 1.1, 1.2 and 1.3 GPUs will use this code (1.0 is not possible for mfaktc) +# NVCCFLAGS += --generate-code arch=compute_20,code=sm_20 # CC 2.x GPUs will use this code, one code fits all! +# NVCCFLAGS += --generate-code arch=compute_30,code=sm_30 # all CC 3.x GPUs _COULD_ use this code +# NVCCFLAGS += --generate-code arch=compute_35,code=sm_35 # but CC 3.5 (3.2?) 
_CAN_ use funnel shift which is useful for mfaktc +NVCCFLAGS += --generate-code arch=compute_50,code=sm_50 # CC 5.x GPUs will use this code +NVCCFLAGS += --generate-code arch=compute_60,code=sm_60 # CC 6.x GPUs will use this code +NVCCFLAGS += --generate-code arch=compute_70,code=sm_70 # CC 7.x GPUs will use this code +NVCCFLAGS += --generate-code arch=compute_80,code=sm_80 # CC 8.x GPUs will use this code +NVCCFLAGS += --generate-code arch=compute_90,code=sm_90 # CC 9.x GPUs will use this code ############################################################ diff --git a/src/Makefile.win32 b/src/Makefile.win32 index 79e553e..700d32d 100644 --- a/src/Makefile.win32 +++ b/src/Makefile.win32 @@ -9,9 +9,13 @@ CUFLAGS = -ccbin="C:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\bin" -X # generate code for various compute capabilities NVCCFLAGS += --generate-code arch=compute_11,code=sm_11 # CC 1.1, 1.2 and 1.3 GPUs will use this code (1.0 is not possible for mfaktc) NVCCFLAGS += --generate-code arch=compute_20,code=sm_20 # CC 2.x GPUs will use this code, one code fits all! -NVCCFLAGS += --generate-code arch=compute_30,code=sm_30 # all CC 3.x GPUs _COULD_ use this code +NVCCFLAGS += --generate-code arch=compute_30,code=sm_30 # all CC 3.x GPUs _COULD_ use this code NVCCFLAGS += --generate-code arch=compute_35,code=sm_35 # but CC 3.5 (3.2?) _CAN_ use funnel shift which is useful for mfaktc NVCCFLAGS += --generate-code arch=compute_50,code=sm_50 # CC 5.x GPUs will use this code +# NVCCFLAGS += --generate-code arch=compute_60,code=sm_60 # CC 6.x GPUs will use this code +# NVCCFLAGS += --generate-code arch=compute_70,code=sm_70 # CC 7.x GPUs will use this code +# NVCCFLAGS += --generate-code arch=compute_80,code=sm_80 # CC 8.x GPUs will use this code +# NVCCFLAGS += --generate-code arch=compute_90,code=sm_90 # CC 9.x GPUs will use this code LINK = link LFLAGS = /nologo /LTCG #/ltcg:pgo