From 8f65d2e208a65a5fd789daf65c6c3ba916ea2ebf Mon Sep 17 00:00:00 2001
From: "Karl L. Brennan" <karlludwigbrennan@outlook.com>
Date: Tue, 15 Oct 2024 22:14:01 +0200
Subject: [PATCH] Rationalize compute capability arguments in makefiles

Removes superfluous {x.y | y > 0} args, adds comments to CC 6+ lines,
and removes trailing space on CC 3.0 line.

Also enables the CC 5.0 line in the win64 and Linux makefiles
(uncommented in the latter, newly added in the former),
as current CUDA 12.6 still supports CC 5.x (Maxwell).
---
 src/Makefile       | 19 ++++++-------------
 src/Makefile.win   | 21 ++++++++++-----------
 src/Makefile.win32 |  6 +++++-
 3 files changed, 21 insertions(+), 25 deletions(-)

diff --git a/src/Makefile b/src/Makefile
index ccf4bb6..b0bd8a1 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -15,20 +15,13 @@ NVCCFLAGS = $(CUDA_INCLUDE) --ptxas-options=-v
 # generate code for various compute capabilities
 # NVCCFLAGS += --generate-code arch=compute_11,code=sm_11 # CC 1.1, 1.2 and 1.3 GPUs will use this code (1.0 is not possible for mfaktc)
 # NVCCFLAGS += --generate-code arch=compute_20,code=sm_20 # CC 2.x GPUs will use this code, one code fits all!
-# NVCCFLAGS += --generate-code arch=compute_30,code=sm_30 # all CC 3.x GPUs _COULD_ use this code 
+# NVCCFLAGS += --generate-code arch=compute_30,code=sm_30 # all CC 3.x GPUs _COULD_ use this code
 # NVCCFLAGS += --generate-code arch=compute_35,code=sm_35 # but CC 3.5 (3.2?) _CAN_ use funnel shift which is useful for mfaktc
-# NVCCFLAGS += --generate-code arch=compute_50,code=sm_50 # CC 5.x GPUs will use this code
-NVCCFLAGS += --generate-code arch=compute_60,code=sm_60
-NVCCFLAGS += --generate-code arch=compute_61,code=sm_61
-NVCCFLAGS += --generate-code arch=compute_62,code=sm_62
-NVCCFLAGS += --generate-code arch=compute_70,code=sm_70
-NVCCFLAGS += --generate-code arch=compute_72,code=sm_72
-NVCCFLAGS += --generate-code arch=compute_75,code=sm_75
-NVCCFLAGS += --generate-code arch=compute_80,code=sm_80
-NVCCFLAGS += --generate-code arch=compute_86,code=sm_86
-NVCCFLAGS += --generate-code arch=compute_87,code=sm_87
-NVCCFLAGS += --generate-code arch=compute_89,code=sm_89
-NVCCFLAGS += --generate-code arch=compute_90,code=sm_90
+NVCCFLAGS += --generate-code arch=compute_50,code=sm_50 # CC 5.x GPUs will use this code
+NVCCFLAGS += --generate-code arch=compute_60,code=sm_60 # CC 6.x GPUs will use this code
+NVCCFLAGS += --generate-code arch=compute_70,code=sm_70 # CC 7.x GPUs will use this code
+NVCCFLAGS += --generate-code arch=compute_80,code=sm_80 # CC 8.x GPUs will use this code
+NVCCFLAGS += --generate-code arch=compute_90,code=sm_90 # CC 9.x GPUs will use this code
 
 # pass some options to the C host compiler (e.g. gcc on Linux)
 NVCCFLAGS += --compiler-options=-Wall
diff --git a/src/Makefile.win b/src/Makefile.win
index c83c304..e68d6ba 100644
--- a/src/Makefile.win
+++ b/src/Makefile.win
@@ -8,17 +8,16 @@ CUFLAGS = -DWIN64 -Xcompiler /EHsc,/W3,/nologo,/Ox $(NVCCFLAGS)
 
 ############################################################
 
-NVCCFLAGS += --generate-code arch=compute_60,code=sm_60
-NVCCFLAGS += --generate-code arch=compute_61,code=sm_61
-NVCCFLAGS += --generate-code arch=compute_62,code=sm_62
-NVCCFLAGS += --generate-code arch=compute_70,code=sm_70
-NVCCFLAGS += --generate-code arch=compute_72,code=sm_72
-NVCCFLAGS += --generate-code arch=compute_75,code=sm_75
-NVCCFLAGS += --generate-code arch=compute_80,code=sm_80
-NVCCFLAGS += --generate-code arch=compute_86,code=sm_86
-NVCCFLAGS += --generate-code arch=compute_87,code=sm_87
-NVCCFLAGS += --generate-code arch=compute_89,code=sm_89
-NVCCFLAGS += --generate-code arch=compute_90,code=sm_90
+# generate code for various compute capabilities
+# NVCCFLAGS += --generate-code arch=compute_11,code=sm_11 # CC 1.1, 1.2 and 1.3 GPUs will use this code (1.0 is not possible for mfaktc)
+# NVCCFLAGS += --generate-code arch=compute_20,code=sm_20 # CC 2.x GPUs will use this code, one code fits all!
+# NVCCFLAGS += --generate-code arch=compute_30,code=sm_30 # all CC 3.x GPUs _COULD_ use this code
+# NVCCFLAGS += --generate-code arch=compute_35,code=sm_35 # but CC 3.5 (3.2?) _CAN_ use funnel shift which is useful for mfaktc
+NVCCFLAGS += --generate-code arch=compute_50,code=sm_50 # CC 5.x GPUs will use this code
+NVCCFLAGS += --generate-code arch=compute_60,code=sm_60 # CC 6.x GPUs will use this code
+NVCCFLAGS += --generate-code arch=compute_70,code=sm_70 # CC 7.x GPUs will use this code
+NVCCFLAGS += --generate-code arch=compute_80,code=sm_80 # CC 8.x GPUs will use this code
+NVCCFLAGS += --generate-code arch=compute_90,code=sm_90 # CC 9.x GPUs will use this code
 
 ############################################################
 
diff --git a/src/Makefile.win32 b/src/Makefile.win32
index 79e553e..700d32d 100644
--- a/src/Makefile.win32
+++ b/src/Makefile.win32
@@ -9,9 +9,13 @@ CUFLAGS = -ccbin="C:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\bin" -X
 # generate code for various compute capabilities
 NVCCFLAGS += --generate-code arch=compute_11,code=sm_11 # CC 1.1, 1.2 and 1.3 GPUs will use this code (1.0 is not possible for mfaktc)
 NVCCFLAGS += --generate-code arch=compute_20,code=sm_20 # CC 2.x GPUs will use this code, one code fits all!
-NVCCFLAGS += --generate-code arch=compute_30,code=sm_30 # all CC 3.x GPUs _COULD_ use this code 
+NVCCFLAGS += --generate-code arch=compute_30,code=sm_30 # all CC 3.x GPUs _COULD_ use this code
 NVCCFLAGS += --generate-code arch=compute_35,code=sm_35 # but CC 3.5 (3.2?) _CAN_ use funnel shift which is useful for mfaktc
 NVCCFLAGS += --generate-code arch=compute_50,code=sm_50 # CC 5.x GPUs will use this code
+# NVCCFLAGS += --generate-code arch=compute_60,code=sm_60 # CC 6.x GPUs will use this code
+# NVCCFLAGS += --generate-code arch=compute_70,code=sm_70 # CC 7.x GPUs will use this code
+# NVCCFLAGS += --generate-code arch=compute_80,code=sm_80 # CC 8.x GPUs will use this code
+# NVCCFLAGS += --generate-code arch=compute_90,code=sm_90 # CC 9.x GPUs will use this code
 
 LINK = link
 LFLAGS = /nologo /LTCG #/ltcg:pgo