-
Notifications
You must be signed in to change notification settings - Fork 5
/
config.mk
207 lines (178 loc) · 4.86 KB
/
config.mk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
# ---------------------------------------------------------------
# User-tunable build options.
# Every option below uses `?=`, so a value already provided (e.g. on
# the make command line or via the environment) is kept.
# ---------------------------------------------------------------

# Compiler toolchain: GCC, CLANG, ICC, ICX, ONEAPI or NVCC
TOOLCHAIN             ?= ICC
# Instruction set architecture of the generated code: X86 or ARM
ISA                   ?= X86
# Instruction set used by the intrinsic kernels
#   X86 choices: NONE, SSE, AVX, AVX_FMA, AVX2, AVX512
#   ARM choices: NONE, NEON, SVE, SVE2 (SVE is not width-agnostic yet!)
SIMD                  ?= AVX512
# Optimization scheme: verletlist or clusterpair
OPT_SCHEME            ?= clusterpair
# LIKWID support: true or false
ENABLE_LIKWID         ?= false
# OpenMP parallelization: true or false
ENABLE_OPENMP         ?= false
# Floating-point precision: SP (single) or DP (double)
DATA_TYPE             ?= SP
# Data layout: AOS or SOA
DATA_LAYOUT           ?= AOS
# Debug build: true or false
DEBUG                 ?= false
# Sort atoms when reneighboring: true or false
SORT_ATOMS            ?= false
# Simulate a single atom type only, skipping the parameter table lookup: true or false
ONE_ATOM_TYPE         ?= false
# Trace memory addresses for the cache simulator: true or false
MEM_TRACER            ?= false
# Trace indexes and distances for gather-md: true or false
INDEX_TRACER          ?= false
# Compute statistics: true or false
COMPUTE_STATS         ?= true

# --- Settings for the verletlist optimization scheme ---
# Use the omp simd pragma when running with half neighbor-lists
ENABLE_OMP_SIMD       ?= false

# --- Settings for the clusterpair optimization scheme ---
# Use the reference version
USE_REFERENCE_VERSION ?= false
# Write XTC output (a GROMACS file format for trajectories)
XTC_OUTPUT            ?= false

# --- Settings for CUDA ---
# Use CUDA pinned memory to optimize transfers
USE_CUDA_HOST_MEMORY  ?= false

# Feature options
OPTIONS = -DALIGNMENT=64
# OPTIONS += <more options>
################################################################
# DO NOT EDIT BELOW !!!
################################################################
# Preprocessor defines collected by the sections that follow; starts empty.
DEFINES =

# With the NVCC toolchain, force a vector width of 1, no SIMD and the
# reference version.
# NOTE(review): these are plain assignments, so a SIMD or
# USE_REFERENCE_VERSION value given on the make command line still wins.
ifeq ($(strip $(TOOLCHAIN)), NVCC)
    VECTOR_WIDTH := 1
    SIMD = NONE
    USE_REFERENCE_VERSION = true
endif

# Translate the ISA/SIMD selection into:
#   __ISA_<name>__       marker(s) for the selected instruction set
#   __SIMD_WIDTH_DBL__   SIMD width counted in double-precision elements
#   __SIMD_KERNEL__      set only for AVX512 with DATA_TYPE=DP
#   VECTOR_WIDTH         SIMD width counted in elements of DATA_TYPE
ifeq ($(strip $(SIMD)), NONE)
    # Without SIMD: vector width of 1 and the reference version.
    VECTOR_WIDTH := 1
    USE_REFERENCE_VERSION = true
else
    ifeq ($(strip $(ISA)),ARM)
        ifeq ($(strip $(SIMD)), NEON)
            __ISA_NEON__=true
            __SIMD_WIDTH_DBL__=2
        else ifeq ($(strip $(SIMD)), SVE)
            __ISA_SVE__=true
            # needs further specification
            __SIMD_WIDTH_DBL__=2
        else ifeq ($(strip $(SIMD)), SVE2)
            # SVE2 also enables the SVE marker.
            __ISA_SVE__=true
            __ISA_SVE2__=true
            # needs further specification
            __SIMD_WIDTH_DBL__=2
        endif
    else
        # X86 (any ISA value other than ARM ends up here)
        ifeq ($(strip $(SIMD)), SSE)
            __ISA_SSE__=true
            __SIMD_WIDTH_DBL__=2
        else ifeq ($(strip $(SIMD)), AVX)
            __ISA_AVX__=true
            __SIMD_WIDTH_DBL__=4
        else ifeq ($(strip $(SIMD)), AVX_FMA)
            # AVX_FMA also enables the plain AVX marker.
            __ISA_AVX__=true
            __ISA_AVX_FMA__=true
            __SIMD_WIDTH_DBL__=4
        else ifeq ($(strip $(SIMD)), AVX2)
            #__SIMD_KERNEL__=true
            __ISA_AVX2__=true
            __SIMD_WIDTH_DBL__=4
        else ifeq ($(strip $(SIMD)), AVX512)
            __ISA_AVX512__=true
            __SIMD_WIDTH_DBL__=8
            ifeq ($(strip $(DATA_TYPE)), DP)
                __SIMD_KERNEL__=true
            endif
        endif
    endif

    # SIMD width is specified in double-precision, hence it is doubled
    # for single-precision. VECTOR_WIDTH is assigned with `:=` so the
    # shell arithmetic runs once here instead of on every expansion.
    ifeq ($(strip $(__SIMD_WIDTH_DBL__)),)
        # No branch above matched this ISA/SIMD pair. Skip the shell
        # arithmetic (it would be a syntax error with an empty operand)
        # and leave VECTOR_WIDTH empty, so no -DVECTOR_WIDTH is emitted.
        $(warning SIMD '$(SIMD)' is not a known option for ISA '$(ISA)'; VECTOR_WIDTH is left empty)
        VECTOR_WIDTH :=
    else ifeq ($(strip $(DATA_TYPE)), SP)
        VECTOR_WIDTH := $(shell echo $$(( $(__SIMD_WIDTH_DBL__) * 2 )))
    else
        VECTOR_WIDTH := $(__SIMD_WIDTH_DBL__)
    endif
endif
# ---------------------------------------------------------------
# Turn the configuration switches into -D preprocessor defines.
# Each switch contributes its define only when its stripped value is
# exactly `true` (or the listed keyword); anything else adds nothing.
# ---------------------------------------------------------------

# Data layout: only AOS adds a define.
ifeq ($(strip $(DATA_LAYOUT)),AOS)
    DEFINES += -DAOS
endif

# Precision: 1 for SP, 2 for every other DATA_TYPE value.
ifeq ($(strip $(DATA_TYPE)),SP)
    DEFINES += -DPRECISION=1
else
    DEFINES += -DPRECISION=2
endif

# Feature switches
ifeq ($(strip $(SORT_ATOMS)),true)
    DEFINES += -DSORT_ATOMS
endif
ifeq ($(strip $(ONE_ATOM_TYPE)),true)
    DEFINES += -DONE_ATOM_TYPE
endif
ifeq ($(strip $(MEM_TRACER)),true)
    DEFINES += -DMEM_TRACER
endif
ifeq ($(strip $(INDEX_TRACER)),true)
    DEFINES += -DINDEX_TRACER
endif
ifeq ($(strip $(COMPUTE_STATS)),true)
    DEFINES += -DCOMPUTE_STATS
endif
ifeq ($(strip $(XTC_OUTPUT)),true)
    DEFINES += -DXTC_OUTPUT
endif
ifeq ($(strip $(USE_REFERENCE_VERSION)),true)
    DEFINES += -DUSE_REFERENCE_VERSION
endif
ifeq ($(strip $(DEBUG)),true)
    DEFINES += -DDEBUG
endif

# Vector width is passed on only when it has a value.
ifneq ($(VECTOR_WIDTH),)
    DEFINES += -DVECTOR_WIDTH=$(VECTOR_WIDTH)
endif

ifeq ($(strip $(__SIMD_KERNEL__)),true)
    DEFINES += -D__SIMD_KERNEL__
endif

# Instruction-set markers. The SSE check reads __ISA_SSE__, the variable
# the SIMD selection sets for SIMD=SSE; the former __SSE__ name was never
# assigned, so -D__ISA_SSE__ could not be emitted.
ifeq ($(strip $(__ISA_SSE__)),true)
    DEFINES += -D__ISA_SSE__
endif
ifeq ($(strip $(__ISA_AVX__)),true)
    DEFINES += -D__ISA_AVX__
endif
ifeq ($(strip $(__ISA_AVX_FMA__)),true)
    DEFINES += -D__ISA_AVX_FMA__
endif
ifeq ($(strip $(__ISA_AVX2__)),true)
    DEFINES += -D__ISA_AVX2__
endif
ifeq ($(strip $(__ISA_AVX512__)),true)
    DEFINES += -D__ISA_AVX512__
endif
ifeq ($(strip $(__ISA_NEON__)),true)
    DEFINES += -D__ISA_NEON__
endif
ifeq ($(strip $(__ISA_SVE__)),true)
    DEFINES += -D__ISA_SVE__
endif
ifeq ($(strip $(__ISA_SVE2__)),true)
    DEFINES += -D__ISA_SVE2__
endif

ifeq ($(strip $(ENABLE_OMP_SIMD)),true)
    DEFINES += -DENABLE_OMP_SIMD
endif
# Short tag for the optimization scheme: CP for clusterpair, VL for
# verletlist. Any other OPT_SCHEME value leaves OPT_TAG unassigned.
ifeq ($(strip $(OPT_SCHEME)),clusterpair)
    OPT_TAG = CP
else ifeq ($(strip $(OPT_SCHEME)),verletlist)
    OPT_TAG = VL
endif

# Tag naming the toolchain and ISA; the SIMD name is appended as a third
# component unless SIMD is NONE.
TOOL_TAG = $(TOOLCHAIN)-$(ISA)
ifneq ($(strip $(SIMD)),NONE)
    TOOL_TAG = $(TOOLCHAIN)-$(ISA)-$(SIMD)
endif