Skip to content

Commit 12a6978

Browse files
committed
Added Makefile.amdclang. Added best option to make.hip and make.amdclang to access fastest compile option
1 parent ede163e commit 12a6978

File tree

5 files changed

+102
-1
lines changed

5 files changed

+102
-1
lines changed

src/Makefile.amdclang

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
include configs/make.amdclang
2+
builddir=./
3+
# Append the user-supplied compiler flags from the included config file.
# NOTE(review): both appends are guarded by EXTRA_CXX_FLAGS only, so CXXFLAGS2
# is extended only when EXTRA_CXX_FLAGS has a non-empty value -- confirm that
# a separate EXTRA_CXX_FLAGS2 guard is not intended.
ifdef EXTRA_CXX_FLAGS
4+
CXXFLAGS2 += $(EXTRA_CXX_FLAGS2)
5+
CXXFLAGS += $(EXTRA_CXX_FLAGS)
6+
endif
7+
8+
ifdef EXTRA_LINK_FLAGS
9+
linklibs += $(EXTRA_LINK_FLAGS)
10+
endif
11+
12+
OBJSW4 =
13+
14+
OBJ = curvitest.o curvilinear4sgc.o Events.o
15+
16+
17+
# prefix object files with build directory
18+
FSW4 = $(addprefix $(builddir)/,$(OBJSW4))
19+
FOBJ = $(addprefix $(builddir)/,$(OBJ)) $(addprefix $(builddir)/,$(QUADPACK))
20+
21+
# Link the sw4ck executable: echo the active configuration, then link $(OBJ)
# from inside $(builddir). The cd and the link are chained with && so the link
# is not attempted in the wrong directory when the cd fails.
22+
sw4ck: $(FSW4) $(FOBJ)
23+
@echo "*** Configuration file: '" $(foundincfile) "' ***"
24+
@echo "********* User configuration variables **************"
25+
@echo "debug=" $(debug) " proj=" $(proj) " etree=" $(etree) " SW4ROOT"= $(SW4ROOT)
26+
@echo "CXX=" $(CXX) "EXTRA_CXX_FLAGS"= $(EXTRA_CXX_FLAGS)
27+
@echo "FC=" $(FC) " EXTRA_FORT_FLAGS=" $(EXTRA_FORT_FLAGS)
28+
@echo "EXTRA_LINK_FLAGS"= $(EXTRA_LINK_FLAGS)
29+
@echo "******************************************************"
30+
cd $(builddir) && $(LINKER) $(LINKFLAGS) -o $@ $(OBJ) $(linklibs)
31+
# test: linking with openmp for the routine rhs4sgcurv.o
32+
# cd $(builddir); $(CXX) $(CXXFLAGS) -qopenmp -o $@ main.o $(OBJ) $(QUADPACK) $(linklibs)
33+
@echo "*** Build directory: " $(builddir) " ***"
34+
35+
# Compile one .C source from the current directory into an object file inside
# $(builddir). The compile only runs if the cd into $(builddir) succeeded.
$(builddir)/%.o:./%.C
36+
cd $(builddir) && $(CXX) $(CXXFLAGS) -c $<
37+
38+
# Remove the executable and the compiler by-products. -f keeps the target from
# failing when some of the globs match nothing (e.g. no *.cui files exist).
.PHONY: clean
clean:
39+
rm -f sw4ck *.o *.bc *.s *.cui
40+
41+
# Reformat the sources in place with clang-format (Google style). This makefile
# is run from the directory that holds the .C files (see the ./%.C pattern
# rule), so the globs are relative to it rather than to a src/ subdirectory.
.PHONY: format
format:
42+
clang-format -style Google -i *.C
43+
clang-format -style Google -i *.h
44+
45+
# Build an Emacs TAGS index of the sources. Paths are relative to the directory
# this makefile is run from, which is where the .C and .h files live.
.PHONY: tags
tags:
46+
etags -o TAGS *.C *.h

src/configs/make.amdclang

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# Usage: make -f Makefile.amdclang
2+
3+
LINKER = amdclang++
4+
CXX = $(PREP) amdclang++
5+
RAJA_LOCATION = /home/users/coe0145/RAJA/Dev2/raja/install_90a
6+
7+
HIP_ROOT_DIR = $(HIP_PATH)
8+
9+
HSA_ROOT_DIR = ${ROCM_PATH}/hsa
10+
11+
GCC_LINK_LINE = -L $(HIP_ROOT_DIR)/lib -L $(HSA_ROOT_DIR)/lib -lhsa-runtime64 -lm -lstdc++ -ldl
12+
13+
14+
MORE_FLAGS = -I${ROCM_PATH}/include -isystem ${ROCM_PATH}/llvm/lib/clang/13.0.0/include/.. -isystem ${ROCM_PATH}/hsa/include -isystem ${ROCM_PATH}/hip/include -D__HIP_ARCH_GFX90a__=1 -O3 -x hip -Winconsistent-missing-override -DNO_RAJA=1 -Wall -Wno-unknown-pragmas --offload-arch=gfx90a -fvectorize -fslp-vectorize -mllvm -amdgpu-early-inline-all=true -mllvm -amdgpu-function-calls=false
15+
16+
17+
LINKFLAGS = -std=c++11 -fgpu-rdc --offload-arch=gfx90a --hip-link -L ${ROCM_PATH}/lib -lamdhip64
18+
EXTRA_CXX_FLAGS = -O3 -x hip -std=c++11 -I$(RAJA_LOCATION)/include -DENABLE_HIP $(MORE_FLAGS) -Winconsistent-missing-override
19+
20+
EXTRA_CXX_FLAGS2 = -O3 -std=c++11 -I $(CUDA_HOME)/include -I$(RAJA_LOCATION)/include -DRAJA_USE_RESTRICT_PTR -DCUDA_CODE -DENABLE_HIP $(MORE_FLAGS)
21+
22+
EXTRA_LINK_FLAGS = -L $(RAJA_LOCATION)/lib $(GCC_LINK_LINE)
23+
24+
# best=yes is a shortcut that switches on both amd_unroll_fix and magic_sync,
# which the conditionals below turn into -DAMD_UNROLL_FIX=1 and -DMAGIC_SYNC=1.
ifeq ($(best),yes)
25+
amd_unroll_fix=yes
26+
magic_sync=yes
27+
endif
28+
29+
ifeq ($(amd_unroll_fix),yes)
30+
MORE_FLAGS+= -DAMD_UNROLL_FIX=1
31+
endif
32+
33+
ifeq ($(magic_sync),yes)
34+
MORE_FLAGS+= -DMAGIC_SYNC=1
35+
endif
36+
37+
# register_count=yes compiles with --save-temps instead of -fgpu-rdc; any other
# value (or unset) compiles with -fgpu-rdc.
# NOTE(review): LINKFLAGS always contains -fgpu-rdc, so a register_count=yes
# build presumably is meant for inspecting the saved temporaries rather than
# producing a linked executable -- confirm.
ifeq ($(register_count),yes)
38+
MORE_FLAGS+= --save-temps
39+
else
40+
MORE_FLAGS+= -fgpu-rdc
41+
endif

src/configs/make.hip

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ HSA_ROOT_DIR = ${ROCM_PATH}/hsa
1111
GCC_LINK_LINE = -L $(HIP_ROOT_DIR)/lib -L $(HSA_ROOT_DIR)/lib -lhsa-runtime64 -lm -lstdc++ -ldl
1212

1313

14-
MORE_FLAGS = -I${ROCM_PATH}/include -isystem ${ROCM_PATH}/llvm/lib/clang/12.0.0/include/.. -isystem ${ROCM_PATH}/hsa/include -isystem ${ROCM_PATH}/hip/include -D__HIP_ARCH_GFX90a__=1 --cuda-gpu-arch=gfx90a -O3 -x hip -Winconsistent-missing-override --amdgpu-target=gfx90a -DNO_RAJA=1 -Wall -Wno-unknown-pragmas
14+
MORE_FLAGS = -I${ROCM_PATH}/include -isystem ${ROCM_PATH}/llvm/lib/clang/12.0.0/include/.. -isystem ${ROCM_PATH}/hsa/include -isystem ${ROCM_PATH}/hip/include -D__HIP_ARCH_GFX90a__=1 --cuda-gpu-arch=gfx90a -O3 -x hip -Winconsistent-missing-override --amdgpu-target=gfx90a -DNO_RAJA=1 -Wall -Wno-unknown-pragmas
1515

1616

1717
LINKFLAGS = -std=c++11 --amdgpu-target=gfx90a -fgpu-rdc
@@ -21,6 +21,11 @@ EXTRA_CXX_FLAGS2 = -O3 -std=c++11 -I $(CUDA_HOME)/include -I$(RAJA_LOCATION)/i
2121

2222
EXTRA_LINK_FLAGS = -L $(RAJA_LOCATION)/lib $(GCC_LINK_LINE)
2323

24+
ifeq ($(best),yes)
25+
amd_unroll_fix=yes
26+
magic_sync=yes
27+
endif
28+
2429
ifeq ($(amd_unroll_fix),yes)
2530
MORE_FLAGS+= -DAMD_UNROLL_FIX=1
2631
endif
@@ -34,3 +39,4 @@ MORE_FLAGS+= --save-temps
3439
else
3540
MORE_FLAGS+= -fgpu-rdc
3641
endif
42+

src/curvitest.C

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,12 @@ void promo_version(){
314314
std::stringstream s;
315315
#ifdef ENABLE_HIP
316316
s<<"HIP("<<HIP_VERSION_MAJOR<<"."<<HIP_VERSION_MINOR<<"."<<HIP_VERSION_PATCH<<")\n";
317+
#ifdef AMD_UNROLL_FIX
318+
s<<"AMD unroll fix enabled\n";
319+
#endif
320+
#ifdef MAGIC_SYNC
321+
s<<"Magic sync enabled\n";
322+
#endif
317323
#elif ENABLE_CUDA
318324
s<<"CUDA("<<CUDA_VERSION<<")\n";
319325
#else
@@ -322,6 +328,7 @@ void promo_version(){
322328
#ifndef NO_RAJA
323329
s<<"RAJA("<<RAJA_VERSION_MAJOR<<"."<<RAJA_VERSION_MINOR<<"."<<RAJA_VERSION_PATCHLEVEL<<")\n";
324330
#endif
331+
s<<"\n";
325332
std::cout<<s.str();
326333

327334
}

src/run_flux

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
flux mini run -n 1 -c 8 -g 1 -o gpu-affinity=per-task -o cpu-affinity=per-task ./sw4ck sw4ck.in

0 commit comments

Comments
 (0)