Skip to content

Commit 2073da6

Browse files
Merge pull request #228 from LibRapid/test
Support C++20 and beyond
2 parents 01fd51b + 2358af8 commit 2073da6

File tree

139 files changed

+23380
-23026
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

139 files changed

+23380
-23026
lines changed

.gitmodules

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
[submodule "librapid/vendor/jitify"]
22
path = librapid/vendor/jitify
33
url = https://github.com/Pencilcaseman/jitify.git
4-
[submodule "librapid/vendor/Vc"]
5-
path = librapid/vendor/Vc
6-
url = https://github.com/Pencilcaseman/Vc.git
74
[submodule "librapid/vendor/fmt"]
85
path = librapid/vendor/fmt
96
url = https://github.com/fmtlib/fmt.git
@@ -19,3 +16,6 @@
1916
[submodule "librapid/vendor/CLBlast"]
2017
path = librapid/vendor/CLBlast
2118
url = https://github.com/CNugteren/CLBlast.git
19+
[submodule "librapid/vendor/xsimd"]
20+
path = librapid/vendor/xsimd
21+
url = https://github.com/LibRapid/xsimd.git

CMakeLists.txt

+33-19
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,16 @@ cmake_minimum_required(VERSION 3.16)
22
project(librapid)
33
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
44
cmake_policy(SET CMP0077 NEW)
5-
set(CMAKE_CXX_STANDARD 17)
5+
6+
if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
7+
message(STATUS "[ LIBRAPID ] LibRapid is a top-level project. Using C++23")
8+
set(CMAKE_CXX_STANDARD 23)
9+
endif ()
10+
11+
# LibRapid requires C++20 or later
12+
if (CMAKE_CXX_STANDARD LESS 20)
13+
message(FATAL_ERROR "LibRapid requires C++20 or later")
14+
endif ()
615

716
# Extract version information
817
file(READ "version.txt" ver)
@@ -38,7 +47,7 @@ option(LIBRAPID_USE_OPENCL "Search for OpenCL and use it if possible" ON)
3847
option(LIBRAPID_USE_CUDA "Attempt to use CUDA" ON)
3948
option(LIBRAPID_USE_MULTIPREC "Include MPIR and MPFR in the LibRapid build" OFF)
4049
option(LIBRAPID_FAST_MATH "Use potentially less accurate operations to increase performance" OFF)
41-
option(LIBRAPID_NATIVE_ARCH "Use the native architecture of the system" OFF)
50+
option(LIBRAPID_NATIVE_ARCH "Use the native architecture of the system" ON)
4251

4352
option(LIBRAPID_CUDA_DOUBLE_VECTOR_WIDTH "Preferred vector width for vectorised kernels" 2)
4453
option(LIBRAPID_CUDA_FLOAT_VECTOR_WIDTH "Preferred vector width for vectorised kernels" 4)
@@ -124,6 +133,12 @@ if (LIBRAPID_STRICT AND LIBRAPID_QUIET)
124133
message(FATAL_ERROR "LIBRAPID_STRICT and LIBRAPID_QUIET cannot be enabled at the same time")
125134
endif ()
126135

136+
# SIMD instructions do not currently work on MacOS
137+
#if (IS_MACOS AND LIBRAPID_NATIVE_ARCH)
138+
# message(WARNING "SIMD instructions are not currently supported on MacOS. Disabling LIBRAPID_NATIVE_ARCH")
139+
# set(LIBRAPID_NATIVE_ARCH OFF)
140+
#endif ()
141+
127142
if (LIBRAPID_STRICT)
128143
# Enable all warnings and treat them as errors
129144
if (MSVC)
@@ -415,19 +430,14 @@ endif ()
415430

416431
# Add dependencies
417432
add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/librapid/vendor/fmt")
418-
add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/librapid/vendor/Vc")
419-
420-
if (NOT MINGW)
421-
# scnlib does not support MinGW, since it does not implement std::from_chars, which is required by the library
422-
add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/librapid/vendor/scnlib")
423-
else ()
424-
message(WARNING "[ LIBRAPID ] scnlib cannot be built by MinGW, so it will not be enabled")
425-
target_compile_definitions(${module_name} PUBLIC LIBRAPID_MINGW)
426-
endif ()
433+
# add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/librapid/vendor/Vc")
434+
add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/librapid/vendor/xsimd")
435+
add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/librapid/vendor/scnlib")
427436

428437
target_compile_definitions(fmt PUBLIC FMT_HEADER_ONLY)
429-
target_compile_definitions(Vc PRIVATE Vc_HACK_OSTREAM_FOR_TTY)
430-
target_link_libraries(${module_name} PUBLIC fmt scn Vc)
438+
# target_compile_definitions(Vc PRIVATE Vc_HACK_OSTREAM_FOR_TTY)
439+
# target_link_libraries(${module_name} PUBLIC fmt scn Vc xsimd)
440+
target_link_libraries(${module_name} PUBLIC fmt scn xsimd)
431441

432442
if (${LIBRAPID_USE_MULTIPREC})
433443
# Load MPIR
@@ -484,15 +494,19 @@ if (LIBRAPID_FAST_MATH)
484494
target_compile_definitions(${module_name} PUBLIC LIBRAPID_FAST_MATH)
485495
endif ()
486496

487-
set(LIBRAPID_ARCH_FLAGS)
488497
if (LIBRAPID_NATIVE_ARCH)
489498
message(STATUS "[ LIBRAPID ] Compiling for native architecture")
490-
OptimizeForArchitecture()
491-
target_compile_options(${module_name} PUBLIC ${Vc_DEFINITIONS} ${Vc_ARCHITECTURE_FLAGS})
499+
500+
include(ArchDetect2)
501+
target_compile_options(${module_name} PUBLIC ${LIBRAPID_ARCH_FLAGS})
492502
target_compile_definitions(${module_name} PUBLIC LIBRAPID_NATIVE_ARCH)
493-
set(LIBRAPID_ARCH_FLAGS ${Vc_DEFINITIONS} ${Vc_ARCHITECTURE_FLAGS})
494-
message(STATUS "[ LIBRAPID ] Additional Definitions: ${Vc_DEFINITIONS}")
495-
message(STATUS "[ LIBRAPID ] Supported flags: ${Vc_ARCHITECTURE_FLAGS}")
503+
504+
# OptimizeForArchitecture()
505+
# target_compile_options(${module_name} PUBLIC ${Vc_DEFINITIONS} ${Vc_ARCHITECTURE_FLAGS})
506+
# target_compile_definitions(${module_name} PUBLIC LIBRAPID_NATIVE_ARCH)
507+
# set(LIBRAPID_ARCH_FLAGS ${Vc_DEFINITIONS} ${Vc_ARCHITECTURE_FLAGS})
508+
# message(STATUS "[ LIBRAPID ] Additional Definitions: ${Vc_DEFINITIONS}")
509+
# message(STATUS "[ LIBRAPID ] Supported flags: ${Vc_ARCHITECTURE_FLAGS}")
496510
endif ()
497511

498512
# Add defines for CUDA vector widths

cmake/ArchDetect2.cmake

+243
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,243 @@
1+
INCLUDE(CheckCXXSourceRuns)
2+
3+
set(COMPILER_GNU false)
4+
set(COMPILER_INTEL false)
5+
set(COMPILER_CLANG false)
6+
set(COMPILER_MSVC false)
7+
8+
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
9+
set(COMPILER_GNU true)
10+
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
11+
set(COMPILER_INTEL true)
12+
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
13+
set(COMPILER_CLANG true)
14+
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
15+
set(COMPILER_MSVC true)
16+
else ()
17+
# Unknown Compiler
18+
endif ()
19+
20+
set(LIBRAPID_ARCH_FLAGS)
21+
set(LIBRAPID_ARCH_FOUND)
22+
23+
# Function to test a given SIMD capability
24+
function(check_simd_capability FLAG_GNU FLAG_MSVC NAME TEST_SOURCE VAR)
25+
set(CMAKE_REQUIRED_FLAGS)
26+
if (COMPILER_GNU OR COMPILER_INTEL OR COMPILER_CLANG)
27+
set(CMAKE_REQUIRED_FLAGS "${FLAG_GNU}")
28+
elseif (COMPILER_MSVC) # reserve for WINDOWS
29+
set(CMAKE_REQUIRED_FLAGS "${FLAG_MSVC}")
30+
endif ()
31+
32+
CHECK_CXX_SOURCE_RUNS("${TEST_SOURCE}" ${VAR})
33+
34+
if (${${VAR}})
35+
if (COMPILER_GNU OR COMPILER_INTEL OR COMPILER_CLANG)
36+
# set(LIBRAPID_ARCH_FLAGS "${LIBRAPID_ARCH_FLAGS} ${FLAG_GNU}" PARENT_SCOPE)
37+
38+
list(APPEND LIBRAPID_ARCH_FLAGS ${FLAG_GNU})
39+
set(LIBRAPID_ARCH_FLAGS ${LIBRAPID_ARCH_FLAGS} PARENT_SCOPE)
40+
41+
message(STATUS "[ LIBRAPID ] ${NAME} found: ${FLAG_GNU}")
42+
elseif (MSVC)
43+
# set(LIBRAPID_ARCH_FLAGS "${LIBRAPID_ARCH_FLAGS} ${FLAG_MSVC}" PARENT_SCOPE)
44+
45+
list(APPEND LIBRAPID_ARCH_FLAGS ${FLAG_MSVC})
46+
set(LIBRAPID_ARCH_FLAGS ${LIBRAPID_ARCH_FLAGS} PARENT_SCOPE)
47+
48+
message(STATUS "[ LIBRAPID ] ${NAME} found: ${FLAG_MSVC}")
49+
endif ()
50+
set(LIBRAPID_ARCH_FOUND TRUE PARENT_SCOPE)
51+
else ()
52+
message(STATUS "[ LIBRAPID ] ${NAME} not found")
53+
endif ()
54+
endfunction()
55+
56+
# Check SSE2 (not a valid flag for MSVC)
57+
check_simd_capability("-msse2" "" "SSE2" "
58+
#include <emmintrin.h>
59+
int main() {
60+
__m128i a = _mm_set_epi32 (-1, 2, -3, 4);
61+
__m128i result = _mm_add_epi32 (a, a); // SSE2 integer add; _mm_abs_epi32 needs SSSE3 and breaks this probe under -msse2
62+
return 0;
63+
}" SIMD_SSE2)
64+
65+
# Check SSE3 (not a valid flag for MSVC)
66+
check_simd_capability("-msse3" "" "SSE3" "
67+
#include <pmmintrin.h>
68+
int main() {
69+
__m128 a = _mm_set_ps (-1.0f, 2.0f, -3.0f, 4.0f);
70+
__m128 b = _mm_set_ps (1.0f, 2.0f, 3.0f, 4.0f);
71+
__m128 result = _mm_addsub_ps (a, b);
72+
return 0;
73+
}" SIMD_SSE3)
74+
75+
# Check SSSE3 (not a valid flag for MSVC)
76+
check_simd_capability("-mssse3" "" "SSSE3" "
77+
#include <tmmintrin.h>
78+
int main() {
79+
__m128i a = _mm_set_epi8(-1, 2, -3, 4, -1, 2, -3, 4, -1, 2, -3, 4, -1, 2, -3, 4);
80+
__m128i result = _mm_abs_epi8(a);
81+
return 0;
82+
}" SIMD_SSSE3)
83+
84+
# Check SSE4.1 (not a valid flag for MSVC)
85+
check_simd_capability("-msse4.1" "" "SSE4.1" "
86+
#include <smmintrin.h>
87+
int main() {
88+
__m128i a = _mm_set_epi32(-1, 2, -3, 4);
89+
__m128i result = _mm_mullo_epi32(a, a); // SSE4.1-only instruction, so the run test fails on CPUs that stop at SSSE3
90+
return 0;
91+
}" SIMD_SSE4_1)
92+
93+
# Check SSE4.2 (not a valid flag for MSVC)
94+
check_simd_capability("-msse4.2" "" "SSE4.2" "
95+
#include <nmmintrin.h>
96+
int main() {
97+
__m128i a = _mm_set_epi32(-1, 2, -3, 4);
98+
__m128i result = _mm_cmpgt_epi64(a, a); // SSE4.2-only instruction, so the run test fails on CPUs without SSE4.2
99+
return 0;
100+
}" SIMD_SSE4_2)
101+
102+
# Check AVX
103+
check_simd_capability("-mavx" "/arch:AVX" "AVX" "
104+
#include <immintrin.h>
105+
int main() {
106+
__m256 a = _mm256_set_ps(-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f);
107+
__m256 result = _mm256_add_ps(a, a); // real AVX intrinsic; _mm256_abs_ps does not exist, so the probe never compiled
108+
return 0;
109+
}" SIMD_AVX)
110+
111+
# Check AVX2
112+
check_simd_capability("-mavx2" "/arch:AVX2" "AVX2" "
113+
#include <immintrin.h>
114+
int main() {
115+
__m256i a = _mm256_set_epi32(-1, 2, -3, 4, -1, 2, -3, 4);
116+
__m256i result = _mm256_abs_epi32(a);
117+
return 0;
118+
}" SIMD_AVX2)
119+
120+
# Check AVX512F
121+
check_simd_capability("-mavx512f" "/arch:AVX512" "AVX512F" "
122+
#include <immintrin.h>
123+
int main() {
124+
__m512i a = _mm512_set_epi32(-1, 2, -3, 4, -1, 2, -3, 4, -1, 2, -3, 4, -1, 2, -3, 4);
125+
__m512i result = _mm512_abs_epi32(a);
126+
return 0;
127+
}" SIMD_AVX512F)
128+
129+
# Check AVX512BW
130+
check_simd_capability("-mavx512bw" "/arch:AVX512" "AVX512BW" "
131+
#include <immintrin.h>
132+
int main() {
133+
__m512i a = _mm512_set_epi64(-1, 2, -3, 4, -1, 2, -3, 4);
134+
__m512i result = _mm512_abs_epi8(a);
135+
return 0;
136+
}" SIMD_AVX512BW)
137+
138+
# Check AVX512CD
139+
check_simd_capability("-mavx512cd" "/arch:AVX512" "AVX512CD" "
140+
#include <immintrin.h>
141+
int main() {
142+
__m512i a = _mm512_set_epi64(-1, 2, -3, 4, -1, 2, -3, 4);
143+
__m512i result = _mm512_conflict_epi64(a);
144+
return 0;
145+
}" SIMD_AVX512CD)
146+
147+
# Check AVX512DQ
148+
check_simd_capability("-mavx512dq" "/arch:AVX512" "AVX512DQ" "
149+
#include <immintrin.h>
150+
int main() {
151+
__m512d a = _mm512_set_pd(-1.0, 2.0, -3.0, 4.0, -1.0, 2.0, -3.0, 4.0);
152+
__m512d result = _mm512_and_pd(a, a); // AVX512DQ instruction; _mm512_abs_pd only needs AVX512F and gave false positives
153+
return 0;
154+
}" SIMD_AVX512DQ)
155+
156+
# Check AVX512ER
157+
check_simd_capability("-mavx512er" "/arch:AVX512" "AVX512ER" "
158+
#include <immintrin.h>
159+
int main() {
160+
__m512d a = _mm512_set_pd(-1.0, 2.0, -3.0, 4.0, -1.0, 2.0, -3.0, 4.0);
161+
__m512d result = _mm512_exp2a23_pd(a); // AVX512ER instruction; _mm512_exp_pd is an SVML library call, not an ER instruction
162+
return 0;
163+
}" SIMD_AVX512ER)
164+
165+
# Check AVX512PF
166+
check_simd_capability("-mavx512pf" "/arch:AVX512" "AVX512PF" "
167+
#include <immintrin.h>
168+
int main() {
169+
__m512i a = _mm512_set1_epi32(0); // gather indices: every lane points at offset 0 from the base address
170+
_mm512_prefetch_i32gather_ps(a, &a, 4, _MM_HINT_T0); // AVX512PF gather-prefetch; the old probe used an 8-argument _mm512_set_ps and SVML _mm512_exp_ps
171+
return 0;
172+
}" SIMD_AVX512PF)
173+
174+
# ARM
175+
check_simd_capability("-march=armv7-a" "" "ARMv7" "
176+
#include <arm_neon.h>
177+
int main() {
178+
int32x4_t a = vdupq_n_s32(1);
179+
int32x4_t b = vdupq_n_s32(2);
180+
int32x4_t result = vaddq_s32(a, b);
181+
return 0;
182+
}" SIMD_ARMv7)
183+
184+
check_simd_capability("-march=armv8-a" "" "ARMv8" "
185+
#include <arm_neon.h>
186+
int main() {
187+
int32x4_t a = vdupq_n_s32(1);
188+
int32x4_t b = vdupq_n_s32(2);
189+
int32x4_t result = vaddq_s32(a, b);
190+
return 0;
191+
}" SIMD_ARMv8)
192+
193+
# ARM64
194+
check_simd_capability("-march=armv8.1-a" "" "ARMv8.1" "
195+
#include <arm_neon.h>
196+
int main() {
197+
int32x4_t a = vdupq_n_s32(1);
198+
int32x4_t b = vdupq_n_s32(2);
199+
int32x4_t result = vaddq_s32(a, b);
200+
return 0;
201+
}" SIMD_ARMv8_1)
202+
203+
check_simd_capability("-march=armv8.2-a" "" "ARMv8.2" "
204+
#include <arm_neon.h>
205+
int main() {
206+
int32x4_t a = vdupq_n_s32(1);
207+
int32x4_t b = vdupq_n_s32(2);
208+
int32x4_t result = vaddq_s32(a, b);
209+
return 0;
210+
}" SIMD_ARMv8_2)
211+
212+
check_simd_capability("-march=armv8.3-a" "" "ARMv8.3" "
213+
#include <arm_neon.h>
214+
int main() {
215+
int32x4_t a = vdupq_n_s32(1);
216+
int32x4_t b = vdupq_n_s32(2);
217+
int32x4_t result = vaddq_s32(a, b);
218+
return 0;
219+
}" SIMD_ARMv8_3)
220+
221+
check_simd_capability("-march=armv8.4-a" "" "ARMv8.4" "
222+
#include <arm_neon.h>
223+
int main() {
224+
int32x4_t a = vdupq_n_s32(1);
225+
int32x4_t b = vdupq_n_s32(2);
226+
int32x4_t result = vaddq_s32(a, b);
227+
return 0;
228+
}" SIMD_ARMv8_4)
229+
230+
check_simd_capability("-march=armv8.5-a" "" "ARMv8.5" "
231+
#include <arm_neon.h>
232+
int main() {
233+
int32x4_t a = vdupq_n_s32(1);
234+
int32x4_t b = vdupq_n_s32(2);
235+
int32x4_t result = vaddq_s32(a, b);
236+
return 0;
237+
}" SIMD_ARMv8_5)
238+
239+
if (LIBRAPID_ARCH_FOUND)
240+
message(STATUS "[ LIBRAPID ] Architecture Flags: ${LIBRAPID_ARCH_FLAGS}")
241+
else()
242+
message(STATUS "[ LIBRAPID ] Architecture Flags Not Found")
243+
endif()

docs/source/cmakeIntegration.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ but may cause some functions to return slightly incorrect results due to lower p
172172
### ``LIBRAPID_NATIVE_ARCH``
173173

174174
```
175-
DEFAULT: OFF
175+
DEFAULT: ON
176176
```
177177

178178
Enabling this flag compiles librapid with the most advanced instruction set available on the system. This can lead to

examples/example-vector-1.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ namespace lrc = librapid;
55
auto main() -> int {
66
fmt::print("LibRapid Example -- Vector 1\n");
77

8+
#if 0 // Currently broken -- switching SIMD backend
89
// Create a 3 dimensional vector
910
lrc::Vec3d myVector(2, 3, 4);
1011
lrc::Vec3d myOtherVector(10, 5, 8);
@@ -52,6 +53,7 @@ auto main() -> int {
5253
fmt::print("One vector: {}\n", one);
5354
fmt::print("Full vector: {}\n", full);
5455
fmt::print("Random vector: {:.3f}\n", random);
56+
#endif
5557

5658
return 0;
5759
}

librapid/include/librapid/array/array.hpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@
66
#include "storage.hpp"
77

88
#if defined(LIBRAPID_HAS_OPENCL)
9-
# include "../OpenCL/openclStorage.hpp"
9+
# include "../OpenCL/openclStorage.hpp"
1010
#endif // LIBRAPID_HAS_OPENCL
1111

1212
#if defined(LIBRAPID_HAS_CUDA)
13-
# include "../cuda/cudaStorage.hpp"
13+
# include "../cuda/cudaStorage.hpp"
1414
#endif // LIBRAPID_HAS_CUDA
1515

1616
#include "arrayTypeDef.hpp"

0 commit comments

Comments
 (0)