Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support C++20 and beyond #228

Merged
merged 30 commits into from
Aug 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
b460e43
Merge branch 'develop' into test
Pencilcaseman Aug 4, 2023
4ef621c
Transfer from Vc to xsimd
Pencilcaseman Aug 5, 2023
c588da5
Switch SIMD backend to xsimd
Pencilcaseman Aug 5, 2023
6cf0961
Update dual number library (UNTESTED)
Pencilcaseman Aug 5, 2023
cda9f72
Removed submodule librapid/vendor/xsimd
Pencilcaseman Aug 6, 2023
96d28f2
Continue transfer to xsimd
Pencilcaseman Aug 6, 2023
034c770
xsimd updates
Pencilcaseman Aug 6, 2023
36a598f
Set LIBRAPID_NATIVE_ARCH to ON by default
Pencilcaseman Aug 6, 2023
7c8211a
Bug fix in sinh
Pencilcaseman Aug 6, 2023
eb6a65e
Native Arch does not work on MacOS
Pencilcaseman Aug 6, 2023
addf642
Update CMakeLists.txt
Pencilcaseman Aug 6, 2023
9aaa467
MacOS memory alignment
Pencilcaseman Aug 6, 2023
e5435cc
Run an example that's erroring for more information
Pencilcaseman Aug 6, 2023
b453f6b
Attempt at MacOS segfault fix
Pencilcaseman Aug 7, 2023
78a9112
Update example for more debug info
Pencilcaseman Aug 7, 2023
5cbe302
Did I find the magical error?
Pencilcaseman Aug 7, 2023
8abb36b
Fix MacOS segfault
Pencilcaseman Aug 7, 2023
0a8bbe3
Slowly porting code to C++20 and beyond
Pencilcaseman Aug 10, 2023
6046618
Fix array formatting. Still need to do the rest
Pencilcaseman Aug 11, 2023
0cb3f69
Update clang-format and use spaces, not tabs
Pencilcaseman Aug 12, 2023
2f9f8c7
Continue updating to C++23
Pencilcaseman Aug 13, 2023
6729a39
Require C++23 in tests
Pencilcaseman Aug 13, 2023
9e72beb
C++23?
Pencilcaseman Aug 13, 2023
956a349
C++23 again?
Pencilcaseman Aug 13, 2023
03e045f
Why does this not set the C++ version?
Pencilcaseman Aug 13, 2023
3a020ab
Might have fixed it
Pencilcaseman Aug 13, 2023
9c4c1e8
Template name change
Pencilcaseman Aug 13, 2023
adc4751
Another template name change
Pencilcaseman Aug 13, 2023
8d0922f
oops
Pencilcaseman Aug 13, 2023
2358af8
Implement fmt formatting for half precision types
Pencilcaseman Aug 13, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
[submodule "librapid/vendor/jitify"]
path = librapid/vendor/jitify
url = https://github.com/Pencilcaseman/jitify.git
[submodule "librapid/vendor/Vc"]
path = librapid/vendor/Vc
url = https://github.com/Pencilcaseman/Vc.git
[submodule "librapid/vendor/fmt"]
path = librapid/vendor/fmt
url = https://github.com/fmtlib/fmt.git
Expand All @@ -19,3 +16,6 @@
[submodule "librapid/vendor/CLBlast"]
path = librapid/vendor/CLBlast
url = https://github.com/CNugteren/CLBlast.git
[submodule "librapid/vendor/xsimd"]
path = librapid/vendor/xsimd
url = https://github.com/LibRapid/xsimd.git
52 changes: 33 additions & 19 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,16 @@ cmake_minimum_required(VERSION 3.16)
project(librapid)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
cmake_policy(SET CMP0077 NEW)
set(CMAKE_CXX_STANDARD 17)

if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
message(STATUS "[ LIBRAPID ] LibRapid is a top-level project. Using C++23")
set(CMAKE_CXX_STANDARD 23)
endif ()

# LibRapid requires C++20 or later
if (CMAKE_CXX_STANDARD LESS 20)
message(FATAL_ERROR "LibRapid requires C++20 or later")
endif ()

# Extract version information
file(READ "version.txt" ver)
Expand Down Expand Up @@ -38,7 +47,7 @@ option(LIBRAPID_USE_OPENCL "Search for OpenCL and use it if possible" ON)
option(LIBRAPID_USE_CUDA "Attempt to use CUDA" ON)
option(LIBRAPID_USE_MULTIPREC "Include MPIR and MPFR in the LibRapid build" OFF)
option(LIBRAPID_FAST_MATH "Use potentially less accurate operations to increase performance" OFF)
option(LIBRAPID_NATIVE_ARCH "Use the native architecture of the system" OFF)
option(LIBRAPID_NATIVE_ARCH "Use the native architecture of the system" ON)

option(LIBRAPID_CUDA_DOUBLE_VECTOR_WIDTH "Preferred vector width for vectorised kernels" 2)
option(LIBRAPID_CUDA_FLOAT_VECTOR_WIDTH "Preferred vector width for vectorised kernels" 4)
Expand Down Expand Up @@ -124,6 +133,12 @@ if (LIBRAPID_STRICT AND LIBRAPID_QUIET)
message(FATAL_ERROR "LIBRAPID_STRICT and LIBRAPID_QUIET cannot be enabled at the same time")
endif ()

# SIMD instructions do not currently work on MacOS
#if (IS_MACOS AND LIBRAPID_NATIVE_ARCH)
# message(WARNING "SIMD instructions are not currently supported on MacOS. Disabling LIBRAPID_NATIVE_ARCH")
# set(LIBRAPID_NATIVE_ARCH OFF)
#endif ()

if (LIBRAPID_STRICT)
# Enable all warnings and treat them as errors
if (MSVC)
Expand Down Expand Up @@ -415,19 +430,14 @@ endif ()

# Add dependencies
add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/librapid/vendor/fmt")
add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/librapid/vendor/Vc")

if (NOT MINGW)
# scnlib does not support MinGW, since it does not implement std::from_chars, which is required by the library
add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/librapid/vendor/scnlib")
else ()
message(WARNING "[ LIBRAPID ] scnlib cannot be built by MinGW, so it will not be enabled")
target_compile_definitions(${module_name} PUBLIC LIBRAPID_MINGW)
endif ()
# add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/librapid/vendor/Vc")
add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/librapid/vendor/xsimd")
add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/librapid/vendor/scnlib")

target_compile_definitions(fmt PUBLIC FMT_HEADER_ONLY)
target_compile_definitions(Vc PRIVATE Vc_HACK_OSTREAM_FOR_TTY)
target_link_libraries(${module_name} PUBLIC fmt scn Vc)
# target_compile_definitions(Vc PRIVATE Vc_HACK_OSTREAM_FOR_TTY)
# target_link_libraries(${module_name} PUBLIC fmt scn Vc xsimd)
target_link_libraries(${module_name} PUBLIC fmt scn xsimd)

if (${LIBRAPID_USE_MULTIPREC})
# Load MPIR
Expand Down Expand Up @@ -484,15 +494,19 @@ if (LIBRAPID_FAST_MATH)
target_compile_definitions(${module_name} PUBLIC LIBRAPID_FAST_MATH)
endif ()

set(LIBRAPID_ARCH_FLAGS)
if (LIBRAPID_NATIVE_ARCH)
message(STATUS "[ LIBRAPID ] Compiling for native architecture")
OptimizeForArchitecture()
target_compile_options(${module_name} PUBLIC ${Vc_DEFINITIONS} ${Vc_ARCHITECTURE_FLAGS})

include(ArchDetect2)
target_compile_options(${module_name} PUBLIC ${LIBRAPID_ARCH_FLAGS})
target_compile_definitions(${module_name} PUBLIC LIBRAPID_NATIVE_ARCH)
set(LIBRAPID_ARCH_FLAGS ${Vc_DEFINITIONS} ${Vc_ARCHITECTURE_FLAGS})
message(STATUS "[ LIBRAPID ] Additional Definitions: ${Vc_DEFINITIONS}")
message(STATUS "[ LIBRAPID ] Supported flags: ${Vc_ARCHITECTURE_FLAGS}")

# OptimizeForArchitecture()
# target_compile_options(${module_name} PUBLIC ${Vc_DEFINITIONS} ${Vc_ARCHITECTURE_FLAGS})
# target_compile_definitions(${module_name} PUBLIC LIBRAPID_NATIVE_ARCH)
# set(LIBRAPID_ARCH_FLAGS ${Vc_DEFINITIONS} ${Vc_ARCHITECTURE_FLAGS})
# message(STATUS "[ LIBRAPID ] Additional Definitions: ${Vc_DEFINITIONS}")
# message(STATUS "[ LIBRAPID ] Supported flags: ${Vc_ARCHITECTURE_FLAGS}")
endif ()

# Add defines for CUDA vector widths
Expand Down
243 changes: 243 additions & 0 deletions cmake/ArchDetect2.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
INCLUDE(CheckCXXSourceRuns)

# Classify the active C++ compiler into one of four families based on
# CMAKE_CXX_COMPILER_ID. Only the exact IDs "GNU", "Intel", "Clang" and "MSVC"
# are recognised; for any other ID all four COMPILER_* variables are false.
# NOTE(review): these are exact string matches, so IDs such as "AppleClang" or
# "IntelLLVM" are treated as an unknown compiler -- confirm this is intended.
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
    set(COMPILER_GNU true)
else ()
    set(COMPILER_GNU false)
endif ()

if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
    set(COMPILER_INTEL true)
else ()
    set(COMPILER_INTEL false)
endif ()

if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
    set(COMPILER_CLANG true)
else ()
    set(COMPILER_CLANG false)
endif ()

if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
    set(COMPILER_MSVC true)
else ()
    set(COMPILER_MSVC false)
endif ()

# Accumulators filled in by check_simd_capability() via PARENT_SCOPE:
#   LIBRAPID_ARCH_FLAGS - CMake list of compiler flags whose probe succeeded
#   LIBRAPID_ARCH_FOUND - set to TRUE once at least one probe succeeded
# Both start out unset.
set(LIBRAPID_ARCH_FLAGS)
set(LIBRAPID_ARCH_FOUND)

# check_simd_capability(<FLAG_GNU> <FLAG_MSVC> <NAME> <TEST_SOURCE> <VAR>)
#
# Compiles and runs TEST_SOURCE to decide whether a SIMD capability is usable.
#
#   FLAG_GNU    - flag used when COMPILER_GNU, COMPILER_INTEL or COMPILER_CLANG is set
#   FLAG_MSVC   - flag used when COMPILER_MSVC is set (may be empty)
#   NAME        - human-readable capability name, used only in status messages
#   TEST_SOURCE - complete C++ program; it must build and exit with status 0
#   VAR         - name of the variable that check_cxx_source_runs() stores its result in
#
# On success the selected flag (if non-empty) is appended to LIBRAPID_ARCH_FLAGS
# in the caller's scope and LIBRAPID_ARCH_FOUND is set to TRUE there.
function(check_simd_capability FLAG_GNU FLAG_MSVC NAME TEST_SOURCE VAR)
    # Select the candidate flag exactly once, so the flag the probe is compiled
    # with is always the flag that gets recorded. Previously the recording step
    # consulted the built-in MSVC variable instead of COMPILER_MSVC, which could
    # record FLAG_MSVC even though the probe had been compiled without it.
    set(compiler_recognised FALSE)
    set(candidate_flag "")
    if (COMPILER_GNU OR COMPILER_INTEL OR COMPILER_CLANG)
        set(compiler_recognised TRUE)
        set(candidate_flag "${FLAG_GNU}")
    elseif (COMPILER_MSVC)
        set(compiler_recognised TRUE)
        set(candidate_flag "${FLAG_MSVC}")
    endif ()

    # Function scope keeps this assignment from leaking into the caller.
    set(CMAKE_REQUIRED_FLAGS "${candidate_flag}")
    check_cxx_source_runs("${TEST_SOURCE}" ${VAR})

    if (NOT ${VAR})
        message(STATUS "[ LIBRAPID ] ${NAME} not found")
        return()
    endif ()

    if (compiler_recognised)
        # An empty flag means the capability needs no switch on this compiler;
        # do not append an empty list element in that case.
        if (NOT "${candidate_flag}" STREQUAL "")
            list(APPEND LIBRAPID_ARCH_FLAGS "${candidate_flag}")
            set(LIBRAPID_ARCH_FLAGS ${LIBRAPID_ARCH_FLAGS} PARENT_SCOPE)
        endif ()
        message(STATUS "[ LIBRAPID ] ${NAME} found: ${candidate_flag}")
    endif ()

    set(LIBRAPID_ARCH_FOUND TRUE PARENT_SCOPE)
endfunction()

# Check SSE2 (not a valid flag for MSVC, so the MSVC flag is left empty).
# The probe may only use intrinsics declared by <emmintrin.h>. The previous
# version called _mm_abs_epi32, which is an SSSE3 intrinsic, so the probe could
# not compile with -msse2 alone. The result is inspected so the vector
# operation cannot be discarded as dead code.
check_simd_capability("-msse2" "" "SSE2" "
#include <emmintrin.h>
int main() {
    __m128i a = _mm_set_epi32(-1, 2, -3, 4);
    __m128i b = _mm_set_epi32(1, 2, 3, 4);
    __m128i result = _mm_add_epi32(a, b);
    return _mm_cvtsi128_si32(result) == 8 ? 0 : 1;
}" SIMD_SSE2)

# Check SSE3 (not a valid flag for MSVC).
# Builds and runs a program that calls _mm_addsub_ps from <pmmintrin.h>.
# GNU-style compilers are probed with -msse3; the MSVC flag argument is empty.
# The outcome of the probe is stored in SIMD_SSE3.
check_simd_capability("-msse3" "" "SSE3" "
#include <pmmintrin.h>
int main() {
__m128 a = _mm_set_ps (-1.0f, 2.0f, -3.0f, 4.0f);
__m128 b = _mm_set_ps (1.0f, 2.0f, 3.0f, 4.0f);
__m128 result = _mm_addsub_ps (a, b);
return 0;
}" SIMD_SSE3)

# Check SSSE3 (not a valid flag for MSVC).
# Builds and runs a program that calls _mm_abs_epi8 from <tmmintrin.h>.
# GNU-style compilers are probed with -mssse3; the MSVC flag argument is empty.
# The outcome of the probe is stored in SIMD_SSSE3.
check_simd_capability("-mssse3" "" "SSSE3" "
#include <tmmintrin.h>
int main() {
__m128i a = _mm_set_epi8(-1, 2, -3, 4, -1, 2, -3, 4, -1, 2, -3, 4, -1, 2, -3, 4);
__m128i result = _mm_abs_epi8(a);
return 0;
}" SIMD_SSSE3)

# Check SSE4.1 (not a valid flag for MSVC, so the MSVC flag is left empty).
# The probe uses _mm_mullo_epi32, which was introduced with SSE4.1. The previous
# version only exercised _mm_abs_epi32 (an SSSE3 intrinsic), so a successful run
# did not demonstrate SSE4.1 support. The result is inspected so the vector
# operation cannot be discarded as dead code.
check_simd_capability("-msse4.1" "" "SSE4.1" "
#include <smmintrin.h>
int main() {
    __m128i a = _mm_set_epi32(-1, 2, -3, 4);
    __m128i b = _mm_set_epi32(1, 2, 3, 4);
    __m128i result = _mm_mullo_epi32(a, b);
    return _mm_cvtsi128_si32(result) == 16 ? 0 : 1;
}" SIMD_SSE4_1)

# Check SSE4.2 (not a valid flag for MSVC, so the MSVC flag is left empty).
# The probe uses _mm_cmpgt_epi64, which was introduced with SSE4.2. The previous
# version only exercised _mm_abs_epi32 (an SSSE3 intrinsic), so a successful run
# did not demonstrate SSE4.2 support.
# The low 64-bit lanes hold 5 and 3, so the comparison yields an all-ones mask
# whose low 32 bits read back as -1.
check_simd_capability("-msse4.2" "" "SSE4.2" "
#include <nmmintrin.h>
int main() {
    __m128i a = _mm_set_epi32(0, 2, 0, 5);
    __m128i b = _mm_set_epi32(0, 1, 0, 3);
    __m128i result = _mm_cmpgt_epi64(a, b);
    return _mm_cvtsi128_si32(result) == -1 ? 0 : 1;
}" SIMD_SSE4_2)

# Check AVX.
# The previous probe called _mm256_abs_ps, which is not an existing intrinsic,
# so it could never compile and AVX was never reported. _mm256_add_ps is a
# plain AVX operation; the lowest lane (4 + 4) is read back so the vector
# operation cannot be discarded as dead code.
check_simd_capability("-mavx" "/arch:AVX" "AVX" "
#include <immintrin.h>
int main() {
    __m256 a = _mm256_set_ps(-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f);
    __m256 b = _mm256_set_ps(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
    __m256 result = _mm256_add_ps(a, b);
    float lanes[8];
    _mm256_storeu_ps(lanes, result);
    return lanes[0] == 8.0f ? 0 : 1;
}" SIMD_AVX)

# Check AVX2.
# Builds and runs a program that calls _mm256_abs_epi32 from <immintrin.h>.
# GNU-style compilers are probed with -mavx2, MSVC with /arch:AVX2.
# The outcome of the probe is stored in SIMD_AVX2.
check_simd_capability("-mavx2" "/arch:AVX2" "AVX2" "
#include <immintrin.h>
int main() {
__m256i a = _mm256_set_epi32(-1, 2, -3, 4, -1, 2, -3, 4);
__m256i result = _mm256_abs_epi32(a);
return 0;
}" SIMD_AVX2)

# Check AVX512F.
# Builds and runs a program that calls _mm512_abs_epi32 from <immintrin.h>.
# GNU-style compilers are probed with -mavx512f, MSVC with /arch:AVX512.
# The outcome of the probe is stored in SIMD_AVX512F.
check_simd_capability("-mavx512f" "/arch:AVX512" "AVX512F" "
#include <immintrin.h>
int main() {
__m512i a = _mm512_set_epi32(-1, 2, -3, 4, -1, 2, -3, 4, -1, 2, -3, 4, -1, 2, -3, 4);
__m512i result = _mm512_abs_epi32(a);
return 0;
}" SIMD_AVX512F)

# Check AVX512BW.
# Builds and runs a program that calls _mm512_abs_epi8 from <immintrin.h>.
# GNU-style compilers are probed with -mavx512bw, MSVC with /arch:AVX512.
# The outcome of the probe is stored in SIMD_AVX512BW.
check_simd_capability("-mavx512bw" "/arch:AVX512" "AVX512BW" "
#include <immintrin.h>
int main() {
__m512i a = _mm512_set_epi64(-1, 2, -3, 4, -1, 2, -3, 4);
__m512i result = _mm512_abs_epi8(a);
return 0;
}" SIMD_AVX512BW)

# Check AVX512CD.
# Builds and runs a program that calls _mm512_conflict_epi64 from <immintrin.h>.
# GNU-style compilers are probed with -mavx512cd, MSVC with /arch:AVX512.
# The outcome of the probe is stored in SIMD_AVX512CD.
check_simd_capability("-mavx512cd" "/arch:AVX512" "AVX512CD" "
#include <immintrin.h>
int main() {
__m512i a = _mm512_set_epi64(-1, 2, -3, 4, -1, 2, -3, 4);
__m512i result = _mm512_conflict_epi64(a);
return 0;
}" SIMD_AVX512CD)

# Check AVX512DQ.
# The probe uses _mm512_mullo_epi64, which belongs to the AVX512DQ extension.
# The previous version called _mm512_abs_pd, an AVX512F intrinsic, so a
# successful run did not demonstrate DQ support. The lowest lane (4 * 4) is
# read back so the vector operation cannot be discarded as dead code.
check_simd_capability("-mavx512dq" "/arch:AVX512" "AVX512DQ" "
#include <immintrin.h>
int main() {
    __m512i a = _mm512_set_epi64(-1, 2, -3, 4, -1, 2, -3, 4);
    __m512i b = _mm512_set_epi64(1, 2, 3, 4, 1, 2, 3, 4);
    __m512i result = _mm512_mullo_epi64(a, b);
    long long lanes[8];
    _mm512_storeu_si512(lanes, result);
    return lanes[0] == 16 ? 0 : 1;
}" SIMD_AVX512DQ)

# Check AVX512ER.
# The probe uses _mm512_rcp28_pd, an AVX512ER reciprocal approximation. The
# previous version called _mm512_exp_pd, which is an SVML library routine
# rather than an AVX512ER instruction and is not provided by GCC or Clang.
# The lowest lane holds 4.0, so the approximation must land close to 0.25.
check_simd_capability("-mavx512er" "/arch:AVX512" "AVX512ER" "
#include <immintrin.h>
int main() {
    __m512d a = _mm512_set_pd(-1.0, 2.0, -3.0, 4.0, -1.0, 2.0, -3.0, 4.0);
    __m512d result = _mm512_rcp28_pd(a);
    double lanes[8];
    _mm512_storeu_pd(lanes, result);
    return (lanes[0] > 0.24 && lanes[0] < 0.26) ? 0 : 1;
}" SIMD_AVX512ER)

# Check AVX512PF.
# The probe issues an AVX512PF gather prefetch over a small local buffer.
# The previous version passed 8 arguments to _mm512_set_ps (which takes 16) and
# called _mm512_exp_ps, an SVML routine unrelated to AVX512PF, so it could not
# compile. A prefetch has no observable result; the probe passes if it builds
# and executes without faulting.
check_simd_capability("-mavx512pf" "/arch:AVX512" "AVX512PF" "
#include <immintrin.h>
int main() {
    float data[16] = {0};
    __m512i index = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
    _mm512_prefetch_i32gather_ps(index, data, 4, _MM_HINT_T0);
    return 0;
}" SIMD_AVX512PF)

# ARM / ARM64
# Every ARM level is probed with the same NEON program (two vdupq_n_s32 values
# added with vaddq_s32); only the -march value, the display name and the result
# variable differ. No MSVC flag is supplied for any of them.
# NOTE(review): because the probe body is identical for every level, a
# successful run does not appear to distinguish one ARMv8.x revision from
# another -- confirm that appending every accepted -march value is intended.
set(librapid_neon_probe_source "
#include <arm_neon.h>
int main() {
int32x4_t a = vdupq_n_s32(1);
int32x4_t b = vdupq_n_s32(2);
int32x4_t result = vaddq_s32(a, b);
return 0;
}")

check_simd_capability("-march=armv7-a" "" "ARMv7" "${librapid_neon_probe_source}" SIMD_ARMv7)
check_simd_capability("-march=armv8-a" "" "ARMv8" "${librapid_neon_probe_source}" SIMD_ARMv8)
check_simd_capability("-march=armv8.1-a" "" "ARMv8.1" "${librapid_neon_probe_source}" SIMD_ARMv8_1)
check_simd_capability("-march=armv8.2-a" "" "ARMv8.2" "${librapid_neon_probe_source}" SIMD_ARMv8_2)
check_simd_capability("-march=armv8.3-a" "" "ARMv8.3" "${librapid_neon_probe_source}" SIMD_ARMv8_3)
check_simd_capability("-march=armv8.4-a" "" "ARMv8.4" "${librapid_neon_probe_source}" SIMD_ARMv8_4)
check_simd_capability("-march=armv8.5-a" "" "ARMv8.5" "${librapid_neon_probe_source}" SIMD_ARMv8_5)

# The shared source is only needed for the calls above.
unset(librapid_neon_probe_source)

# Report the outcome of the probes: either the accumulated flag list or a note
# that no capability was detected.
if (NOT LIBRAPID_ARCH_FOUND)
    message(STATUS "[ LIBRAPID ] Architecture Flags Not Found")
else ()
    message(STATUS "[ LIBRAPID ] Architecture Flags: ${LIBRAPID_ARCH_FLAGS}")
endif ()
2 changes: 1 addition & 1 deletion docs/source/cmakeIntegration.md
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ but may cause some functions to return slightly incorrect results due to lower p
### ``LIBRAPID_NATIVE_ARCH``

```
DEFAULT: OFF
DEFAULT: ON
```

Enabling this flag compiles librapid with the most advanced instruction set available on the system. This can lead to
Expand Down
2 changes: 2 additions & 0 deletions examples/example-vector-1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ namespace lrc = librapid;
auto main() -> int {
fmt::print("LibRapid Example -- Vector 1\n");

#if 0 // Currently broken -- switching SIMD backend
// Create a 3 dimensional vector
lrc::Vec3d myVector(2, 3, 4);
lrc::Vec3d myOtherVector(10, 5, 8);
Expand Down Expand Up @@ -52,6 +53,7 @@ auto main() -> int {
fmt::print("One vector: {}\n", one);
fmt::print("Full vector: {}\n", full);
fmt::print("Random vector: {:.3f}\n", random);
#endif

return 0;
}
4 changes: 2 additions & 2 deletions librapid/include/librapid/array/array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
#include "storage.hpp"

#if defined(LIBRAPID_HAS_OPENCL)
# include "../OpenCL/openclStorage.hpp"
# include "../OpenCL/openclStorage.hpp"
#endif // LIBRAPID_HAS_OPENCL

#if defined(LIBRAPID_HAS_CUDA)
# include "../cuda/cudaStorage.hpp"
# include "../cuda/cudaStorage.hpp"
#endif // LIBRAPID_HAS_CUDA

#include "arrayTypeDef.hpp"
Expand Down
Loading