From a5401c22685a5edc536f7720e93c1b42900b76a6 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Mon, 6 Feb 2017 14:33:10 +0100 Subject: [PATCH 01/19] Check availability of __sync functions and provide unsafe alternatives --- CMakeExt/Code/test_builtin_sync.c | 11 + CMakeExt/Threading.cmake | 37 +- .../base/include/dash/dart/base/atomic.h | 374 ++++++++++++++++-- 3 files changed, 391 insertions(+), 31 deletions(-) create mode 100644 CMakeExt/Code/test_builtin_sync.c diff --git a/CMakeExt/Code/test_builtin_sync.c b/CMakeExt/Code/test_builtin_sync.c new file mode 100644 index 000000000..88683e494 --- /dev/null +++ b/CMakeExt/Code/test_builtin_sync.c @@ -0,0 +1,11 @@ + + +int main(int argc, char **argv) +{ + int val = 0; + int res = __sync_add_and_fetch(&val, 1); + + if (res != 1) return 1; + + return 0; +} diff --git a/CMakeExt/Threading.cmake b/CMakeExt/Threading.cmake index 09dfd4a91..354c8a5c6 100644 --- a/CMakeExt/Threading.cmake +++ b/CMakeExt/Threading.cmake @@ -1,4 +1,4 @@ -## Flags to enabel support for multi-threading +## Flags to enable support for multi-threading ## # At the moment, ENABLE_THREADING enables DART_THREADING_PTHREADS since @@ -7,16 +7,35 @@ if (ENABLE_THREADING) + MESSAGE(STATUS "Checking for builtin __sync_add_and_fetch") + TRY_COMPILE(DART_SYNC_BUILTINS ${CMAKE_BINARY_DIR} + ${CMAKE_SOURCE_DIR}/CMakeExt/Code/test_builtin_sync.c + OUTPUT_VARIABLE OUTPUT) + if (DART_SYNC_BUILTINS) + MESSAGE(STATUS "Found builtin __sync_add_and_fetch") + set(CMAKE_C_FLAGS + "${CMAKE_C_FLAGS_RELEASE} -DDART_HAVE_SYNC_BUILTINS") + set(CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS_RELEASE} -DDART_HAVE_SYNC_BUILTINS") + else() + # error out for now + MESSAGE(STATUS "Compiling builtin __sync_add_and_fetch failed with error " + ${OUTPUT}) + message(FATAL_ERROR "Support for builtin __sync atomics required if " + "building with thread support enabled!") + endif() + # Find support for pthreads find_package(Threads REQUIRED) - set(CMAKE_C_FLAGS_RELEASE - "${CMAKE_C_FLAGS_RELEASE} -pthread -DDART_ENABLE_THREADING -DDART_THREADING_PTHREADS -DDASH_ENABLE_THREADING") - set(CMAKE_CXX_FLAGS_RELEASE - "${CMAKE_CXX_FLAGS_RELEASE} -pthread -DDART_ENABLE_THREADING -DDART_THREADING_PTHREADS -DDASH_ENABLE_THREADING") - set(CMAKE_C_FLAGS_DEBUG - "${CMAKE_C_FLAGS_DEBUG} -pthread -DDART_ENABLE_THREADING -DDART_THREADING_PTHREADS -DDASH_ENABLE_THREADING") - set(CMAKE_CXX_FLAGS_DEBUG - "${CMAKE_CXX_FLAGS_DEBUG} -pthread -DDART_ENABLE_THREADING -DDART_THREADING_PTHREADS -DDASH_ENABLE_THREADING") + set(CMAKE_C_FLAGS + "${CMAKE_C_FLAGS} -pthread -DDART_ENABLE_THREADSUPPORT") + set(CMAKE_C_FLAGS + "${CMAKE_C_FLAGS} -DDART_THREADING_PTHREADS -DDASH_ENABLE_THREADSUPPORT") + set(CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} -pthread -DDART_ENABLE_THREADSUPPORT") + set(CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} -DDART_HAVE_PTHREADS -DDASH_ENABLE_THREADSUPPORT") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pthread") + endif() diff --git a/dart-impl/base/include/dash/dart/base/atomic.h b/dart-impl/base/include/dash/dart/base/atomic.h index d93b1ae49..f305f1e83 100644 --- a/dart-impl/base/include/dash/dart/base/atomic.h +++ b/dart-impl/base/include/dash/dart/base/atomic.h @@ -16,27 +16,357 @@ #define DASH_DART_BASE_ATOMIC_H_ -#define DART_FETCH_AND_ADD64(ptr, val) __sync_fetch_and_add((int64_t *)(ptr), (val)) -#define DART_FETCH_AND_ADD32(ptr, val) __sync_fetch_and_add((int32_t *)(ptr), (val)) -#define DART_FETCH_AND_SUB64(ptr, val) __sync_fetch_and_sub((int64_t *)(ptr), (val)) -#define DART_FETCH_AND_SUB32(ptr, val) __sync_fetch_and_sub((int32_t *)(ptr), (val)) - -#define DART_FETCH_AND_INC64(ptr) __sync_fetch_and_add((int64_t *)(ptr), 1LL) -#define DART_FETCH_AND_INC32(ptr) __sync_fetch_and_add((int32_t *)(ptr), 1) -#define DART_FETCH_AND_INC16(ptr) __sync_fetch_and_add((int16_t *)(ptr), 1) -#define DART_FETCH_AND_INC8(ptr) __sync_fetch_and_add((int8_t *)(ptr), 1) -#define DART_FETCH_AND_INCPTR(ptr) __sync_fetch_and_add((void **)(ptr), sizeof(**ptr)) - -#define DART_FETCH_AND_DEC64(ptr) __sync_fetch_and_sub((int64_t *)(ptr), 1LL) -#define DART_FETCH_AND_DEC32(ptr) __sync_fetch_and_sub((int32_t *)(ptr), 1) -#define DART_FETCH_AND_DEC16(ptr) __sync_fetch_and_sub((int16_t *)(ptr), 1) -#define DART_FETCH_AND_DEC8(ptr) __sync_fetch_and_sub((int8_t *)(ptr), 1) -#define DART_FETCH_AND_DECPTR(ptr) __sync_fetch_and_sub((void **)(ptr), sizeof(**ptr)) - -#define DART_COMPARE_AND_SWAP64(ptr, oldval, newval) __sync_val_compare_and_swap((int64_t *)(ptr), (int64_t)(oldval), (int64_t)(newval)) -#define DART_COMPARE_AND_SWAP32(ptr, oldval, newval) __sync_val_compare_and_swap((int32_t *)(ptr), (int32_t)(oldval), (int32_t)(newval)) -#define DART_COMPARE_AND_SWAP16(ptr, oldval, newval) __sync_val_compare_and_swap((int16_t *)(ptr), (int16_t)(oldval), (int16_t)(newval)) -#define DART_COMPARE_AND_SWAP8(ptr, oldval, newval) __sync_val_compare_and_swap((int8_t *)(ptr), (int8_t )(oldval), (int8_t )(newval)) -#define DART_COMPARE_AND_SWAPPTR(ptr, oldval, newval) __sync_val_compare_and_swap((void **)(ptr), (void *)(oldval), (void *)(newval)) +#if DART_HAVE_SYNC_BUILTINS +/** + * Atomic operations using the __sync* builtin functions. + * + * All macros are specialized for 64, 32, 16, and 8 bit wide integers + * as well as for pointer types. + * + * See https://gcc.gnu.org/onlinedocs/gcc/_005f_005fsync-Builtins.html#g_t_005f_005fsync-Builtins + * for details. + * + * Fall-back options are provided as unsafe options in case the built-in + * functions are not available. + */ + + +#define DART_FETCH64(ptr) \ + DART_FETCH_AND_ADD64(ptr, 0) +#define DART_FETCH32(ptr) \ + DART_FETCH_AND_ADD32(ptr, 0) +#define DART_FETCH16(ptr) \ + DART_FETCH_AND_ADD16(ptr, 0) +#define DART_FETCH8(ptr) \ + DART_FETCH_AND_ADD8(ptr, 0) +#define DART_FETCHPTR(ptr) \ + DART_FETCH_AND_ADDPTR(ptr, 0) + +#define DART_FETCH_AND_ADD64(ptr, val) \ + __sync_fetch_and_add((int64_t *)(ptr), (val)) +#define DART_FETCH_AND_ADD32(ptr, val) \ + __sync_fetch_and_add((int32_t *)(ptr), (val)) +#define DART_FETCH_AND_ADD16(ptr, val) \ + __sync_fetch_and_add((int16_t *)(ptr), (val)) +#define DART_FETCH_AND_ADD8(ptr, val) \ + __sync_fetch_and_add((int8_t *)(ptr), (val)) + +#define DART_FETCH_AND_SUB64(ptr, val) \ + __sync_fetch_and_sub((int64_t *)(ptr), (val)) +#define DART_FETCH_AND_SUB32(ptr, val) \ + __sync_fetch_and_sub((int32_t *)(ptr), (val)) +#define DART_FETCH_AND_SUB16(ptr, val) \ + __sync_fetch_and_sub((int16_t *)(ptr), (val)) +#define DART_FETCH_AND_SUB8(ptr, val) \ + __sync_fetch_and_sub((int8_t *)(ptr), (val)) + +#define DART_FETCH_AND_INC64(ptr) \ + __sync_fetch_and_add((int64_t *)(ptr), 1LL) +#define DART_FETCH_AND_INC32(ptr) \ + __sync_fetch_and_add((int32_t *)(ptr), 1) +#define DART_FETCH_AND_INC16(ptr) \ + __sync_fetch_and_add((int16_t *)(ptr), 1) +#define DART_FETCH_AND_INC8(ptr) \ + __sync_fetch_and_add((int8_t *)(ptr), 1) +#define DART_FETCH_AND_INCPTR(ptr) \ + __sync_fetch_and_add((void **)(ptr), sizeof(**(ptr))) + +#define DART_FETCH_AND_DEC64(ptr) \ + __sync_fetch_and_sub((int64_t *)(ptr), 1LL) +#define DART_FETCH_AND_DEC32(ptr) \ + __sync_fetch_and_sub((int32_t *)(ptr), 1) +#define DART_FETCH_AND_DEC16(ptr) \ + __sync_fetch_and_sub((int16_t *)(ptr), 1) +#define DART_FETCH_AND_DEC8(ptr) \ + __sync_fetch_and_sub((int8_t *)(ptr), 1) +#define DART_FETCH_AND_DECPTR(ptr) \ + __sync_fetch_and_sub((void **)(ptr), sizeof(**(ptr))) + + + +#define DART_ADD_AND_FETCH64(ptr, val) \ + __sync_add_and_fetch((int64_t *)(ptr), (val)) +#define DART_ADD_AND_FETCH32(ptr, val) \ + __sync_add_and_fetch((int32_t *)(ptr), (val)) +#define DART_ADD_AND_FETCH16(ptr, val) \ + __sync_add_and_fetch((int16_t *)(ptr), (val)) +#define DART_ADD_AND_FETCH8(ptr, val) \ + __sync_add_and_fetch((int8_t *)(ptr), (val)) +#define DART_ADD_AND_FETCHPTR(ptr, cnt) \ + __sync_fetch_and_sub((void **)(ptr), (cnt) * sizeof(**(ptr))) + +#define DART_SUB_AND_FETCH64(ptr, val) \ + __sync_sub_and_fetch((int64_t *)(ptr), (val)) +#define DART_SUB_AND_FETCH32(ptr, val) \ + __sync_sub_and_fetch((int32_t *)(ptr), (val)) +#define DART_SUB_AND_FETCH16(ptr, val) \ + __sync_sub_and_fetch((int16_t *)(ptr), (val)) +#define DART_SUB_AND_FETCH8(ptr, val) \ + __sync_sub_and_fetch((int8_t *)(ptr), (val)) +#define DART_SUB_AND_FETCHPTR(ptr, cnt) \ + __sync_sub_and_fetch((void **)(ptr), (cnt) * sizeof(**(ptr))) + +#define DART_INC_AND_FETCH64(ptr) \ + __sync_add_and_fetch((int64_t *)(ptr), 1LL) +#define DART_INC_AND_FETCH32(ptr) \ + __sync_add_and_fetch((int32_t *)(ptr), 1) +#define DART_INC_AND_FETCH16(ptr) \ + __sync_add_and_fetch((int16_t *)(ptr), 1) +#define DART_INC_AND_FETCH8(ptr) \ + __sync_add_and_fetch((int8_t *)(ptr), 1) +#define DART_INC_AND_FETCHPTR(ptr) \ + __sync_add_and_fetch((void **)(ptr), sizeof(**(ptr))) + +#define DART_DEC_AND_FETCH64(ptr) \ + __sync_sub_and_fetch((int64_t *)(ptr), 1LL) +#define DART_DEC_AND_FETCH32(ptr) \ + __sync_sub_and_fetch((int32_t *)(ptr), 1) +#define DART_DEC_AND_FETCH16(ptr) \ + __sync_sub_and_fetch((int16_t *)(ptr), 1) +#define DART_DEC_AND_FETCH8(ptr) \ + __sync_sub_and_fetch((int8_t *)(ptr), 1) +#define DART_DEC_AND_FETCHPTR(ptr) \ + __sync_sub_and_fetch((void **)(ptr), sizeof(**ptr)) + + +#define DART_COMPARE_AND_SWAP64(ptr, oldval, newval) \ + __sync_val_compare_and_swap((int64_t *)(ptr), \ + (int64_t )(oldval), \ + (int64_t )(newval)) +#define DART_COMPARE_AND_SWAP32(ptr, oldval, newval) \ + __sync_val_compare_and_swap((int32_t *)(ptr), \ + (int32_t )(oldval), \ + (int32_t )(newval)) +#define DART_COMPARE_AND_SWAP16(ptr, oldval, newval) \ + __sync_val_compare_and_swap((int16_t *)(ptr), \ + (int16_t )(oldval), \ + (int16_t )(newval)) +#define DART_COMPARE_AND_SWAP8(ptr, oldval, newval) \ + __sync_val_compare_and_swap((int8_t *)(ptr), \ + (int8_t )(oldval), \ + (int8_t )(newval)) +#define DART_COMPARE_AND_SWAPPTR(ptr, oldval, newval) \ + __sync_val_compare_and_swap((void **)(ptr), \ + (void *)(oldval), \ + (void *)(newval)) + +#else + +#define DART_MAYBE_UNUSED __attribute__ ((unused)) + +/** + * Fall-back version in case __sync* functions are not available. + * + * These surrogates are NOT THREADSAFE! + */ + +static inline int64_t +DART_MAYBE_UNUSED +__fetch_and_add64(int64_t *ptr, int64_t val) { + int64_t res = *ptr; + *ptr += val; + return res; +} + +static inline int32_t +DART_MAYBE_UNUSED +__fetch_and_add32(int32_t *ptr, int32_t val) { + int32_t res = *ptr; + *ptr += val; + return res; +} + +static inline int16_t +DART_MAYBE_UNUSED +__fetch_and_add16(int16_t *ptr, int16_t val) { + int16_t res = *ptr; + *ptr += val; + return res; +} + +static inline int8_t +DART_MAYBE_UNUSED +__fetch_and_add16(int8_t *ptr, int8_t val) { + int8_t val = *ptr; + *ptr += val; + return val; +} + +static inline void * +DART_MAYBE_UNUSED +__fetch_and_addptr(char **ptr, int64_t val) { + char * res = *ptr; + *ptr += val; + return res; +} + + + +#define DART_FETCH_AND_ADD64(ptr, val) \ + __fetch_and_add64((ptr), (val)) +#define DART_FETCH_AND_ADD32(ptr, val) \ + __fetch_and_add32((ptr), (val)) +#define DART_FETCH_AND_ADD16(ptr, val) \ + __fetch_and_add16((ptr), (val)) +#define DART_FETCH_AND_ADD8(ptr, val) \ + __fetch_and_add8((ptr), (val)) + +#define DART_FETCH_AND_SUB64(ptr, val) \ + __fetch_and_add64((ptr), (-1) * (val)) +#define DART_FETCH_AND_SUB32(ptr, val) \ + __fetch_and_add32((ptr), (-1) * (val)) +#define DART_FETCH_AND_SUB16(ptr, val) \ + __fetch_and_add16((ptr), (-1) * (val)) +#define DART_FETCH_AND_SUB8(ptr, val) \ + __fetch_and_add8((ptr), (-1) * (val)) + + +#define DART_FETCH_AND_INC64(ptr) \ + ((*(int64_t *)ptr)++) +#define DART_FETCH_AND_INC32(ptr) \ + ((*(int64_t *)ptr)++) +#define DART_FETCH_AND_INC16(ptr) \ + ((*(int64_t *)ptr)++) +#define DART_FETCH_AND_INC8(ptr) \ + ((*(int64_t *)ptr)++) +#define DART_FETCH_AND_INCPTR(ptr) \ + __fetch_and_addptr((void **)(ptr), sizeof(**(ptr))) + + +#define DART_FETCH_AND_DEC64(ptr) \ + ((*(int64_t *)ptr)--) +#define DART_FETCH_AND_DEC32(ptr) \ + ((*(int32_t *)ptr)--) +#define DART_FETCH_AND_DEC16(ptr) \ + ((*(int16_t *)ptr)--) +#define DART_FETCH_AND_DEC8(ptr) \ + ((*(int8_t *)ptr)--) +#define DART_FETCH_AND_DECPTR(ptr) \ + __fetch_and_addptr((char **)(ptr), (-1) * sizeof(**(ptr))) + + + +#define DART_ADD_AND_FETCH64(ptr, val) \ + (*(int64_t *)ptr += (val)) +#define DART_ADD_AND_FETCH32(ptr, val) \ + (*(int32_t *)ptr += (val)) +#define DART_ADD_AND_FETCH16(ptr, val) \ + (*(int16_t *)ptr += (val)) +#define DART_ADD_AND_FETCH8(ptr, val) \ + (*(int8_t *)ptr += (val)) +#define DART_ADD_AND_FETCHPTR(ptr, cnt) \ + (void*)(*(char **)ptr += (cnt) * sizeof(**(ptr))) + + +#define DART_SUB_AND_FETCH64(ptr, val) \ + (*(int64_t *)(ptr) -= (val)) +#define DART_SUB_AND_FETCH32(ptr, val) \ + (*(int32_t *)(ptr) -= (val)) +#define DART_SUB_AND_FETCH16(ptr, val) \ + (*(int16_t *)(ptr) -= (val)) +#define DART_SUB_AND_FETCH8(ptr, val) \ + (*(int8_t *)(ptr) -= (val)) +#define DART_SUB_AND_FETCHPTR(ptr, cnt) \ + (void*)(*(char **)(ptr) -= (cnt) * sizeof(**(ptr))) + + +#define DART_INC_AND_FETCH64(ptr) \ + (++(*(int64_t *)(ptr))) +#define DART_INC_AND_FETCH32(ptr) \ + (++(*(int32_t *)(ptr))) +#define DART_INC_AND_FETCH16(ptr) \ + (++(*(int16_t *)(ptr))) +#define DART_INC_AND_FETCH8(ptr) \ + (++(*(int8_t *)(ptr))) +#define DART_INC_AND_FETCHPTR(ptr) \ + (++(*(void **)(ptr))) + + +#define DART_DEC_AND_FETCH64(ptr) \ + (--(*(int64_t *)(ptr))) +#define DART_DEC_AND_FETCH32(ptr) \ + (--(*(int32_t *)(ptr))) +#define DART_DEC_AND_FETCH16(ptr) \ + (--(*(int16_t *)(ptr))) +#define DART_DEC_AND_FETCH8(ptr) \ + (--(*(int8_t *)(ptr))) +#define DART_DEC_AND_FETCHPTR(ptr) \ + (--(*(void **)(ptr))) + + +static inline int64_t +DART_MAYBE_UNUSED +__compare_and_swap64(int64_t *ptr, int64_t oldval, int64_t newval) { + int64_t res = *ptr; + if (*ptr == oldval) { + *ptr = newval; + } + return val; +} + +static inline int32_t +DART_MAYBE_UNUSED +__compare_and_swap32(int32_t *ptr, int32_t oldval, int32_t newval) { + int32_t res = *ptr; + if (*ptr == oldval) { + *ptr = newval; + } + return val; +} + +static inline int16_t +DART_MAYBE_UNUSED +__compare_and_swap16(int16_t *ptr, int16_t oldval, int16_t newval) { + int16_t res = *ptr; + if (*ptr == oldval) { + *ptr = newval; + } + return val; +} + +static inline int8_t +DART_MAYBE_UNUSED +__compare_and_swap8(int8_t *ptr, int8_t oldval, int8_t newval) { + int8_t res = *ptr; + if (*ptr == oldval) { + *ptr = newval; + } + return val; +} + +static inline void* +DART_MAYBE_UNUSED +__compare_and_swapptr(void **ptr, void *oldval, void *newval) { + void *res = *ptr; + if (*ptr == oldval) { + *ptr = newval; + } + return val; +} + + +#define DART_COMPARE_AND_SWAP64(ptr, oldval, newval) \ + __compare_and_swap64((int64_t *)(ptr), \ + (int64_t )(oldval), \ + (int64_t )(newval)) +#define DART_COMPARE_AND_SWAP32(ptr, oldval, newval) \ + __compare_and_swap32((int32_t *)(ptr), \ + (int32_t )(oldval), \ + (int32_t )(newval)) +#define DART_COMPARE_AND_SWAP16(ptr, oldval, newval) \ + __compare_and_swap16((int16_t *)(ptr), \ + (int16_t )(oldval), \ + (int16_t )(newval)) +#define DART_COMPARE_AND_SWAP8(ptr, oldval, newval) \ + __compare_and_swap8((int8_t *)(ptr), \ + (int8_t )(oldval), \ + (int8_t )(newval)) +#define DART_COMPARE_AND_SWAPPTR(ptr, oldval, newval) \ + __compare_and_swapptr((void **)(ptr), \ + (void *)(oldval), \ + (void *)(newval)) + + +#endif /* DART_HAVE_SYNC_BUILTINS */ #endif /* DASH_DART_BASE_ATOMIC_H_ */ From 92c180280ba0d96aa31d3fa53bd3365075450b08 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Mon, 6 Feb 2017 14:53:05 +0100 Subject: [PATCH 02/19] Use [DART|DASH]_ENABLE_THREADSUPPORT and DART_HAVE_PTHREADS guards --- CMakeExt/Threading.cmake | 6 ++--- build.cov.sh | 1 + build.dev.sh | 1 + build.mic.sh | 1 + build.minimal.sh | 1 + build.nasty.sh | 1 + build.sh | 2 +- dart-impl/base/include/dash/dart/base/mutex.h | 22 +++++++++---------- dart-impl/mpi/src/dart_initialization.c | 4 ++-- dash/src/Init.cc | 2 +- dash/test/main.cc | 4 ++-- 11 files changed, 25 insertions(+), 20 deletions(-) diff --git a/CMakeExt/Threading.cmake b/CMakeExt/Threading.cmake index 354c8a5c6..ac0f4a640 100644 --- a/CMakeExt/Threading.cmake +++ b/CMakeExt/Threading.cmake @@ -1,11 +1,11 @@ ## Flags to enable support for multi-threading ## -# At the moment, ENABLE_THREADING enables DART_THREADING_PTHREADS since +# At the moment, DART_ENABLE_THREADSUPPORT enables DART_HAVE_PTHREADS since # Pthreads are the only threading implementation currently supported. ## -if (ENABLE_THREADING) +if (ENABLE_THREADSUPPORT) MESSAGE(STATUS "Checking for builtin __sync_add_and_fetch") TRY_COMPILE(DART_SYNC_BUILTINS ${CMAKE_BINARY_DIR} @@ -30,7 +30,7 @@ if (ENABLE_THREADING) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread -DDART_ENABLE_THREADSUPPORT") set(CMAKE_C_FLAGS - "${CMAKE_C_FLAGS} -DDART_THREADING_PTHREADS -DDASH_ENABLE_THREADSUPPORT") + "${CMAKE_C_FLAGS} -DDART_HAVE_PTHREADS -DDASH_ENABLE_THREADSUPPORT") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -DDART_ENABLE_THREADSUPPORT") set(CMAKE_CXX_FLAGS diff --git a/build.cov.sh b/build.cov.sh index 9bcf43f89..defa9718c 100755 --- a/build.cov.sh +++ b/build.cov.sh @@ -59,6 +59,7 @@ rm -Rf $BUILD_DIR/* -DDART_IF_VERSION=3.2 \ -DINSTALL_PREFIX=$HOME/opt/dash-0.3.0/ \ -DDART_IMPLEMENTATIONS=mpi \ + -DENABLE_THREADSUPPORT=ON \ -DENABLE_DEVELOPER_COMPILER_WARNINGS=OFF \ -DENABLE_LT_OPTIMIZATION=OFF \ -DENABLE_ASSERTIONS=ON \ diff --git a/build.dev.sh b/build.dev.sh index b769e6469..b9b9240bf 100755 --- a/build.dev.sh +++ b/build.dev.sh @@ -61,6 +61,7 @@ rm -Rf $BUILD_DIR/* -DDART_IF_VERSION=3.2 \ -DINSTALL_PREFIX=$HOME/opt/dash-0.3.0-dev/ \ -DDART_IMPLEMENTATIONS=mpi \ + -DENABLE_THREADSUPPORT=ON \ -DENABLE_ASSERTIONS=ON \ -DENABLE_LT_OPTIMIZATION=OFF \ -DENABLE_DEVELOPER_COMPILER_WARNINGS=ON \ diff --git a/build.mic.sh b/build.mic.sh index 747fd8134..420c0cf96 100755 --- a/build.mic.sh +++ b/build.mic.sh @@ -51,6 +51,7 @@ rm -Rf $BUILD_DIR/* -DDART_IF_VERSION=3.2 \ -DINSTALL_PREFIX=$HOME/opt/dash-0.3.0-mic/ \ -DDART_IMPLEMENTATIONS=mpi \ + -DENABLE_THREADSUPPORT=ON \ -DENABLE_DEVELOPER_COMPILER_WARNINGS=OFF \ -DENABLE_EXTENDED_COMPILER_WARNINGS=OFF \ -DENABLE_ASSERTIONS=OFF \ diff --git a/build.minimal.sh b/build.minimal.sh index 54fd3618a..b5ff6ab24 100755 --- a/build.minimal.sh +++ b/build.minimal.sh @@ -53,6 +53,7 @@ rm -Rf $BUILD_DIR/* -DDART_IF_VERSION=3.2 \ -DINSTALL_PREFIX=$HOME/opt/dash-0.3.0/ \ -DDART_IMPLEMENTATIONS=mpi \ + -DENABLE_THREADSUPPORT=OFF \ -DENABLE_DEVELOPER_COMPILER_WARNINGS=OFF \ -DENABLE_EXTENDED_COMPILER_WARNINGS=OFF \ -DENABLE_LT_OPTIMIZATION=OFF \ diff --git a/build.nasty.sh b/build.nasty.sh index b31cbf98c..16714b1d7 100755 --- a/build.nasty.sh +++ b/build.nasty.sh @@ -66,6 +66,7 @@ rm -Rf $BUILD_DIR/* -DDART_IF_VERSION=3.2 \ -DINSTALL_PREFIX=$HOME/opt/dash-0.3.0-nasty \ -DDART_IMPLEMENTATIONS=mpi \ + -DENABLE_THREADSUPPORT=ON \ -DENABLE_DEVELOPER_COMPILER_WARNINGS=OFF \ -DENABLE_EXTENDED_COMPILER_WARNINGS=OFF \ -DENABLE_LT_OPTIMIZATION=OFF \ diff --git a/build.sh b/build.sh index e3daaa3ab..3ea7787c2 100755 --- a/build.sh +++ b/build.sh @@ -59,7 +59,7 @@ rm -Rf $BUILD_DIR/* -DDART_IF_VERSION=3.2 \ -DINSTALL_PREFIX=$HOME/opt/dash-0.3.0/ \ -DDART_IMPLEMENTATIONS=mpi \ - -DENABLE_THREADING=ON \ + -DENABLE_THREADSUPPORT=ON \ -DENABLE_DEVELOPER_COMPILER_WARNINGS=OFF \ -DENABLE_EXTENDED_COMPILER_WARNINGS=OFF \ -DENABLE_LT_OPTIMIZATION=OFF \ diff --git a/dart-impl/base/include/dash/dart/base/mutex.h b/dart-impl/base/include/dash/dart/base/mutex.h index b41c7ed2a..7690dc670 100644 --- a/dart-impl/base/include/dash/dart/base/mutex.h +++ b/dart-impl/base/include/dash/dart/base/mutex.h @@ -3,20 +3,20 @@ #include -#if defined(DART_ENABLE_THREADING) && !defined(DART_THREADING_PTHREADS) -#error "Thread support has been enabled but DART_THREADING_PTHREADS is not defined!" +#if defined(DART_ENABLE_THREADSUPPORT) && !defined(DART_HAVE_PTHREADS) +#error "Thread support has been enabled but PTHREADS support is not available!" #endif -#if !defined(DART_ENABLE_THREADING) && defined(DART_THREADING_PTHREADS) -#undef DART_THREADING_PTHREADS +#if !defined(DART_ENABLE_THREADSUPPORT) && defined(DART_HAVE_PTHREADS) +#undef DART_HAVE_PTHREADS #endif -#ifdef DART_THREADING_PTHREADS +#ifdef DART_HAVE_PTHREADS #include #endif typedef struct dart_mutex { -#ifdef DART_THREADING_PTHREADS +#ifdef DART_HAVE_PTHREADS pthread_mutex_t mutex; #else // required since C99 does not allow empty structs @@ -29,7 +29,7 @@ static inline dart_ret_t dart_mutex_init(dart_mutex_t *mutex) { -#ifdef DART_THREADING_PTHREADS +#ifdef DART_HAVE_PTHREADS pthread_mutex_init(&mutex->mutex, NULL); return DART_OK; #else @@ -41,7 +41,7 @@ static inline dart_ret_t dart_mutex_lock(dart_mutex_t *mutex) { -#ifdef DART_THREADING_PTHREADS +#ifdef DART_HAVE_PTHREADS pthread_mutex_lock(&mutex->mutex); return DART_OK; #else @@ -53,7 +53,7 @@ static inline dart_ret_t dart_mutex_unlock(dart_mutex_t *mutex) { -#ifdef DART_THREADING_PTHREADS +#ifdef DART_HAVE_PTHREADS pthread_mutex_unlock(&mutex->mutex); return DART_OK; #else @@ -65,7 +65,7 @@ static inline dart_ret_t dart_mutex_trylock(dart_mutex_t *mutex) { -#ifdef DART_THREADING_PTHREADS +#ifdef DART_HAVE_PTHREADS pthread_mutex_trylock(&mutex->mutex); return DART_OK; #else @@ -78,7 +78,7 @@ static inline dart_ret_t dart_mutex_destroy(dart_mutex_t *mutex) { -#ifdef DART_THREADING_PTHREADS +#ifdef DART_HAVE_PTHREADS pthread_mutex_destroy(&mutex->mutex); return DART_OK; #else diff --git a/dart-impl/mpi/src/dart_initialization.c b/dart-impl/mpi/src/dart_initialization.c index ab6ee5d21..3787e66e8 100644 --- a/dart-impl/mpi/src/dart_initialization.c +++ b/dart-impl/mpi/src/dart_initialization.c @@ -213,14 +213,14 @@ dart_ret_t dart_init_thread( _init_by_dart = 1; DART_LOG_DEBUG("dart_init: MPI_Init"); int thread_required = MPI_THREAD_MULTIPLE; -#ifdef DART_ENABLE_THREADING +#ifdef DART_ENABLE_THREADSUPPORT MPI_Init_thread(argc, argv, thread_required, &thread_provided); DART_LOG_DEBUG("MPI_Init_thread provided = %i\n", thread_provided); #else MPI_Init(argc, argv); #endif } else { -#ifdef DART_ENABLE_THREADING +#ifdef DART_ENABLE_THREADSUPPORT MPI_Query_thread(&thread_provided); DART_LOG_DEBUG("MPI_Query_thread provided = %i\n", thread_provided); #endif diff --git a/dash/src/Init.cc b/dash/src/Init.cc index efa7922a5..4372577ca 100644 --- a/dash/src/Init.cc +++ b/dash/src/Init.cc @@ -34,7 +34,7 @@ void dash::init(int * argc, char ** *argv) DASH_LOG_DEBUG("dash::init", "dash::util::Config::init()"); dash::util::Config::init(); -#if DASH_ENABLE_THREADING +#if DASH_ENABLE_THREADSUPPORT DASH_LOG_DEBUG("dash::init", "dart_init_thread()"); dart_thread_level_t provided_mt; dart_init_thread(argc, argv, &provided_mt); diff --git a/dash/test/main.cc b/dash/test/main.cc index e6ad2d4a7..08a6697bf 100644 --- a/dash/test/main.cc +++ b/dash/test/main.cc @@ -29,13 +29,13 @@ int main(int argc, char * argv[]) // Init MPI #ifdef MPI_SUPPORT -#ifdef DASH_ENABLE_THREADING +#ifdef DASH_ENABLE_THREADSUPPORT int thread_required = MPI_THREAD_MULTIPLE; int thread_provided; // ignored here MPI_Init_thread(&argc, &argv, thread_required, &thread_provided); #else MPI_Init(&argc, &argv); -#endif // DASH_ENABLE_THREADING +#endif // DASH_ENABLE_THREADSUPPORT MPI_Comm_rank(MPI_COMM_WORLD, &team_myid); MPI_Comm_size(MPI_COMM_WORLD, &team_size); From b5f0f959f77f31632ab026da6ccba8bbffdec8a2 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Mon, 6 Feb 2017 15:50:59 +0100 Subject: [PATCH 03/19] dart_thread_level_t -> dart_thread_support_level_t --- dart-if/v3.2/include/dash/dart/if/dart_initialization.h | 2 +- dart-if/v3.2/include/dash/dart/if/dart_types.h | 2 +- dart-impl/mpi/src/dart_initialization.c | 2 +- dash/src/Init.cc | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dart-if/v3.2/include/dash/dart/if/dart_initialization.h b/dart-if/v3.2/include/dash/dart/if/dart_initialization.h index f574e5b39..9c50550b4 100644 --- a/dart-if/v3.2/include/dash/dart/if/dart_initialization.h +++ b/dart-if/v3.2/include/dash/dart/if/dart_initialization.h @@ -52,7 +52,7 @@ dart_ret_t dart_init(int *argc, char ***argv); dart_ret_t dart_init_thread( int* argc, char*** argv, - dart_thread_level_t * thread_safety); + dart_thread_support_level_t * thread_safety); /** * Finalize the DASH runtime. diff --git a/dart-if/v3.2/include/dash/dart/if/dart_types.h b/dart-if/v3.2/include/dash/dart/if/dart_types.h index 926e10943..6454c2c55 100644 --- a/dart-if/v3.2/include/dash/dart/if/dart_types.h +++ b/dart-if/v3.2/include/dash/dart/if/dart_types.h @@ -225,7 +225,7 @@ typedef enum * the underlying runtime. */ DART_THREAD_MULTIPLE = 10 -} dart_thread_level_t; +} dart_thread_support_level_t; /** * Scopes of locality domains. diff --git a/dart-impl/mpi/src/dart_initialization.c b/dart-impl/mpi/src/dart_initialization.c index 3787e66e8..7c63c312b 100644 --- a/dart-impl/mpi/src/dart_initialization.c +++ b/dart-impl/mpi/src/dart_initialization.c @@ -194,7 +194,7 @@ dart_ret_t dart_init( dart_ret_t dart_init_thread( int* argc, char*** argv, - dart_thread_level_t * provided) + dart_thread_support_level_t * provided) { if (_dart_initialized) { DART_LOG_ERROR("dart_init(): DART is already initialized"); diff --git a/dash/src/Init.cc b/dash/src/Init.cc index 4372577ca..9bd053a2b 100644 --- a/dash/src/Init.cc +++ b/dash/src/Init.cc @@ -36,7 +36,7 @@ void dash::init(int * argc, char ** *argv) #if DASH_ENABLE_THREADSUPPORT DASH_LOG_DEBUG("dash::init", "dart_init_thread()"); - dart_thread_level_t provided_mt; + dart_thread_support_level_t provided_mt; dart_init_thread(argc, argv, &provided_mt); dash::_multithreaded = (provided_mt == DART_THREAD_MULTIPLE); if (!dash::_multithreaded) { From e4c48420ad4d763b0738a5c55c78bc344054b6a1 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Mon, 6 Feb 2017 15:51:37 +0100 Subject: [PATCH 04/19] Add missing CMake changes --- CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 130479c88..086b842f6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -93,7 +93,7 @@ option(ENABLE_HDF5 "Specify whether HDF5 features are enabled" on) option(ENABLE_NASTYMPI "Specify whether the NastyMPI proxy should be enabled" off) -option(ENABLE_THREADING +option(ENABLE_THREADSUPPORT "Specify whether support for multithreading should be compiled" off) if (BUILD_COVERAGE_TESTS) @@ -131,7 +131,7 @@ endif() if (ENABLE_NASTYMPI) include(${CMAKE_SOURCE_DIR}/CMakeExt/NastyMPI.cmake) endif() -if (ENABLE_THREADING) +if (ENABLE_THREADSUPPORT) include(${CMAKE_SOURCE_DIR}/CMakeExt/Threading.cmake) endif() @@ -289,8 +289,8 @@ message(INFO "HDF5 support: (ENABLE_HDF5) " ${ENABLE_HDF5}) message(INFO "Enabled DART backends: (DART_IMPLEMENTATIONS) " ${DART_IMPLEMENTATIONS}) -message(INFO "Enable multithreading: (ENABLE_MULTITHREADING) " - ${ENABLE_MULTITHREADING}) +message(INFO "Enable multithreading: (ENABLE_THREADSUPPORT) " + ${ENABLE_THREADSUPPORT}) message(INFO "C compiler id: ${CMAKE_C_COMPILER_ID}") message(INFO "C++ compiler id: ${CMAKE_CXX_COMPILER_ID}") if (MPI_FOUND) From f25d6f9cfbc983c876bc03e77694ac793d37d9da Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Mon, 6 Feb 2017 16:28:25 +0100 Subject: [PATCH 05/19] Fix fall-back surrogates used if thread-support is disabled --- dart-impl/base/include/dash/dart/base/atomic.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/dart-impl/base/include/dash/dart/base/atomic.h b/dart-impl/base/include/dash/dart/base/atomic.h index f305f1e83..638253aa3 100644 --- a/dart-impl/base/include/dash/dart/base/atomic.h +++ b/dart-impl/base/include/dash/dart/base/atomic.h @@ -153,7 +153,7 @@ #else -#define DART_MAYBE_UNUSED __attribute__ ((unused)) +#define DART_MAYBE_UNUSED __attribute__((unused)) /** * Fall-back version in case __sync* functions are not available. @@ -187,10 +187,10 @@ __fetch_and_add16(int16_t *ptr, int16_t val) { static inline int8_t DART_MAYBE_UNUSED -__fetch_and_add16(int8_t *ptr, int8_t val) { - int8_t val = *ptr; +__fetch_and_add8(int8_t *ptr, int8_t val) { + int8_t res = *ptr; *ptr += val; - return val; + return res; } static inline void * @@ -302,7 +302,7 @@ __compare_and_swap64(int64_t *ptr, int64_t oldval, int64_t newval) { if (*ptr == oldval) { *ptr = newval; } - return val; + return res; } static inline int32_t @@ -312,7 +312,7 @@ __compare_and_swap32(int32_t *ptr, int32_t oldval, int32_t newval) { if (*ptr == oldval) { *ptr = newval; } - return val; + return res; } static inline int16_t @@ -322,7 +322,7 @@ __compare_and_swap16(int16_t *ptr, int16_t oldval, int16_t newval) { if (*ptr == oldval) { *ptr = newval; } - return val; + return res; } static inline int8_t @@ -332,7 +332,7 @@ __compare_and_swap8(int8_t *ptr, int8_t oldval, int8_t newval) { if (*ptr == oldval) { *ptr = newval; } - return val; + return res; } static inline void* @@ -342,7 +342,7 @@ __compare_and_swapptr(void **ptr, void *oldval, void *newval) { if (*ptr == oldval) { *ptr = newval; } - return val; + return res; } From d20ebf46487c436231cb6661b374376c24bac8c4 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Mon, 6 Feb 2017 16:29:32 +0100 Subject: [PATCH 06/19] Remove dash::check_summa_pattern_constraints --- dash/include/dash/algorithm/SUMMA.h | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/dash/include/dash/algorithm/SUMMA.h b/dash/include/dash/algorithm/SUMMA.h index 21d34a090..085fb8989 100644 --- a/dash/include/dash/algorithm/SUMMA.h +++ b/dash/include/dash/algorithm/SUMMA.h @@ -129,19 +129,6 @@ using summa_pattern_constraints = dash::summa_pattern_layout_constraints, typename MatrixType::pattern_type>; -template< - typename MatrixTypeA, - typename MatrixTypeB, - typename MatrixTypeC -> -using check_summa_pattern_constraints = - typename std::enable_if< - summa_pattern_constraints::satisfied::value && - summa_pattern_constraints::satisfied::value && - summa_pattern_constraints::satisfied::value, - void>; - - /** * Multiplies two matrices using the SUMMA algorithm. * Performs \c (2 * (nunits-1) * nunits^2) async copy operations of @@ -196,8 +183,7 @@ void summa( >::satisfied::value; static_assert( - std::is_same::value || - std::is_same::value, + std::is_floating_point::value, "dash::summa expects matrix element type double or float"); DASH_LOG_DEBUG("dash::summa()"); @@ -634,10 +620,11 @@ template< typename MatrixTypeC > typename -dash::check_summa_pattern_constraints< - MatrixTypeA, - MatrixTypeB, - MatrixTypeC >::type +std::enable_if< + summa_pattern_constraints::satisfied::value && + summa_pattern_constraints::satisfied::value && + summa_pattern_constraints::satisfied::value, + void> mmult( /// Matrix to multiply, extents n x m MatrixTypeA & A, From c29ff99868177b779d7fe9d4798e3e2742e8394b Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Mon, 6 Feb 2017 16:47:19 +0100 Subject: [PATCH 07/19] Fix path to dash-ci.sh script in CI documentation --- doc/CI.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/CI.md b/doc/CI.md index 96c9d4731..9e1c22c88 100644 --- a/doc/CI.md +++ b/doc/CI.md @@ -174,7 +174,7 @@ The best way to debug is to spin up a interactive container using the correspond docker run -it -v $( pwd ):/opt/dash dashproject/ci:openmpi2 ``` -Inside the container, `cd` to `/opt/dash` and execute `/bin/bash /opt/dash/scripts/dash-ci.sh` to run the CI. If you are only interessted in a single target, pass it to the CI as described above: `dash-ci.sh $TARGET`. +Inside the container, `cd` to `/opt/dash` and execute `/bin/bash /opt/dash/dash/dash/scripts/dash-ci.sh` to run the CI. If you are only interessted in a single target, pass it to the CI as described above: `dash-ci.sh $TARGET`. To leave the container again, just type `exit`. From 188fb6b2ed99a9b0bc2d6c6c66a8573234014c7a Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Fri, 10 Feb 2017 11:53:09 +0100 Subject: [PATCH 08/19] Disable GCC warning for EXPECT_EQ_U --- dash/test/TestBase.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dash/test/TestBase.h b/dash/test/TestBase.h index fdb6ee661..d1965b0db 100644 --- a/dash/test/TestBase.h +++ b/dash/test/TestBase.h @@ -26,10 +26,6 @@ namespace internal { #define ASSERT_GT_U(e,a) EXPECT_GT(e,a) << "Unit " << dash::myid().id #define ASSERT_LE_U(e,a) EXPECT_LE(e,a) << "Unit " << dash::myid().id #define ASSERT_GE_U(e,a) EXPECT_GE(e,a) << "Unit " << dash::myid().id -#define ASSERT_DOUBLE_EQ_U(e,a) \ - EXPECT_DOUBLE_EQ(e,a) << "Unit " << dash::myid().id -#define ASSERT_FLOAT_EQ_U(e,a) \ - EXPECT_FLOAT_EQ(e,a) << "Unit " << dash::myid().id #define EXPECT_TRUE_U(b) EXPECT_TRUE(b) << "Unit " << dash::myid().id #define EXPECT_FALSE_U(b) EXPECT_FALSE(b) << "Unit " << dash::myid().id @@ -49,6 +45,9 @@ namespace internal { * GTest seems to have a workaround for that case, which we might * adopt. */ +#if defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wconversion-null" +#endif // defined(__GNUC__) template typename std::enable_if::value, ::testing::AssertionResult>::type assert_float_eq( From 94c47f5dd0b05b42d937a532fe3d3e68aace0879 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Fri, 10 Feb 2017 18:47:40 +0100 Subject: [PATCH 09/19] Fix debug output for thread-support status --- dart-impl/mpi/src/dart_initialization.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dart-impl/mpi/src/dart_initialization.c b/dart-impl/mpi/src/dart_initialization.c index 7c63c312b..5b2ad80f4 100644 --- a/dart-impl/mpi/src/dart_initialization.c +++ b/dart-impl/mpi/src/dart_initialization.c @@ -227,7 +227,7 @@ dart_ret_t dart_init_thread( } *provided = (thread_provided == MPI_THREAD_MULTIPLE) ? DART_THREAD_MULTIPLE : DART_THREAD_SINGLE; DART_LOG_DEBUG("dart_init_thread >> thread support enabled: %s\n", - (provided == DART_THREAD_MULTIPLE) ? "yes" : "no"); + (*provided == DART_THREAD_MULTIPLE) ? "yes" : "no"); return do_init(); } From 378f0140249b233b44e786834493d29e576fc894 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Fri, 10 Feb 2017 18:48:09 +0100 Subject: [PATCH 10/19] Fix compiler warning about initialization ordering --- dash/include/dash/map/UnorderedMap.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/dash/include/dash/map/UnorderedMap.h b/dash/include/dash/map/UnorderedMap.h index 2f941e380..e8da1ed2a 100644 --- a/dash/include/dash/map/UnorderedMap.h +++ b/dash/include/dash/map/UnorderedMap.h @@ -211,11 +211,10 @@ class UnorderedMap UnorderedMap( size_type nelem = 0, Team & team = dash::Team::All()) - : local(this), - _team(&team), + : _team(&team), _myid(team.myid()), - _remote_size(0), - _key_hash(team) + _key_hash(team), + local(this) { DASH_LOG_TRACE_VAR("UnorderedMap(nelem,team)", nelem); if (_team->size() > 0) { @@ -228,12 +227,11 @@ class UnorderedMap size_type nelem, size_type nlbuf, Team & team = dash::Team::All()) - : local(this), - _team(&team), + : _team(&team), _myid(team.myid()), - _remote_size(0), _key_hash(team), - _local_buffer_size(nlbuf) + _local_buffer_size(nlbuf), + local(this) { DASH_LOG_TRACE("UnorderedMap(nelem,nlbuf,team)", "nelem:", nelem, "nlbuf:", nlbuf); From 77a3280ee7328a0d7d63bf7b2854fc3c2a01f189 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Fri, 10 Feb 2017 18:48:57 +0100 Subject: [PATCH 11/19] Add trace output for team create --- dart-impl/mpi/src/dart_team_group.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dart-impl/mpi/src/dart_team_group.c b/dart-impl/mpi/src/dart_team_group.c index 603b1502a..cf9059695 100644 --- a/dart-impl/mpi/src/dart_team_group.c +++ b/dart-impl/mpi/src/dart_team_group.c @@ -642,6 +642,8 @@ dart_ret_t dart_team_create( MPI_Win_lock_all(0, win); DART_LOG_DEBUG("TEAMCREATE - create team %d from parent team %d", *newteam, teamid); + DART_LOG_TRACE("TEAMCREATE - team:%d comm:%p win:%p subcomm:%p", + *newteam, team_data->comm, team_data->window, subcomm); } return DART_OK; From 293e510fcc4c51219f65ceb98997940ea4c89100 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Fri, 10 Feb 2017 18:49:41 +0100 Subject: [PATCH 12/19] Extend thread-safety tests to include parallel mem register --- dash/test/ThreadsafetyTest.cc | 178 ++++++++++++++++++++++++++++------ 1 file changed, 151 insertions(+), 27 deletions(-) diff --git a/dash/test/ThreadsafetyTest.cc b/dash/test/ThreadsafetyTest.cc index 669cbc3bb..dfd148a41 100644 --- a/dash/test/ThreadsafetyTest.cc +++ b/dash/test/ThreadsafetyTest.cc @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -23,6 +24,11 @@ TEST_F(ThreadsafetyTest, ThreadInit) { TEST_F(ThreadsafetyTest, ConcurrentPut) { + using elem_t = int; + using array_t = dash::Array; + + constexpr size_t elem_per_thread = 100; + if (!dash::is_multithreaded()) { SKIP_TEST_MSG("requires support for multi-threading"); } @@ -39,13 +45,32 @@ TEST_F(ThreadsafetyTest, ConcurrentPut) { } } - dash::Array array(dash::size() * num_threads); - dash::default_index_t idx = ((dash::myid() + 1) % dash::size()) * num_threads; + size_t size = dash::size() * num_threads * elem_per_thread; + array_t src(size); + array_t dst(size); + #pragma omp parallel { int thread_id = omp_get_thread_num(); - for (int i = 0; i < 100; ++i) { - array[idx + thread_id] = thread_id; + array_t::index_type base_idx = thread_id * elem_per_thread; + for (size_t i = 0; i < elem_per_thread; ++i) { + src.local[base_idx + i] = thread_id; + } + } + + src.barrier(); + +#pragma omp parallel + { + int thread_id = omp_get_thread_num(); + array_t::index_type src_idx = dash::myid() + * (elem_per_thread * num_threads) + + (elem_per_thread * thread_id); + array_t::index_type dst_idx = ((dash::myid() + 1) % dash::size()) + * (elem_per_thread * num_threads) + + (elem_per_thread * thread_id); + for (size_t i = 0; i < elem_per_thread; ++i) { + dst[dst_idx + i] = src[src_idx + i]; } } @@ -54,9 +79,11 @@ TEST_F(ThreadsafetyTest, ConcurrentPut) { // for (int i = 0; i < num_threads; ++i) { // std::cout << "[" << dash::myid() << "] array[" << i << "] = " << (int)array[i] << std::endl; // } - + size_t pos = 0; for (int i = 0; i < num_threads; ++i) { - ASSERT_EQ_U(array.local[i], i); + for (size_t j = 0; j < elem_per_thread; ++j) { + ASSERT_EQ_U(dst.local[pos++], i); + } } #endif @@ -72,6 +99,10 @@ TEST_F(ThreadsafetyTest, ConcurrentAlloc) { SKIP_TEST_MSG("requires at least 4 units"); } + using elem_t = int; + using array_t = dash::Array; + constexpr size_t elem_per_thread = 100; + #if !defined(_OPENMP) SKIP_TEST_MSG("requires support for OpenMP"); #else @@ -86,31 +117,58 @@ TEST_F(ThreadsafetyTest, ConcurrentAlloc) { dash::Team& team_split = team_all.split(2); ASSERT_GT_U(team_all.size(), 0); ASSERT_GT_U(team_split.size(), 0); - dash::Array arr_all; - dash::Array arr_split; + array_t arr_all; + array_t arr_split; #pragma omp parallel num_threads(2) { int thread_id = omp_get_thread_num(); + dash::Team *team = (thread_id == 0) ? &team_all : &team_split; + array_t *arr = (thread_id == 0) ? &arr_all : &arr_split; for (int i = 0; i < 100; ++i) { // thread 0: contribute to allocation on team_all - if (thread_id == 0) { - if (i) { - arr_all.deallocate(); - ASSERT_EQ_U(arr_all.size(), 0); - } - arr_all.allocate(team_all.size(), dash::DistributionSpec<1>(), team_all); - ASSERT_EQ_U(arr_all.size(), team_all.size()); - arr_all[(team_all.myid() + 1) % team_all.size()] = 0; - } else if(thread_id == 1) { - if (i) { - arr_split.deallocate(); - ASSERT_EQ_U(arr_split.size(), 0); - } - arr_split.allocate(team_split.size(), dash::DistributionSpec<1>(), team_split); - ASSERT_EQ_U(arr_split.size(), team_split.size()); - arr_split[(team_split.myid() + 1) % team_split.size()] = 1; + if (i) { + arr->deallocate(); } + ASSERT_EQ_U(arr->size(), 0); + arr->allocate(elem_per_thread * team->size(), + dash::DistributionSpec<1>(), *team); + ASSERT_EQ_U(arr->size(), elem_per_thread * team->size()); +#pragma omp barrier + ASSERT_NE_U( + arr_all[0].dart_gptr().segid, + arr_split[0].dart_gptr().segid); +#pragma omp barrier + array_t::index_type base = + ((team->myid() + 1) % team->size()) * elem_per_thread; + for (size_t j = 0; j < elem_per_thread; ++j) { + (*arr)[base + j] = thread_id; + } +// if (thread_id == 0) { +// if (i) { +// arr_all.deallocate(); +// ASSERT_EQ_U(arr_all.size(), 0); +// } +// arr_all.allocate(elem_per_thread * team_all.size(), +// dash::DistributionSpec<1>(), team_all); +// ASSERT_EQ_U(arr_all.size(), elem_per_thread * team_all.size()); +// array_t::index_type base = (team_all.myid() + 1) % team_all.size(); +// for (size_t j = 0; j < elem_per_thread; ++j) { +// arr_all[base + j] = 0; +// } +// } else if(thread_id == 1) { +// if (i) { +// arr_split.deallocate(); +// ASSERT_EQ_U(arr_split.size(), 0); +// } +// arr_split.allocate(elem_per_thread * team_split.size(), +// dash::DistributionSpec<1>(), team_split); +// ASSERT_EQ_U(arr_split.size(), elem_per_thread * team_split.size()); +// array_t::index_type base = (team_split.myid() + 1) % team_split.size(); +// for (size_t j = 0; j < elem_per_thread; ++j) { +// arr_split[base + j] = 1; +// } +// } } #pragma omp barrier @@ -118,14 +176,80 @@ TEST_F(ThreadsafetyTest, ConcurrentAlloc) { { arr_all.barrier(); arr_split.barrier(); - ASSERT_EQ_U(arr_all.local[0], 0); - ASSERT_EQ_U(arr_split.local[0], 1); + for (size_t i = 0; i < elem_per_thread; ++i) { + ASSERT_EQ_U(arr_all.local[i], 0); + ASSERT_EQ_U(arr_split.local[i], 1); + } } #pragma omp barrier } -#endif +#endif //!defined(_OPENMP) +} +TEST_F(ThreadsafetyTest, ConcurrentAttach) { + using elem_t = int; + using allocator_t = dash::allocator::DynamicAllocator; + if (!dash::is_multithreaded()) { + SKIP_TEST_MSG("requires support for multi-threading"); + } + + if (dash::size() < 4) { + SKIP_TEST_MSG("requires at least 4 units"); + } + + constexpr size_t elem_per_thread = 100; + +#if !defined(_OPENMP) + SKIP_TEST_MSG("requires support for OpenMP"); +#else + int num_threads; +#pragma omp parallel +#pragma omp master + { + num_threads = omp_get_num_threads(); + } + + dash::Team& team_all = dash::Team::All(); + dash::Team& team_split = team_all.split(2); + ASSERT_GT_U(team_all.size(), 0); + ASSERT_GT_U(team_split.size(), 0); + +#pragma omp parallel num_threads(2) + { + int thread_id = omp_get_thread_num(); + dash::Team *team = (thread_id == 0) ? &team_all : &team_split; + for (int i = 0; i < 1; ++i) { + allocator_t allocator(*team); + elem_t *vals = allocator.allocate_local(elem_per_thread); + for (size_t j = 0; j < elem_per_thread; ++j) { + vals[j] = thread_id; + } + std::cout << "vals: " << vals << std::endl; + dart_gptr_t gptr = allocator.attach(vals, elem_per_thread); + ASSERT_NE_U(DART_GPTR_NULL, gptr); + ASSERT_LT_U(gptr.segid, 0); // attached memory has segment ID < 0 + elem_t check[elem_per_thread]; + dart_gptr_t gptr_r = gptr; + gptr_r.unitid = team->global_id( + dash::team_unit_t((team->myid() + 1) % team->size())); + dart_storage_t ds = dash::dart_storage(elem_per_thread); + ASSERT_EQ_U( + dart_get_blocking(check, gptr_r, ds.nelem, ds.dtype), + DART_OK); + + team->barrier(); + + for (size_t j = 0; j < elem_per_thread; ++j) { + ASSERT_EQ_U(check[j], thread_id); + } + team->barrier(); + + allocator.deallocate(gptr); + } +#pragma omp barrier + } +#endif //!defined(_OPENMP) } From 8a265c03263f0838dbf6e6cd1ccdb56e30759f33 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Mon, 13 Feb 2017 10:06:23 +0100 Subject: [PATCH 13/19] BUG: Fix an issue with the conversion from global to local unit IDs in shared-memory get. --- dart-impl/mpi/src/dart_communication.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/dart-impl/mpi/src/dart_communication.c b/dart-impl/mpi/src/dart_communication.c index 7f8087e34..397feda82 100644 --- a/dart-impl/mpi/src/dart_communication.c +++ b/dart-impl/mpi/src/dart_communication.c @@ -127,6 +127,8 @@ dart_ret_t dart_get( #if !defined(DART_MPI_DISABLE_SHARED_WINDOWS) DART_LOG_DEBUG("dart_get: shared windows enabled"); if (seg_id >= 0 && team_data->sharedmem_tab[gptr.unitid].id >= 0) { + // store the team-local ID in the gptr + gptr.unitid = target_unitid_rel.id; return get_shared_mem(team_data, dest, gptr, nelem, dtype); } #else @@ -464,6 +466,8 @@ dart_ret_t dart_get_handle( DART_LOG_DEBUG("dart_get_handle: shared windows enabled"); if (seg_id >= 0 && team_data->sharedmem_tab[gptr.unitid].id >= 0) { + // store the team-local ID in the gptr + gptr.unitid = target_unitid_rel.id; dart_ret_t ret = get_shared_mem(team_data, dest, gptr, nelem, dtype); /* @@ -840,7 +844,11 @@ dart_ret_t dart_get_blocking( #if !defined(DART_MPI_DISABLE_SHARED_WINDOWS) DART_LOG_DEBUG("dart_get_blocking: shared windows enabled"); + // store the team-local ID in the gptr + gptr.unitid = target_unitid_rel.id; if (seg_id >= 0 && team_data->sharedmem_tab[gptr.unitid].id >= 0) { + // store the team-local ID in the gptr + gptr.unitid = target_unitid_rel.id; return get_shared_mem(team_data, dest, gptr, nelem, dtype); } #else From 88ae3c0f6776d626d512c162ad5f74854b5940a4 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Mon, 13 Feb 2017 10:08:12 +0100 Subject: [PATCH 14/19] Add dash::memfree and check return value of dart_memalloc in dash::memalloc --- dash/include/dash/GlobMem.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/dash/include/dash/GlobMem.h b/dash/include/dash/GlobMem.h index a41e21c21..477d053bf 100644 --- a/dash/include/dash/GlobMem.h +++ b/dash/include/dash/GlobMem.h @@ -492,10 +492,18 @@ GlobPtr memalloc(size_t nelem) { dart_gptr_t gptr; dart_storage_t ds = dart_storage(nelem); - dart_memalloc(ds.nelem, ds.dtype, &gptr); + if (dart_memalloc(ds.nelem, ds.dtype, &gptr) != DART_OK) { + return GlobPtr(nullptr); + } return GlobPtr(gptr); } +template +void memfree(GlobPtr ptr) +{ + dart_memfree(ptr.dart_gptr()); +} + } // namespace dash #endif // DASH__GLOBMEM_H_ From 5d58ffa89674df6c58a057aacc7eda7261c602f9 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Mon, 13 Feb 2017 10:10:13 +0100 Subject: [PATCH 15/19] Add test for concurrent calls to dart_memalloc --- dash/test/ThreadsafetyTest.cc | 114 +++++++++++++++++++++------------- 1 file changed, 70 insertions(+), 44 deletions(-) diff --git a/dash/test/ThreadsafetyTest.cc b/dash/test/ThreadsafetyTest.cc index dfd148a41..2f1734f20 100644 --- a/dash/test/ThreadsafetyTest.cc +++ b/dash/test/ThreadsafetyTest.cc @@ -15,6 +15,9 @@ #include #endif +static constexpr int thread_iterations = 100; +static constexpr size_t elem_per_thread = 100; + TEST_F(ThreadsafetyTest, ThreadInit) { int mpi_thread; MPI_Query_thread(&mpi_thread); @@ -27,8 +30,6 @@ TEST_F(ThreadsafetyTest, ConcurrentPut) { using elem_t = int; using array_t = dash::Array; - constexpr size_t elem_per_thread = 100; - if (!dash::is_multithreaded()) { SKIP_TEST_MSG("requires support for multi-threading"); } @@ -101,17 +102,10 @@ TEST_F(ThreadsafetyTest, ConcurrentAlloc) { using elem_t = int; using array_t = dash::Array; - constexpr size_t elem_per_thread = 100; #if !defined(_OPENMP) SKIP_TEST_MSG("requires support for OpenMP"); #else - int num_threads; -#pragma omp parallel -#pragma omp master - { - num_threads = omp_get_num_threads(); - } dash::Team& team_all = dash::Team::All(); dash::Team& team_split = team_all.split(2); @@ -125,7 +119,7 @@ TEST_F(ThreadsafetyTest, ConcurrentAlloc) { int thread_id = omp_get_thread_num(); dash::Team *team = (thread_id == 0) ? &team_all : &team_split; array_t *arr = (thread_id == 0) ? &arr_all : &arr_split; - for (int i = 0; i < 100; ++i) { + for (int i = 0; i < thread_iterations; ++i) { // thread 0: contribute to allocation on team_all if (i) { arr->deallocate(); @@ -144,31 +138,6 @@ TEST_F(ThreadsafetyTest, ConcurrentAlloc) { for (size_t j = 0; j < elem_per_thread; ++j) { (*arr)[base + j] = thread_id; } -// if (thread_id == 0) { -// if (i) { -// arr_all.deallocate(); -// ASSERT_EQ_U(arr_all.size(), 0); -// } -// arr_all.allocate(elem_per_thread * team_all.size(), -// dash::DistributionSpec<1>(), team_all); -// ASSERT_EQ_U(arr_all.size(), elem_per_thread * team_all.size()); -// array_t::index_type base = (team_all.myid() + 1) % team_all.size(); -// for (size_t j = 0; j < elem_per_thread; ++j) { -// arr_all[base + j] = 0; -// } -// } else if(thread_id == 1) { -// if (i) { -// arr_split.deallocate(); -// ASSERT_EQ_U(arr_split.size(), 0); -// } -// arr_split.allocate(elem_per_thread * team_split.size(), -// dash::DistributionSpec<1>(), team_split); -// ASSERT_EQ_U(arr_split.size(), elem_per_thread * team_split.size()); -// array_t::index_type base = (team_split.myid() + 1) % team_split.size(); -// for (size_t j = 0; j < elem_per_thread; ++j) { -// arr_split[base + j] = 1; -// } -// } } #pragma omp barrier @@ -200,17 +169,9 @@ TEST_F(ThreadsafetyTest, ConcurrentAttach) { SKIP_TEST_MSG("requires at least 4 units"); } - constexpr size_t elem_per_thread = 100; - #if !defined(_OPENMP) SKIP_TEST_MSG("requires support for OpenMP"); #else - int num_threads; -#pragma omp parallel -#pragma omp master - { - num_threads = omp_get_num_threads(); - } dash::Team& team_all = dash::Team::All(); dash::Team& team_split = team_all.split(2); @@ -221,7 +182,7 @@ TEST_F(ThreadsafetyTest, ConcurrentAttach) { { int thread_id = omp_get_thread_num(); dash::Team *team = (thread_id == 0) ? &team_all : &team_split; - for (int i = 0; i < 1; ++i) { + for (int i = 0; i < thread_iterations; ++i) { allocator_t allocator(*team); elem_t *vals = allocator.allocate_local(elem_per_thread); for (size_t j = 0; j < elem_per_thread; ++j) { @@ -253,3 +214,68 @@ TEST_F(ThreadsafetyTest, ConcurrentAttach) { } #endif //!defined(_OPENMP) } + + +TEST_F(ThreadsafetyTest, ConcurrentMemAlloc) { + + using elem_t = int; + using pointer_t = dash::GlobPtr; + + if (!dash::is_multithreaded()) { + SKIP_TEST_MSG("requires support for multi-threading"); + } + + if (dash::size() < 4) { + SKIP_TEST_MSG("requires at least 4 units"); + } + + static constexpr size_t elem_per_thread = 10; + +#if !defined(_OPENMP) + SKIP_TEST_MSG("requires support for OpenMP"); +#else + + int num_threads; +#pragma omp parallel + { +#pragma omp master + { + num_threads = omp_get_num_threads(); + } + } + + dash::Team& team_all = dash::Team::All(); + dash::Team& team_split = team_all.split(2); + ASSERT_GT_U(team_all.size(), 0); + ASSERT_GT_U(team_split.size(), 0); + + pointer_t ptr[num_threads]; + +#pragma omp parallel num_threads(2) + { + int thread_id = omp_get_thread_num(); + dash::Team *team = (thread_id == 0) ? &team_all : &team_split; + dash::Array arr; + arr.allocate(team->size(), + dash::DistributionSpec<1>(), *team); + + for (int i = 0; i < thread_iterations; ++i) { + ptr[thread_id] = dash::memalloc(elem_per_thread); +#pragma omp barrier +#pragma omp master + ASSERT_NE_U(ptr[0], ptr[1]); +#pragma omp barrier + arr.local[0] = ptr[thread_id]; + arr.barrier(); + pointer_t rptr = arr[((team->myid() + 1) % team->size())]; + for (size_t i = 0; i < elem_per_thread; ++i) { + rptr[i] = thread_id; + } + arr.barrier(); + ASSERT_EQ_U(static_cast(ptr[thread_id][0]), thread_id); + arr.barrier(); + dash::memfree(ptr[thread_id]); + } + } +#endif //!defined(_OPENMP) +} From 196a60f937457a410f175ed1933b502560242dc8 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Mon, 13 Feb 2017 14:04:20 +0100 Subject: [PATCH 16/19] Fix fix for team-based unit IDs in shared memory get --- dart-impl/mpi/src/dart_communication.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/dart-impl/mpi/src/dart_communication.c b/dart-impl/mpi/src/dart_communication.c index 397feda82..3fecce8c8 100644 --- a/dart-impl/mpi/src/dart_communication.c +++ b/dart-impl/mpi/src/dart_communication.c @@ -844,8 +844,6 @@ dart_ret_t dart_get_blocking( #if !defined(DART_MPI_DISABLE_SHARED_WINDOWS) DART_LOG_DEBUG("dart_get_blocking: shared windows enabled"); - // store the team-local ID in the gptr - gptr.unitid = target_unitid_rel.id; if (seg_id >= 0 && team_data->sharedmem_tab[gptr.unitid].id >= 0) { // store the team-local ID in the gptr gptr.unitid = target_unitid_rel.id; From 555773f85f62508a841d82a37d0f644c03ed6074 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Mon, 13 Feb 2017 14:05:01 +0100 Subject: [PATCH 17/19] Add test case for concurrent dash::Algorithm invocation --- dash/test/ThreadsafetyTest.cc | 105 +++++++++++++++++++++++++--------- dash/test/ThreadsafetyTest.h | 12 +++- 2 files changed, 89 insertions(+), 28 deletions(-) diff --git a/dash/test/ThreadsafetyTest.cc b/dash/test/ThreadsafetyTest.cc index 2f1734f20..1341e8824 100644 --- a/dash/test/ThreadsafetyTest.cc +++ b/dash/test/ThreadsafetyTest.cc @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -15,7 +16,7 @@ #include #endif -static constexpr int thread_iterations = 100; +static constexpr int thread_iterations = 10; static constexpr size_t elem_per_thread = 100; TEST_F(ThreadsafetyTest, ThreadInit) { @@ -37,16 +38,8 @@ TEST_F(ThreadsafetyTest, ConcurrentPut) { #if !defined(_OPENMP) SKIP_TEST_MSG("requires support for OpenMP"); #else - int num_threads; -#pragma omp parallel - { -#pragma omp master - { - num_threads = omp_get_num_threads(); - } - } - size_t size = dash::size() * num_threads * elem_per_thread; + size_t size = dash::size() * _num_threads * elem_per_thread; array_t src(size); array_t dst(size); @@ -65,10 +58,10 @@ TEST_F(ThreadsafetyTest, ConcurrentPut) { { int thread_id = omp_get_thread_num(); array_t::index_type src_idx = dash::myid() - * (elem_per_thread * num_threads) + * (elem_per_thread * _num_threads) + (elem_per_thread * thread_id); array_t::index_type dst_idx = ((dash::myid() + 1) % dash::size()) - * (elem_per_thread * num_threads) + * (elem_per_thread * _num_threads) + (elem_per_thread * thread_id); for (size_t i = 0; i < elem_per_thread; ++i) { dst[dst_idx + i] = src[src_idx + i]; @@ -77,11 +70,8 @@ TEST_F(ThreadsafetyTest, ConcurrentPut) { dash::barrier(); -// for (int i = 0; i < num_threads; ++i) { -// std::cout << "[" << dash::myid() << "] array[" << i << "] = " << (int)array[i] << std::endl; -// } size_t pos = 0; - for (int i = 0; i < num_threads; ++i) { + for (int i = 0; i < _num_threads; ++i) { for (size_t j = 0; j < elem_per_thread; ++j) { ASSERT_EQ_U(dst.local[pos++], i); } @@ -117,10 +107,11 @@ TEST_F(ThreadsafetyTest, ConcurrentAlloc) { #pragma omp parallel num_threads(2) { int thread_id = omp_get_thread_num(); + // thread 0: contribute to allocation on team_all dash::Team *team = (thread_id == 0) ? &team_all : &team_split; array_t *arr = (thread_id == 0) ? &arr_all : &arr_split; for (int i = 0; i < thread_iterations; ++i) { - // thread 0: contribute to allocation on team_all +#pragma omp barrier if (i) { arr->deallocate(); } @@ -183,12 +174,12 @@ TEST_F(ThreadsafetyTest, ConcurrentAttach) { int thread_id = omp_get_thread_num(); dash::Team *team = (thread_id == 0) ? &team_all : &team_split; for (int i = 0; i < thread_iterations; ++i) { +#pragma omp barrier allocator_t allocator(*team); elem_t *vals = allocator.allocate_local(elem_per_thread); for (size_t j = 0; j < elem_per_thread; ++j) { vals[j] = thread_id; } - std::cout << "vals: " << vals << std::endl; dart_gptr_t gptr = allocator.attach(vals, elem_per_thread); ASSERT_NE_U(DART_GPTR_NULL, gptr); ASSERT_LT_U(gptr.segid, 0); // attached memory has segment ID < 0 @@ -235,21 +226,13 @@ TEST_F(ThreadsafetyTest, ConcurrentMemAlloc) { SKIP_TEST_MSG("requires support for OpenMP"); #else - int num_threads; -#pragma omp parallel - { -#pragma omp master - { - num_threads = omp_get_num_threads(); - } - } dash::Team& team_all = dash::Team::All(); dash::Team& team_split = team_all.split(2); ASSERT_GT_U(team_all.size(), 0); ASSERT_GT_U(team_split.size(), 0); - pointer_t ptr[num_threads]; + pointer_t ptr[_num_threads]; #pragma omp parallel num_threads(2) { @@ -260,6 +243,7 @@ TEST_F(ThreadsafetyTest, ConcurrentMemAlloc) { dash::DistributionSpec<1>(), *team); for (int i = 0; i < thread_iterations; ++i) { +#pragma omp barrier ptr[thread_id] = dash::memalloc(elem_per_thread); #pragma omp barrier #pragma omp master @@ -279,3 +263,70 @@ TEST_F(ThreadsafetyTest, ConcurrentMemAlloc) { } #endif //!defined(_OPENMP) } + + +TEST_F(ThreadsafetyTest, ConcurrentAlgorithm) { + + using elem_t = int; + using array_t = dash::Array; + + if (!dash::is_multithreaded()) { + SKIP_TEST_MSG("requires support for multi-threading"); + } + + if (dash::size() < 4) { + SKIP_TEST_MSG("requires at least 4 units"); + } + + static constexpr size_t elem_per_thread = 10; + +#if !defined(_OPENMP) + SKIP_TEST_MSG("requires support for OpenMP"); +#else + + dash::Team& team_all = dash::Team::All(); + dash::Team& team_split = team_all.split(2); + ASSERT_GT_U(team_all.size(), 0); + ASSERT_GT_U(team_split.size(), 0); + +#pragma omp parallel num_threads(2) + { + int thread_id = omp_get_thread_num(); + dash::Team *team = (thread_id == 0) ? &team_all : &team_split; + size_t num_elem = team->size() * elem_per_thread; + array_t arr(num_elem, *team); + elem_t *vals = new elem_t[num_elem]; + for (int i = 0; i < thread_iterations; ++i) { +#pragma omp barrier + dash::fill(arr.begin(), arr.end(), thread_id); + ASSERT_EQ_U(arr.local[0], thread_id); + elem_t acc = dash::accumulate(arr.begin(), arr.end(), 0); + // TODO: dash::accumulate is still broken + if (team->myid() == 0) { + ASSERT_EQ_U(num_elem * thread_id, acc); + } + dash::copy(arr.begin(), arr.end(), vals); + ASSERT_EQ_U(vals[team->myid() * elem_per_thread], thread_id); + + std::function f = [=](const elem_t& val){ + ASSERT_EQ_U(thread_id, val); + }; + dash::for_each(arr.begin(), arr.end(), f); + + std::function g = [=](){ + return (thread_id + 1) * (team->myid() + 1); + }; + dash::generate(arr.begin(), arr.end(), g); + // wait here because dash::generate does not block + arr.barrier(); + elem_t min = *(dash::min_element(arr.begin(), arr.end())); + ASSERT_EQ_U((thread_id + 1), min); + elem_t max = *(dash::max_element(arr.begin(), arr.end())); + ASSERT_EQ_U((thread_id + 1) * team->size(), max); + arr.barrier(); + } + delete[] vals; + } +#endif // !defined(_OPENMP) +} + diff --git a/dash/test/ThreadsafetyTest.h b/dash/test/ThreadsafetyTest.h index 95df18505..6ba47a163 100644 --- a/dash/test/ThreadsafetyTest.h +++ b/dash/test/ThreadsafetyTest.h @@ -15,9 +15,19 @@ * Test fixture for onesided operations provided by DART. */ class ThreadsafetyTest : public dash::test::TestBase { +protected: + int _num_threads; + public: ThreadsafetyTest() { - + + #pragma omp parallel + { + #pragma omp master + { + _num_threads = omp_get_num_threads(); + } + } } virtual ~ThreadsafetyTest() { From 5ba4ce2b44b85d9054a50b3269a88a569bb77465 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Mon, 13 Feb 2017 14:10:41 +0100 Subject: [PATCH 18/19] Fix braces in DART_FETCH_AND_DECPTR --- dart-impl/base/include/dash/dart/base/atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dart-impl/base/include/dash/dart/base/atomic.h b/dart-impl/base/include/dash/dart/base/atomic.h index 638253aa3..081dee191 100644 --- a/dart-impl/base/include/dash/dart/base/atomic.h +++ b/dart-impl/base/include/dash/dart/base/atomic.h @@ -127,7 +127,7 @@ #define DART_DEC_AND_FETCH8(ptr) \ __sync_sub_and_fetch((int8_t *)(ptr), 1) #define DART_DEC_AND_FETCHPTR(ptr) \ - __sync_sub_and_fetch((void **)(ptr), sizeof(**ptr)) + __sync_sub_and_fetch((void **)(ptr), sizeof(**(ptr))) #define DART_COMPARE_AND_SWAP64(ptr, oldval, newval) \ From 459d4164d15e5b11595572053c632fc0da2421e2 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Mon, 13 Feb 2017 15:47:20 +0100 Subject: [PATCH 19/19] Use DASH_ENABLE_OPENMP to guard OpenMP statements --- dash/test/ThreadsafetyTest.cc | 20 ++++++++++---------- dash/test/ThreadsafetyTest.h | 12 +++++++++--- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/dash/test/ThreadsafetyTest.cc b/dash/test/ThreadsafetyTest.cc index 1341e8824..6281dac7c 100644 --- a/dash/test/ThreadsafetyTest.cc +++ b/dash/test/ThreadsafetyTest.cc @@ -12,7 +12,7 @@ #include -#if defined(_OPENMP) +#if defined(DASH_ENABLE_OPENMP) #include #endif @@ -35,7 +35,7 @@ TEST_F(ThreadsafetyTest, ConcurrentPut) { SKIP_TEST_MSG("requires support for multi-threading"); } -#if !defined(_OPENMP) +#if !defined(DASH_ENABLE_OPENMP) SKIP_TEST_MSG("requires support for OpenMP"); #else @@ -93,7 +93,7 @@ TEST_F(ThreadsafetyTest, ConcurrentAlloc) { using elem_t = int; using array_t = dash::Array; -#if !defined(_OPENMP) +#if !defined(DASH_ENABLE_OPENMP) SKIP_TEST_MSG("requires support for OpenMP"); #else @@ -144,7 +144,7 @@ TEST_F(ThreadsafetyTest, ConcurrentAlloc) { #pragma omp barrier } -#endif //!defined(_OPENMP) +#endif //!defined(DASH_ENABLE_OPENMP) } TEST_F(ThreadsafetyTest, ConcurrentAttach) { @@ -160,7 +160,7 @@ TEST_F(ThreadsafetyTest, ConcurrentAttach) { SKIP_TEST_MSG("requires at least 4 units"); } -#if !defined(_OPENMP) +#if !defined(DASH_ENABLE_OPENMP) SKIP_TEST_MSG("requires support for OpenMP"); #else @@ -203,7 +203,7 @@ TEST_F(ThreadsafetyTest, ConcurrentAttach) { } #pragma omp barrier } -#endif //!defined(_OPENMP) +#endif //!defined(DASH_ENABLE_OPENMP) } @@ -222,7 +222,7 @@ TEST_F(ThreadsafetyTest, ConcurrentMemAlloc) { static constexpr size_t elem_per_thread = 10; -#if !defined(_OPENMP) +#if !defined(DASH_ENABLE_OPENMP) SKIP_TEST_MSG("requires support for OpenMP"); #else @@ -261,7 +261,7 @@ TEST_F(ThreadsafetyTest, ConcurrentMemAlloc) { dash::memfree(ptr[thread_id]); } } -#endif //!defined(_OPENMP) +#endif //!defined(DASH_ENABLE_OPENMP) } @@ -280,7 +280,7 @@ TEST_F(ThreadsafetyTest, ConcurrentAlgorithm) { static constexpr size_t elem_per_thread = 10; -#if !defined(_OPENMP) +#if !defined(DASH_ENABLE_OPENMP) SKIP_TEST_MSG("requires support for OpenMP"); #else @@ -327,6 +327,6 @@ TEST_F(ThreadsafetyTest, ConcurrentAlgorithm) { } delete[] vals; } -#endif // !defined(_OPENMP) +#endif // !defined(DASH_ENABLE_OPENMP) } diff --git a/dash/test/ThreadsafetyTest.h b/dash/test/ThreadsafetyTest.h index 6ba47a163..43f0195ee 100644 --- a/dash/test/ThreadsafetyTest.h +++ b/dash/test/ThreadsafetyTest.h @@ -10,24 +10,30 @@ #include "TestBase.h" +#if defined(DASH_ENABLE_OPENMP) +#include +#endif // DASH_ENABLE_OPENMP + /** * Test fixture for onesided operations provided by DART. */ class ThreadsafetyTest : public dash::test::TestBase { protected: - int _num_threads; + int _num_threads = 1; public: ThreadsafetyTest() { - #pragma omp parallel +#if defined(DASH_ENABLE_OPENMP) +#pragma omp parallel { - #pragma omp master +#pragma omp master { _num_threads = omp_get_num_threads(); } } +#endif // DASH_ENABLE_OPENMP } virtual ~ThreadsafetyTest() {