diff --git a/config/qthread_check_atomics.m4 b/config/qthread_check_atomics.m4 deleted file mode 100644 index cd0628a8f..000000000 --- a/config/qthread_check_atomics.m4 +++ /dev/null @@ -1,197 +0,0 @@ -# -*- Autoconf -*- -# -# Copyright (c) 2008 Sandia Corporation -# - -# QTHREAD_CHECK_ATOMICS([action-if-found], [action-if-not-found]) -# ------------------------------------------------------------------------------ -AC_DEFUN([QTHREAD_CHECK_ATOMICS], [ -AC_REQUIRE([QTHREAD_DETECT_COMPILER_TYPE]) -AC_ARG_ENABLE([builtin-atomics], - [AS_HELP_STRING([--disable-builtin-atomics], - [force the use of inline-assembly (if possible) rather than compiler-builtins for atomics. This is useful for working around some compiler bugs; normally, it's preferable to use compiler builtins.])]) -AC_CACHE_CHECK([whether compiler supports builtin atomic CAS-32], - [qthread_cv_atomic_CAS32], - [AC_LINK_IFELSE([AC_LANG_SOURCE([[ -#include -#include /* for uint32_t */ - -int main(void) -{ -uint32_t bar=1, old=1, new=2; -uint32_t foo = __sync_val_compare_and_swap(&bar, old, new); -return (int)foo; -}]])], - [qthread_cv_atomic_CAS32="yes"], - [qthread_cv_atomic_CAS32="no"])]) -AC_CACHE_CHECK([whether compiler supports builtin atomic CAS-64], - [qthread_cv_atomic_CAS64], - [AC_LINK_IFELSE([AC_LANG_SOURCE([[ -#include -#include /* for uint64_t */ - -int main(void) -{ -uint64_t bar=1, old=1, new=2; -uint64_t foo = __sync_val_compare_and_swap(&bar, old, new); -return foo; -}]])], - [qthread_cv_atomic_CAS64="yes"], - [qthread_cv_atomic_CAS64="no"])]) -AC_CACHE_CHECK([whether compiler supports builtin atomic CAS-ptr], - [qthread_cv_atomic_CASptr], - [AC_LINK_IFELSE([AC_LANG_SOURCE([[ -#include - -int main(void) -{ -void *bar=(void*)1, *old=(void*)1, *new=(void*)2; -void *foo = __sync_val_compare_and_swap(&bar, old, new); -return (int)(long)foo; -}]])], - [qthread_cv_atomic_CASptr="yes"], - [qthread_cv_atomic_CASptr="no"])]) -AS_IF([test "x$qthread_cv_atomic_CAS32" = "xyes" && test "x$qthread_cv_atomic_CAS64" = "xyes" && test "x$qthread_cv_atomic_CASptr" = "xyes"], - [qthread_cv_atomic_CAS=yes], - [qthread_cv_atomic_CAS=no]) -AC_ARG_ENABLE([cmpxchg16b], - [AS_HELP_STRING([--enable-cmpxchg16b], - [forces acceptance or rejection of the cmpxchg16b instruction; useful primarily for cross-compiling])]) -AC_CACHE_CHECK([whether the compiler supports CMPXCHG16B], - [qthread_cv_cmpxchg16b], - [AS_IF([test "x$qthread_cv_asm_arch" = xAMD64], - [AC_RUN_IFELSE([AC_LANG_SOURCE([[ -#include /* for uint64_t and intptr_t (C99) */ -struct m128 { -uint64_t a,b; -}; -int main(void) -{ -char blob[sizeof(struct m128)*4]; -intptr_t b2 = (intptr_t)blob; -struct m128 *one, *two, *three; -if (b2 & 0xf) { // fix alignment -b2 += 0xf; -b2 -= (b2 & 0xf); -} -one = (struct m128*)b2; -two = one+1; -three = two+1; -one->a = 1; -one->b = 2; -two->a = 3; -two->b = 4; -three->a = 5; -three->b = 6; -__asm__ __volatile__ ("lock cmpxchg16b %2" -:"=a"(three->a),"=d"(three->b),"+m"(*two) -:"a"(two->a),"d"(two->b),"b"(one->a),"c"(one->b) -:"cc", "memory"); -if (three->a != 3) { -return -1; -} else { -return 0; -} -}]])], - [qthread_cv_cmpxchg16b="yes"], - [qthread_cv_cmpxchg16b="no"], - [AS_IF([test "x$enable_cmpxchg16b" = x], - [case "$host" in # for vim: ( ( - x86_64-*) qthread_cv_cmpxchg16b="yes" ;; - *) qthread_cv_cmpxchg16b="no" ;; - esac], - [qthread_cv_cmpxchg16b="$enable_cmpxchg16b"])])], - [qthread_cv_cmpxchg16b="no"])]) -qthread_cv_atomic_CAS128="$qthread_cv_cmpxchg16b" -AC_CACHE_CHECK([whether compiler supports builtin atomic incr], - [qthread_cv_atomic_incr], - [AS_IF([test "$1" -eq 8], - [AC_LINK_IFELSE([AC_LANG_SOURCE([[ -#include -#include /* for uint64_t */ - -int main(void) -{ -uint64_t bar=1; -uint64_t foo = __sync_fetch_and_add(&bar, 1); -return foo; -}]])], - [qthread_cv_atomic_incr="yes"], - [qthread_cv_atomic_incr="no"])], - [AC_LINK_IFELSE([AC_LANG_SOURCE([[ -#include -#include /* for uint32_t */ - -int main(void) -{ -uint32_t bar=1; -uint32_t foo = __sync_fetch_and_add(&bar, 1); -return foo; -}]])], - [qthread_cv_atomic_incr="yes"], - [qthread_cv_atomic_incr="no"])]) - ]) -AS_IF([test "$qthread_cv_atomic_incr" = "yes"], - [AC_CACHE_CHECK([whether builtin atomic increment works correctly], - [qt_cv_atomic_incr_works], - [AS_IF([test "$1" -eq 8], - [AC_RUN_IFELSE([AC_LANG_SOURCE([[ -#include -#include /* for uint64_t */ - -int main(int argc, char *argv[]) -{ -uint64_t master = 0; -if ((__sync_fetch_and_add(&master, 1) != 0) || (master != 1)) { - return -1; -} -master = 0xFFFFFFFF; -if ((__sync_fetch_and_add(&master, 1) != 0xFFFFFFFF) || - (master != 0x100000000ULL)) { - return -2; -} -master = 0; -if ((__sync_fetch_and_add(&master, 0x100000000ULL) != 0) || - (master != 0x100000000ULL)) { - return -3; -} -master = 0; -__sync_fetch_and_add(&master, 0x100000000ULL); -if (master != 0x100000000ULL) { - return -4; -} -return 0; -}]])], - [qt_cv_atomic_incr_works="yes"], - [qt_cv_atomic_incr_works="no"], - [qt_cv_atomic_incr_works="assuming yes"])], - [AC_RUN_IFELSE([AC_LANG_SOURCE([[ -#include -#include /* for uint32_t */ - -int main(void) -{ -uint64_t master = 0; -if ((__sync_fetch_and_add(&master, 1) != 0) || (master != 1)) { - return -1; -} -return 0; -}]])], - [qt_cv_atomic_incr_works="yes"], - [qt_cv_atomic_incr_works="no"], - [qt_cv_atomic_incr_works="assuming yes"])]) - ])]) -AS_IF([test "x$qthread_cv_atomic_CASptr" = "xyes"], - [AC_DEFINE([QTHREAD_ATOMIC_CAS_PTR],[1], - [if the compiler supports __sync_val_compare_and_swap on pointers])]) -AS_IF([test "x$qthread_cv_atomic_CAS32" = "xyes"], - [AC_DEFINE([QTHREAD_ATOMIC_CAS32],[1], - [if the compiler supports __sync_val_compare_and_swap on 32-bit ints])]) -AS_IF([test "x$qthread_cv_atomic_CAS64" = "xyes"], - [AC_DEFINE([QTHREAD_ATOMIC_CAS64],[1], - [if the compiler supports __sync_val_compare_and_swap on 64-bit ints])]) -AS_IF([test "x$qthread_cv_atomic_CAS" = "xyes"], - [AC_DEFINE([QTHREAD_ATOMIC_CAS],[1],[if the compiler supports __sync_val_compare_and_swap])]) -AS_IF([test "$qthread_cv_atomic_incr" = "yes" -a "$qt_cv_atomic_incr_works" != "no"], - [AC_DEFINE([QTHREAD_ATOMIC_INCR],[1],[if the compiler supports __sync_fetch_and_add])]) -]) diff --git a/config/qthread_check_libnuma.m4 b/config/qthread_check_libnuma.m4 deleted file mode 100644 index a17554018..000000000 --- a/config/qthread_check_libnuma.m4 +++ /dev/null @@ -1,58 +0,0 @@ -# -*- Autoconf -*- -# -# Copyright (c) 2008 Sandia Corporation -# - -# QTHREAD_CHECK_LIBNUMA([action-if-found], [action-if-not-found]) -# ------------------------------------------------------------------------------ -AC_DEFUN([QTHREAD_CHECK_LIBNUMA], [ -AC_CHECK_HEADERS([numa.h], - [libnuma_happy=yes - break], - [libnuma_happy=no]) -QT_OLDLIBS="$LIBS" -AS_IF([test "x$libnuma_happy" = "xyes"], - [AC_SEARCH_LIBS([numa_available], - [numa], - [libnuma_happy=yes], - [libnuma_happy=no])]) -AS_IF([test "x$libnuma_happy" = "xyes"], - [AC_MSG_CHECKING(if NUMA is available) - LIBS="$LIBS -lnuma" - AC_TRY_RUN([ -#include -int main() { return ( numa_available() != -1 ) ? 0 : 1; } - ], - [libnuma_happy=yes], - [libnuma_happy=no - LIBS="$QT_OLDLIBS"], - [libnuma_happy=no - LIBS="$QT_OLDLIBS"]) - AC_MSG_RESULT($libnuma_happy) - ]) - -AS_IF([test "x$libnuma_happy" = "xyes"], - [ - dnl okay, so at this point, we need to determine what KIND of - dnl libnuma interface we're dealing with - AC_CHECK_FUNC([numa_allocate_nodemask], - [AC_DEFINE([QTHREAD_LIBNUMA_V2],[1],[if libnuma provides numa_allocate_nodemask])]) - AC_CHECK_FUNCS([numa_num_configured_cpus numa_num_thread_cpus numa_bitmask_nbytes numa_distance]) - AS_IF([test "x$ac_cv_func_numa_distance" = "xyes"], - [AC_TRY_RUN([ -#include -int main() { return (numa_distance(0,0) >= 0); } - ], - [numa_distance_happy=yes], - [numa_distance_happy=no], - [numa_distance_happy=yes])]) - AS_IF([test "x$numa_distance_happy" = "xyes"], - [AC_DEFINE([QTHREAD_NUMA_DISTANCE_WORKING],[1],[if libnuma's numa_distance() function works])]) - ]) - -AS_IF([test "x$libnuma_happy" = "xyes"], - [AC_DEFINE([QTHREAD_HAVE_LIBNUMA],[1],[if libnuma is available]) - AS_IF([test "x$ac_cv_func_numa_allocate_nodemask" == "xyes"], - [$2],[$1])], - [$3]) -]) diff --git a/config/qthread_check_linux.m4 b/config/qthread_check_linux.m4 deleted file mode 100644 index 1379c774f..000000000 --- a/config/qthread_check_linux.m4 +++ /dev/null @@ -1,36 +0,0 @@ -# -*- Autoconf -*- -# -# Copyright (c) 2008 Sandia Corporation -# - -# QTHREAD_CHECK_LINUX([action-if-found], [action-if-not-found]) -# ------------------------------------------------------------------------------ -AC_DEFUN([QTHREAD_CHECK_LINUX], [ -AC_CACHE_CHECK([for _SC_NPROCESSORS_CONF], - [qthread_cv_sc_nprocessors_conf], - [AC_LINK_IFELSE([AC_LANG_SOURCE([[ -#include - -int main() { - return sysconf(_SC_NPROCESSORS_CONF); -}]])], - [qthread_cv_sc_nprocessors_conf=yes], - [qthread_cv_sc_nprocessors_conf=no])]) -AS_IF([test "x$qthread_cv_sc_nprocessors_conf" = xyes], - [AC_DEFINE([HAVE_SC_NPROCESSORS_CONF], [1], [define if you have _SC_NPROCESSORS_CONF])], - [AC_CACHE_CHECK([for HW_NCPU], - [qthread_cv_hw_ncpu], - [AC_LINK_IFELSE([AC_LANG_SOURCE([[ -#include - -int main() { - int name[2] = { CTL_HW, HW_NCPU }; - unsigned int oldv; - unsigned int oldvlen = sizeof(oldv); - return sysctl(name, &oldv, &oldvlen, NULL, 0); -}]])], - [qthread_cv_hw_ncpu=yes], - [qthread_cv_hw_ncpu=no])]) - AS_IF([test "x$qthread_cv_hw_ncpu" = xyes], - [AC_DEFINE([HAVE_HW_NCPU], [1], [define if you have HW_NCPU and CTL_HW])])]) -]) diff --git a/config/qthread_check_machtopo.m4 b/config/qthread_check_machtopo.m4 deleted file mode 100644 index 5bbf68414..000000000 --- a/config/qthread_check_machtopo.m4 +++ /dev/null @@ -1,29 +0,0 @@ -# -*- Autoconf -*- -# -# Copyright (c) 2008 Sandia Corporation -# - -# QTHREAD_CHECK_MACHTOPO([action-if-found], [action-if-not-found]) -# ------------------------------------------------------------------------------ -AC_DEFUN([QTHREAD_CHECK_MACHTOPO], [ - qt_allgoodsofar=yes - AC_CHECK_HEADERS([mach/mach_init.h mach/thread_policy.h],[], - [qt_allgoodsofar=no - break]) - AS_IF([test "x$qt_allgoodsofar" = xyes], - [AC_SEARCH_LIBS([thread_policy_set],[],[], - [qt_allgoodsofar=no])]) - AS_IF([test "x$qt_allgoodsofar" = xyes], - [AC_SEARCH_LIBS([thread_policy_get],[],[], - [qt_allgoodsofar=no])]) - AS_IF([test "x$qt_allgoodsofar" = xyes], - [AC_CHECK_DECL([THREAD_AFFINITY_POLICY_COUNT],[], - [qt_allgoodsofar=no], - [[#include -#include ]])]) - - AS_IF([test "x$qt_allgoodsofar" = xyes], - [AC_DEFINE([QTHREAD_HAVE_MACHTOPO],[1],[if the machine has a MacOS-style Mach topology interface]) - $1], - [$2]) -]) diff --git a/config/qthread_check_plpa.m4 b/config/qthread_check_plpa.m4 deleted file mode 100644 index 61d209d1b..000000000 --- a/config/qthread_check_plpa.m4 +++ /dev/null @@ -1,37 +0,0 @@ -# -*- Autoconf -*- -# -# Copyright (c) 2008 Sandia Corporation -# - -# QTHREAD_CHECK_PLPA([action-if-found], [action-if-not-found]) -# ------------------------------------------------------------------------------ -AC_DEFUN([QTHREAD_CHECK_PLPA], [ -AC_CHECK_LIB([plpa],[plpa_api_probe], - [plpa_found=yes - LIBS="$LIBS -lplpa" - AC_MSG_CHECKING(whether plpa works) - AC_RUN_IFELSE([AC_LANG_SOURCE([[ -#include -#include -#include - -int main(int argc, char* argv[]) { - plpa_api_type_t p; - if (0 == plpa_api_probe(&p) && PLPA_PROBE_OK == p) { - return 0; - } else { - assert(0); - return -1; - } -} - ]])], - [plpa_happy=yes], - [plpa_happy=no], - [plpa_happy=no]) - AC_MSG_RESULT($plpa_happy) - ]) - AS_IF([test "x$plpa_happy" = "xyes"], - [AC_DEFINE([QTHREAD_USE_PLPA],[1],[define to 1 if PLPA is available and works]) - $1], - [$2]) -]) diff --git a/configure.ac b/configure.ac index c875aadb0..ec2bee63c 100644 --- a/configure.ac +++ b/configure.ac @@ -6,7 +6,7 @@ ## --------------------- ## ## Autoconf Requirements ## ## --------------------- ## -AC_PREREQ(2.59) +AC_PREREQ(2.71) ## ---------------------------------- ## ## Autoconf / Automake Initialization ## @@ -18,28 +18,14 @@ AC_CONFIG_AUX_DIR([config]) AC_CONFIG_MACRO_DIR([config]) AC_CONFIG_SRCDIR([src/qthread.c]) -dnl Require at least AM 1.7. Prior versions require AM_CONFIG_HEADER -dnl and have no equivalent to AC_CONFIG_HEADERS (the plural version), -dnl which is a pain. AM 1.7 also added support for putting target -dnl information in AM_CONDITIONAL blocks and using the += modifier. -dnl ... removed "check-news" because of automatic version generation -# Automake's silent rules were implemented in the same version that -# color-tests was implemented, so we can use one to detect the other. -# This nasty, dirty, unreliable trick is strongly discouraged by its author: -# http://blog.flameeyes.eu/trackbacks?article_id=5155 -m4_ifdef([AM_SILENT_RULES], - [m4_define([qt_color_tests], [color-tests])], - [m4_define([qt_color_tests], [])]) -AM_INIT_AUTOMAKE([foreign subdir-objects dist-bzip2 no-define 1.7 ]qt_color_tests) -# If Automake supports silent rules, enable them (credit to Brian) -m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) +AM_INIT_AUTOMAKE([foreign subdir-objects dist-bzip2 no-define 1.16 ]) +AM_SILENT_RULES([yes]) # clean some extra things... CLEANFILES="*~ .\#* .gdb_history" AC_SUBST(CLEANFILES) AC_CANONICAL_HOST -AC_PATH_PROG([timelimit_path],[timelimit]) ## ----------------- ## ## Check the options ## @@ -53,21 +39,6 @@ AC_ARG_ENABLE([debugging], [AS_HELP_STRING([--enable-debugging], [turns off optimization and turns on debug flags])]) -AC_ARG_ENABLE([inlined-functions], - [AS_HELP_STRING([--disable-inlined-functions], - [prevent functions from being inlined; this - sometimes is useful for analysis of the code in - a debugger or some other tool])]) - -AC_ARG_ENABLE([hardware-atomics], - [AS_HELP_STRING([--disable-hardware-atomics], - [use mutexes to do the internal atomics; - compatible with more compilers and more - platforms, but slow.])]) -AC_ARG_ENABLE([hardware-increments],[], - [AS_IF([test "x$enable_hardware_atomics" != x], - [enable_hardware_atomics="$enable_hardware_increments"])]) - AC_ARG_ENABLE([hardware-timer], [AS_HELP_STRING([--disable-hardware-timer], [force the use of gettimeofday even if there is a @@ -335,10 +306,9 @@ AC_ARG_ENABLE([third-party-benchmarks], AC_ARG_WITH([scheduler], [AS_HELP_STRING([--with-scheduler=[[type]]], [Specify the scheduler. Options when using - single-threaded shepherds are: nemesis (default), - lifo, mdlifo, mutexfifo, and mtsfifo. Options - when using multi-threaded shepherds are: sherwood - (default), distrib and nottingham. Details on + single-threaded shepherds are: nemesis (default). + Options when using multi-threaded shepherds are: + sherwood (default), and distrib. Details on these options are in the SCHEDULING file.])]) AC_ARG_WITH([sinc], @@ -455,11 +425,6 @@ AS_IF([test "x$enable_picky" = xyes], esac]) QTHREAD_CHECK_ASSEMBLY([have_assembly=1], [have_assembly=0]) -case "$qthread_cv_asm_arch" in - POWERPC32) - compile_compat_atomic=yes - ;; -esac case "$qthread_cv_asm_arch" in POWERPC*) AS_IF([test "x$qthread_cv_c_compiler_type" = "xApple-GNU4.0"], @@ -580,25 +545,8 @@ case "$qthread_cv_c_compiler_type" in ;; esac -# Figure out whether the compiler has builtin atomic operations -AS_IF([test "x$enable_hardware_atomics" != xno], - [QTHREAD_CHECK_ATOMICS($sizeof_aligned_t)]) -QTHREAD_VAMACROS - QTHREAD_BUILTIN_SYNCHRONIZE -AS_IF([test "x$have_assembly" = "x0" -a "x$qthread_cv_atomic_CAS32" = "xno" -a "x$qthread_cv_atomic_CAS64" = "xno" -a "x$qthread_cv_atomic_incr" = "xno"], - [AC_MSG_NOTICE(Compiling on a compiler without inline assembly support and without builtin atomics. This will be slow!) - AS_IF([test "x$enable_hardware_atomics" = x], - [AC_MSG_NOTICE(turning on software increments ($have_assembly)) - enable_hardware_increments=no]) - AS_IF([test "x$enable_hardware_atomics" = "xyes"], - [AC_MSG_ERROR([No assembly available and software increments disabled.])])], - [AS_IF([test "x$enable_hardware_atomics" = x], [enable_hardware_atomics="yes"])]) - -AS_IF([test "x$enable_hardware_atomics" = "xno"], - [compile_compat_atomic=yes - AC_DEFINE([QTHREAD_MUTEX_INCREMENT], [1], [Use mutexes instead of assembly for atomic increment])]) AS_IF([test "x$enable_guard_pages" = "xyes"], [AC_DEFINE([QTHREAD_GUARD_PAGES], [1], [Use guard pages to detect stack overruns])], [enable_guard_pages="no"]) @@ -670,18 +618,9 @@ AS_IF([test "x$with_scheduler" != "x"], default) [with_scheduler="sherwood"] ;; - sherwood|nemesis|lifo|mutexfifo|mtsfifo|distrib) + sherwood|nemesis|distrib) # all valid options that require no additional configuration ;; - mdlifo) - [with_scheduler=lifo] - [using_mdlifo=yes] - AC_DEFINE([QTHREAD_LIFO_MULTI_DEQUEUER], [1], [Enable multiple-dequeuer support for lifo scheduler]) - ;; - nottingham) - AS_IF([test "x$qthread_cv_atomic_CAS128" != "xyes"], - [AC_MSG_ERROR([The $with_scheduler scheduler requires a 128-bit CAS.])]) - ;; *) AC_MSG_ERROR([Unknown scheduler option]) ;; @@ -699,16 +638,10 @@ AS_IF([test "x$enable_internal_spinlock" = "x"], esac]) -AS_IF([test "x$enable_hardware_atomics" = "xno"], - [AS_IF([test "x$with_scheduler" != "xsherwood"], - [with_scheduler="sherwood" - AC_MSG_WARN([Forcing scheduler to be sherwood, since hardware atomic support is lacking.])]) - AS_IF([test "x$enable_lf_fegs" = "xyes"], - [AC_MSG_ERROR([FEBs cannot use a lock-free hash table, since hardware atomic support is lacking.])])], - [AS_IF([test "x$enable_internal_spinlock" = x], - [enable_internal_spinlock=yes]) - AS_IF([test "x$enable_internal_spinlock" = xyes], - [AC_DEFINE([USE_INTERNAL_SPINLOCK], [1], [Use Porterfield spinlock])])]) +AS_IF([test "x$enable_internal_spinlock" = x], + [enable_internal_spinlock=yes]) +AS_IF([test "x$enable_internal_spinlock" = xyes], + [AC_DEFINE([USE_INTERNAL_SPINLOCK], [1], [Use Porterfield spinlock])]) AS_IF([test "x$enable_steal_profiling" = xyes], [AC_DEFINE([STEAL_PROFILE], [1], [Support dynamic profile of steal infomation])], @@ -860,11 +793,6 @@ AS_IF([test "x$enable_lazy_threadids" = "xno" -o "x$enable_debug" != "xno"], enable_lazy_threadids=no], [enable_lazy_threadids=yes]) -AS_IF([test "x$enable_inlined_functions" != "xno"], - [qinline_define=inline], - [qinline_define=""]) -AC_DEFINE_UNQUOTED([QINLINE], [$qinline_define], [Allow function inlining to be toggled]) - AS_IF([test "x$enable_header_syscall_interception" == xyes], [AC_DEFINE([USE_HEADER_SYSCALLS], [1], [Define to allow blocking syscalls to be mangled into qthread-specific variants])]) @@ -876,30 +804,11 @@ AS_IF([test "x$qthread_topo" != xno], [AS_IF([test "x$qthread_topo" = "xnone_specified"], [qthread_topo=no]) # First, check for hwloc, since it gives me the most portable/flexible/reliable/detailed information. - AS_IF([test "x$qthread_topo" = xno -o "x$qthread_topo" = xbinders -o "x$qthread_topo" = xhwloc -o "x$qthread_topo" = xhwloc_v2], - [QTHREAD_CHECK_HWLOC([AS_IF([test "x$qthread_topo" != xhwloc -a "x$qthread_topo" != xhwloc_v2 -a "x$qthread_topo" != xbinders], + AS_IF([test "x$qthread_topo" = xno -o "x$qthread_topo" = xbinders -o "x$qthread_topo" = xhwloc], + [QTHREAD_CHECK_HWLOC([AS_IF([test "x$qthread_topo" != xhwloc -a "x$qthread_topo" != xbinders], [qthread_topo=hwloc])], [AS_IF([test "x$qthread_topo" != xno], [AC_MSG_ERROR([Specified topology library ($qthread_topo) does not work.])])])]) - AS_IF([test "x$qthread_topo" = xno -o "x$qthread_topo" = xlibnuma -o "x$qthread_topo" = xlibnumaV2], - [QTHREAD_CHECK_LIBNUMA([qthread_topo=libnuma], - [qthread_topo=libnumaV2], - [AS_IF([test "x$qthread_topo" != xno], - [AC_MSG_ERROR([Specified topology library ($qthread_topo) does not work.])])])]) - # Third, check any others. - AS_IF([test "x$qthread_topo" = xno -o "x$qthread_topo" = xmach], - [QTHREAD_CHECK_MACHTOPO([qthread_topo=mach], - [AS_IF([test "x$qthread_topo" != xno], - [AC_MSG_ERROR([Specified topology library ($qthread_topo) does not work.])])])]) - # PLPA is deprecated in favor of hwloc - AS_IF([test "x$qthread_topo" = xno -o "x$qthread_topo" = xplpa], - [QTHREAD_CHECK_PLPA([qthread_topo=plpa], - [AS_IF([test "x$qthread_topo" != xno], - [AC_MSG_ERROR([Specified topology library ($qthread_topo) does not work.])])])]) - AS_IF([test "x$qthread_topo" = xno -o "x$qthread_topo" = xsys], - [QTHREAD_CHECK_LINUX([qthread_topo=sys], - [AS_IF([test "x$qthread_topo" = xsys], - [AC_MSG_ERROR([Specified topology library ($qthread_topo) does not work.])])])]) ]) AS_IF([test "x$enable_lf_febs" == "xyes"], @@ -926,11 +835,9 @@ AM_CONDITIONAL([QTHREAD_TIMER_TYPE_GETTIME], [test "x$qthread_timer_type" = "xcl AM_CONDITIONAL([QTHREAD_TIMER_TYPE_MACH], [test "x$qthread_timer_type" = "xmach"]) AM_CONDITIONAL([QTHREAD_TIMER_TYPE_GETHRTIME], [test "x$qthread_timer_type" = "xgethrtime"]) AM_CONDITIONAL([QTHREAD_TIMER_TYPE_GETTIMEOFDAY], [test "x$qthread_timer_type" = "xgettimeofday"]) -AM_CONDITIONAL([COMPILE_COMPAT_ATOMIC], [test "x$compile_compat_atomic" = "xyes"]) AM_CONDITIONAL([COMPILE_SPAWNCACHE], [test "x$enable_spawn_cache" = "xyes"]) AM_CONDITIONAL([COMPILE_EUREKAS], [test "x$enable_eurekas" = "xyes"]) AM_CONDITIONAL([HAVE_GUARD_PAGES], [test "x$enable_guard_pages" = "xyes"]) -AM_CONDITIONAL([HAVE_PROG_TIMELIMIT], [test "x$timelimit_path" != "x"]) AM_CONDITIONAL([QTHREAD_PERFORMANCE], [test "$enable_performance_monitoring" = "yes"]) AM_CONDITIONAL([WANT_SINGLE_WORKER_SCHEDULER], [test "x$with_scheduler" = "xnemesis" -o "x$with_scheduler" = "xlifo" -o "x$with_scheduler" = "xmutexfifo" -o "x$with_scheduler" = "xmtsfifo" -o "x$with_scheduler" = "xmdlifo"]) AM_CONDITIONAL([COMPILE_OMP_BENCHMARKS], [test "x$have_openmp" = "xyes"]) @@ -1004,17 +911,13 @@ AS_IF([test "x$enable_spawn_cache" = "xyes"], [AS_IF([test "x$pool_string" != "x"], [pool_string="$pool_string, spawns"], [pool_string="spawns"])]) -AS_IF([test "x$enable_hardware_atomics" = "xno"], - [AS_IF([test "x$ac_cv_func_pthread_spin_init" = "xyes"], - [incr_string="Spin (software)"], - [incr_string="Mutex (software)"])], - [AS_IF([test "x$qthread_cv_atomic_incr" = "xyes" -a "x$qthread_cv_atomic_CAS" = "xyes"], - [incr_string="Compiler Builtin (both)"], - [AS_IF([test "x$qthread_cv_atomic_incr" = "xyes"], - [incr_string="Compiler Builtin/$qthread_cv_asm_arch"], - [AS_IF([test "x$qthread_cv_atomic_CAS" = "xyes"], - [incr_string="$qthread_cv_asm_arch/Compiler Builtin"], - [incr_string="$qthread_cv_asm_arch"])])])]) +AS_IF([test "x$qthread_cv_atomic_incr" = "xyes" -a "x$qthread_cv_atomic_CAS" = "xyes"], + [incr_string="Compiler Builtin (both)"], + [AS_IF([test "x$qthread_cv_atomic_incr" = "xyes"], + [incr_string="Compiler Builtin/$qthread_cv_asm_arch"], + [AS_IF([test "x$qthread_cv_atomic_CAS" = "xyes"], + [incr_string="$qthread_cv_asm_arch/Compiler Builtin"], + [incr_string="$qthread_cv_asm_arch"])])]) AS_IF([test "x$enable_lf_febs" = xno], [feb_string="lock-based hash"], [feb_string="lock-free"]) diff --git a/include/56reader-rwlock.h b/include/56reader-rwlock.h index 9a8238f0a..586609725 100644 --- a/include/56reader-rwlock.h +++ b/include/56reader-rwlock.h @@ -30,14 +30,14 @@ struct tlrw_lock { typedef struct tlrw_lock rwlock_t; -static QINLINE void rwlock_init(rwlock_t *l) { +static inline void rwlock_init(rwlock_t *l) { unsigned int i; l->owner = 0; for (i = 0; i < sizeof l->readers; i++) l->readers[i] = 0; } -static QINLINE void rwlock_rdlock(rwlock_t *l, int id) { +static inline void rwlock_rdlock(rwlock_t *l, int id) { assert(id >= 0); for (;;) { l->readers[id] = 1; @@ -53,12 +53,12 @@ static QINLINE void rwlock_rdlock(rwlock_t *l, int id) { } } -static QINLINE void rwlock_rdunlock(rwlock_t *l, int id) { +static inline void rwlock_rdunlock(rwlock_t *l, int id) { assert(id >= 0); l->readers[id] = 0; } -static QINLINE void rwlock_wrlock(rwlock_t *l, int id) { +static inline void rwlock_wrlock(rwlock_t *l, int id) { assert(id >= 0); id = id + 1; @@ -73,7 +73,7 @@ static QINLINE void rwlock_wrlock(rwlock_t *l, int id) { } } -static QINLINE void rwlock_wrunlock(rwlock_t *l) { +static inline void rwlock_wrunlock(rwlock_t *l) { l->owner = 0; MACHINE_FENCE; } diff --git a/include/qt_addrstat.h b/include/qt_addrstat.h index 9163d6e67..857ed3327 100644 --- a/include/qt_addrstat.h +++ b/include/qt_addrstat.h @@ -6,7 +6,7 @@ /* This allocates a new, initialized addrstat structure, which is used for * keeping track of the FEB status of an address. It expects a shepherd pointer * to use to find the right memory pool to use. */ -static QINLINE qthread_addrstat_t *qthread_addrstat_new(void) { /*{{{ */ +static inline qthread_addrstat_t *qthread_addrstat_new(void) { /*{{{ */ qthread_addrstat_t *ret = ALLOC_ADDRSTAT(); QTHREAD_FASTLOCK_INIT_PTR(&ret->lock); QTHREAD_FASTLOCK_LOCK(&ret->lock); diff --git a/include/qt_affinity.h b/include/qt_affinity.h index 8e4ecf177..bc989e60f 100644 --- a/include/qt_affinity.h +++ b/include/qt_affinity.h @@ -12,10 +12,6 @@ typedef struct qthread_shepherd_s qthread_shepherd_t; #endif -#if defined(QTHREAD_HAVE_LIBNUMA) -#define QTHREAD_HAVE_MEM_AFFINITY -#endif - #if defined(QTHREAD_HAVE_HWLOC) && (HWLOC_API_VERSION > 0x00010000) #define QTHREAD_HAVE_MEM_AFFINITY #endif diff --git a/include/qt_atomics.h b/include/qt_atomics.h index ba83161ca..4b3ed7151 100644 --- a/include/qt_atomics.h +++ b/include/qt_atomics.h @@ -138,7 +138,7 @@ extern pthread_mutexattr_t _fastlock_attr; * !defined(QTHREAD_ATOMIC_INCR). */ #if defined(USE_INTERNAL_SPINLOCK) && USE_INTERNAL_SPINLOCK && \ - defined(QTHREAD_ATOMIC_INCR) && !defined(QTHREAD_MUTEX_INCREMENT) + defined(QTHREAD_ATOMIC_INCR) #define QTHREAD_TRYLOCK_TYPE qt_spin_trylock_t #define QTHREAD_TRYLOCK_INIT(x) \ @@ -303,52 +303,7 @@ extern pthread_mutexattr_t _fastlock_attr; t.tv_nsec -= ((t.tv_nsec >= 1000000000) ? 1000000000 : 0); \ qassert(pthread_cond_timedwait(&(c), &(m), &t), 0); \ } while (0) -#ifdef QTHREAD_MUTEX_INCREMENT -#define QTHREAD_CASLOCK(var) \ - var; \ - QTHREAD_FASTLOCK_TYPE var##_caslock -#define QTHREAD_CASLOCK_STATIC(var) \ - var; \ - static QTHREAD_FASTLOCK_TYPE var##_caslock -#define QTHREAD_CASLOCK_EXPLICIT_DECL(name) QTHREAD_FASTLOCK_TYPE name; -#define QTHREAD_CASLOCK_EXPLICIT_INIT(name) QTHREAD_FASTLOCK_INIT(name) -#define QTHREAD_CASLOCK_INIT(var, i) \ - var = i; \ - QTHREAD_FASTLOCK_INIT(var##_caslock) -#define QTHREAD_CASLOCK_DESTROY(var) QTHREAD_FASTLOCK_DESTROY(var##_caslock) -#define QTHREAD_CASLOCK_READ(var) \ - (void *)qt_cas_read_ui((uintptr_t *)&(var), &(var##_caslock)) -#define QTHREAD_CASLOCK_READ_UI(var) \ - qt_cas_read_ui((uintptr_t *)&(var), &(var##_caslock)) -#define QT_CAS(var, oldv, newv) \ - qt_cas((void **)&(var), (void *)(oldv), (void *)(newv), &(var##_caslock)) -#define QT_CAS_(var, oldv, newv, caslock) \ - qt_cas((void **)&(var), (void *)(oldv), (void *)(newv), &(caslock)) - -static QINLINE void *qt_cas(void **const ptr, - void *const oldv, - void *const newv, - QTHREAD_FASTLOCK_TYPE *lock) { - void *ret; - - QTHREAD_FASTLOCK_LOCK(lock); - ret = *ptr; - if (*ptr == oldv) { *ptr = newv; } - QTHREAD_FASTLOCK_UNLOCK(lock); - return ret; -} - -static QINLINE uintptr_t qt_cas_read_ui(uintptr_t *const ptr, - QTHREAD_FASTLOCK_TYPE *mutex) { - uintptr_t ret; - QTHREAD_FASTLOCK_LOCK(mutex); - ret = *ptr; - QTHREAD_FASTLOCK_UNLOCK(mutex); - return ret; -} - -#else /* ifdef QTHREAD_MUTEX_INCREMENT */ #define QTHREAD_CASLOCK(var) (var) #define QTHREAD_CASLOCK_STATIC(var) (var) #define QTHREAD_CASLOCK_EXPLICIT_DECL(name) @@ -364,7 +319,7 @@ static QINLINE uintptr_t qt_cas_read_ui(uintptr_t *const ptr, #ifdef QTHREAD_ATOMIC_CAS_PTR #define qt_cas(P, O, N) (void *)__sync_val_compare_and_swap((P), (O), (N)) #else -static QINLINE void * +static inline void * qt_cas(void **const ptr, void *const oldv, void *const newv) { /*{{{*/ #if defined(HAVE_GCC_INLINE_ASSEMBLY) #if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) @@ -423,57 +378,15 @@ qt_cas(void **const ptr, void *const oldv, void *const newv) { /*{{{*/ } /*}}}*/ #endif /* ATOMIC_CAS_PTR */ -#endif /* MUTEX_INCREMENT */ -#ifndef QTHREAD_MUTEX_INCREMENT #define qthread_internal_atomic_read_s(op, lock) (*op) #define qthread_internal_incr(op, lock, val) qthread_incr(op, val) #define qthread_internal_incr_s(op, lock, val) qthread_incr(op, val) #define qthread_internal_decr(op, lock) qthread_incr(op, -1) #define qthread_internal_incr_mod(op, m, lock) qthread_internal_incr_mod_(op, m) #define QTHREAD_OPTIONAL_LOCKARG -#else -#define qthread_internal_incr_mod(op, m, lock) \ - qthread_internal_incr_mod_(op, m, lock) -#define QTHREAD_OPTIONAL_LOCKARG , QTHREAD_FASTLOCK_TYPE *lock - -static QINLINE aligned_t qthread_internal_incr(aligned_t *operand, - QTHREAD_FASTLOCK_TYPE *lock, - int val) { /*{{{ */ - aligned_t retval; - - QTHREAD_FASTLOCK_LOCK(lock); - retval = *operand; - *operand += val; - QTHREAD_FASTLOCK_UNLOCK(lock); - return retval; -} /*}}} */ -static QINLINE saligned_t qthread_internal_incr_s(saligned_t *operand, - QTHREAD_FASTLOCK_TYPE *lock, - int val) { /*{{{ */ - saligned_t retval; - - QTHREAD_FASTLOCK_LOCK(lock); - retval = *operand; - *operand += val; - QTHREAD_FASTLOCK_UNLOCK(lock); - return retval; -} /*}}} */ - -static QINLINE saligned_t qthread_internal_atomic_read_s( - saligned_t *operand, QTHREAD_FASTLOCK_TYPE *lock) { /*{{{ */ - saligned_t retval; - - QTHREAD_FASTLOCK_LOCK(lock); - retval = *operand; - QTHREAD_FASTLOCK_UNLOCK(lock); - return retval; -} /*}}} */ - -#endif /* ifndef QTHREAD_MUTEX_INCREMENT */ - -static QINLINE aligned_t qthread_internal_incr_mod_( +static inline aligned_t qthread_internal_incr_mod_( aligned_t *operand, unsigned int const max QTHREAD_OPTIONAL_LOCKARG) { /*{{{ */ aligned_t retval; @@ -628,12 +541,6 @@ static QINLINE aligned_t qthread_internal_incr_mod_( ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) && \ (QTHREAD_BITS == 32)) */ -#elif defined(QTHREAD_MUTEX_INCREMENT) - QTHREAD_FASTLOCK_LOCK(lock); - retval = (*operand)++; - *operand *= (*operand < max); - QTHREAD_FASTLOCK_UNLOCK(lock); - #elif QTHREAD_ATOMIC_CAS aligned_t oldval, newval; @@ -651,7 +558,7 @@ static QINLINE aligned_t qthread_internal_incr_mod_( return retval; } /*}}} */ -static QINLINE void *qt_internal_atomic_swap_ptr(void **addr, +static inline void *qt_internal_atomic_swap_ptr(void **addr, void *newval) { /*{{{*/ void *oldval = atomic_load_explicit((void *_Atomic *)addr, memory_order_relaxed); diff --git a/include/qt_blocking_structs.h b/include/qt_blocking_structs.h index 15de71813..6a0cf1f84 100644 --- a/include/qt_blocking_structs.h +++ b/include/qt_blocking_structs.h @@ -82,14 +82,14 @@ extern qt_mpool generic_addrstat_pool; #else extern qt_mpool generic_addrres_pool; -static QINLINE qthread_addrres_t *ALLOC_ADDRRES(void) { /*{{{ */ +static inline qthread_addrres_t *ALLOC_ADDRRES(void) { /*{{{ */ qthread_addrres_t *tmp = (qthread_addrres_t *)qt_mpool_alloc(generic_addrres_pool); return tmp; } /*}}} */ -static QINLINE void FREE_ADDRRES(qthread_addrres_t *t) { /*{{{ */ +static inline void FREE_ADDRRES(qthread_addrres_t *t) { /*{{{ */ qt_mpool_free(generic_addrres_pool, t); } /*}}} */ diff --git a/include/qt_debug.h b/include/qt_debug.h index f72faf935..e0ae4dacb 100644 --- a/include/qt_debug.h +++ b/include/qt_debug.h @@ -12,7 +12,7 @@ #define ALLOC_SCRIBBLE(ptr, sz) memset((ptr), 0x55, (sz)) #define FREE_SCRIBBLE(ptr, sz) memset((ptr), 0x77, (sz)) -static QINLINE void *MALLOC(size_t sz) { +static inline void *MALLOC(size_t sz) { void *ret = qt_malloc(sz); ALLOC_SCRIBBLE(ret, sz); @@ -254,7 +254,7 @@ extern QTHREAD_FASTLOCK_TYPE output_lock; #define qthread_debug(level, format, ...) \ qthread_debug_(level, "%s(%u): " format, __FUNCTION__, __LINE__, __VA_ARGS__) -static QINLINE void qthread_debug_(int level, char const *format, ...) +static inline void qthread_debug_(int level, char const *format, ...) { /*{{{ */ va_list args; diff --git a/include/qt_gcd.h b/include/qt_gcd.h index 1b9e4e0b5..be94ccc30 100644 --- a/include/qt_gcd.h +++ b/include/qt_gcd.h @@ -4,7 +4,7 @@ #include -static QINLINE size_t qt_gcd(size_t a, size_t b) { +static inline size_t qt_gcd(size_t a, size_t b) { #ifdef QTHREAD_SHIFT_GCD size_t k = 0; if (a == 0) return b; @@ -32,7 +32,7 @@ static QINLINE size_t qt_gcd(size_t a, size_t b) { #endif } -static QINLINE size_t qt_lcm(size_t a, size_t b) { /*{{{ */ +static inline size_t qt_lcm(size_t a, size_t b) { /*{{{ */ size_t tmp = qt_gcd(a, b); /* on 32 bit platforms, it's pretty easy for a * b to overflow so we force * 64 bit multiplication*/ diff --git a/include/qt_profiling.h b/include/qt_profiling.h index 671536535..55bfebfee 100644 --- a/include/qt_profiling.h +++ b/include/qt_profiling.h @@ -73,7 +73,7 @@ #define QTHREAD_FEB_UNIQUERECORD2(TYPE, ADDR, SHEP) \ qt_hash_put((SHEP)->unique##TYPE##addrs, (void *)(ADDR), (void *)(ADDR)) -static QINLINE void +static inline void qthread_unique_collect(qt_key_t const key, void *value, void *id) { /*{{{*/ qt_hash_put_locked((qt_hash)id, key, value); } /*}}}*/ diff --git a/include/qt_shepherd_innards.h b/include/qt_shepherd_innards.h index 2c9b4a1e4..bf2a6d9f3 100644 --- a/include/qt_shepherd_innards.h +++ b/include/qt_shepherd_innards.h @@ -99,13 +99,6 @@ struct qthread_shepherd_s { size_t num_threads; /* number of threads handled */ #endif #ifdef QTHREAD_FEB_PROFILING -#ifdef QTHREAD_MUTEX_INCREMENT - qt_hash uniqueincraddrs; /* the unique addresses that are incremented */ - double incr_maxtime; /* maximum time spent in a single increment */ - double incr_time; /* total time spent incrementing */ - size_t incr_count; /* number of increments */ -#endif - qt_hash uniquelockaddrs; /* the unique addresses that are locked */ double aquirelock_maxtime; /* max time spent aquiring locks */ double aquirelock_time; /* total time spent aquiring locks */ @@ -132,7 +125,7 @@ struct qthread_shepherd_s { extern TLS_DECL(qthread_shepherd_t *, shepherd_structs); -static QINLINE qthread_shepherd_t *qthread_internal_getshep(void) { +static inline qthread_shepherd_t *qthread_internal_getshep(void) { qthread_worker_t *w = (qthread_worker_t *)TLS_GET(shepherd_structs); if (w == NULL) { return NULL; @@ -141,7 +134,7 @@ static QINLINE qthread_shepherd_t *qthread_internal_getshep(void) { } } -static QINLINE qthread_worker_t *qthread_internal_getworker(void) { +static inline qthread_worker_t *qthread_internal_getworker(void) { return (qthread_worker_t *)TLS_GET(shepherd_structs); } diff --git a/include/qt_threadqueue_stack.h b/include/qt_threadqueue_stack.h index e58105ebb..65dc68c64 100644 --- a/include/qt_threadqueue_stack.h +++ b/include/qt_threadqueue_stack.h @@ -34,15 +34,15 @@ static void qt_stack_free(qt_stack_t *stack) { stack->base = stack->top = stack->capacity = 0; } -static QINLINE int qt_stack_is_empty(qt_stack_t *stack) { +static inline int qt_stack_is_empty(qt_stack_t *stack) { return (stack->empty); } -static QINLINE int qt_stack_is_full(qt_stack_t *stack) { +static inline int qt_stack_is_full(qt_stack_t *stack) { return (stack->base == ((stack->top + 1) % stack->capacity)); } -static QINLINE int qt_stack_size(qt_stack_t *stack) { +static inline int qt_stack_size(qt_stack_t *stack) { if (stack->top >= stack->base) { return (stack->top - stack->base); } else { @@ -70,21 +70,21 @@ static void qt_stack_resize(qt_stack_t *stack) { free(old_storage); } -static QINLINE void qt_stack_push(qt_stack_t *stack, qthread_t *t) { +static inline void qt_stack_push(qt_stack_t *stack, qthread_t *t) { if (qt_stack_is_full(stack)) { qt_stack_resize(stack); } stack->top = (stack->top + 1) % (stack->capacity); stack->storage[stack->top] = t; stack->empty = 0; } -static QINLINE void qt_stack_enq_base(qt_stack_t *stack, qthread_t *t) { +static inline void qt_stack_enq_base(qt_stack_t *stack, qthread_t *t) { if (qt_stack_is_full(stack)) { qt_stack_resize(stack); } stack->storage[stack->base] = t; stack->base = (stack->base - 1 + stack->capacity) % (stack->capacity); stack->empty = 0; } -static QINLINE qthread_t *qt_stack_pop(qt_stack_t *stack) { +static inline qthread_t *qt_stack_pop(qt_stack_t *stack) { if (qt_stack_is_empty(stack)) { return (NULL); } qthread_t *t = stack->storage[stack->top]; assert(t != NULL); diff --git a/include/qthread/common.h.in b/include/qthread/common.h.in index 79f047437..aa1bc1efc 100644 --- a/include/qthread/common.h.in +++ b/include/qthread/common.h.in @@ -8,7 +8,7 @@ #define QTHREAD_COMMON_H /* Whether C compiler supports GCC style inline assembly */ -#undef HAVE_GCC_INLINE_ASSEMBLY +#define HAVE_GCC_INLINE_ASSEMBLY /* if the compiler supports inline assembly, we can prevent reordering */ #undef COMPILER_FENCE @@ -16,19 +16,6 @@ /* Architecture type of assembly to use */ #undef QTHREAD_ASSEMBLY_ARCH -/* use mutexes when incrementing, rather than architecture-specific assembly - */ -#undef QTHREAD_MUTEX_INCREMENT - -/* use inlined functions */ -#undef QTHREAD_INLINE - -/* if the compiler supports __attribute__((deprecated)) */ -#undef Q_DEPRECATED - -/* Allow functions to be inlined */ -#undef QINLINE - #ifndef __powerpc #define BITFIELD_ORDER_REVERSE #else @@ -40,19 +27,19 @@ #endif /* builtin cas supported */ -#undef QTHREAD_ATOMIC_CAS +#define QTHREAD_ATOMIC_CAS 1 /* if the compiler supports __sync_val_compare_and_swap on 32-bit ints */ -#undef QTHREAD_ATOMIC_CAS32 +#define QTHREAD_ATOMIC_CAS32 1 /* if the compiler supports __sync_val_compare_and_swap on 64-bit ints */ -#undef QTHREAD_ATOMIC_CAS64 +#define QTHREAD_ATOMIC_CAS64 1 /* if the compiler supports __sync_val_compare_and_swap on pointers */ -#undef QTHREAD_ATOMIC_CAS_PTR +#define QTHREAD_ATOMIC_CAS_PTR 1 /* builtin incr supported */ -#undef QTHREAD_ATOMIC_INCR +#define QTHREAD_ATOMIC_INCR 1 #ifdef __cplusplus #ifdef __GNUC__ diff --git a/include/qthread/qarray.h b/include/qthread/qarray.h index 4a5fdae8e..f45b3e34c 100644 --- a/include/qthread/qarray.h +++ b/include/qthread/qarray.h @@ -105,7 +105,7 @@ void qarray_dist_like(qarray const *ref, qarray *mod); #define qarray_elem(a, i) qarray_elem_nomigrate(a, i) void *qarray_elem_migrate(qarray const *a, size_t const index); -QINLINE static void *qarray_elem_nomigrate(qarray const *a, +inline static void *qarray_elem_nomigrate(qarray const *a, size_t const index) { if ((a == NULL) || (index > a->count)) { return NULL; } diff --git a/include/qthread/qthread.h b/include/qthread/qthread.h index d8971f284..07b266759 100644 --- a/include/qthread/qthread.h +++ b/include/qthread/qthread.h @@ -675,16 +675,6 @@ int qthread_spinlocks_destroy(qthread_spinlock_t *a); int qthread_lock_init(aligned_t const *a, bool const is_recursive); int qthread_lock_destroy(aligned_t *a); -#if defined(QTHREAD_MUTEX_INCREMENT) || QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32 -uint32_t qthread_incr32_(uint32_t *, int32_t); -uint64_t qthread_incr64_(uint64_t *, int64_t); -float qthread_fincr_(float *, float); -double qthread_dincr_(double *, double); -uint32_t qthread_cas32_(uint32_t *, uint32_t, uint32_t); -uint64_t qthread_cas64_(uint64_t *, uint64_t, uint64_t); -#endif // if defined(QTHREAD_MUTEX_INCREMENT) || (QTHREAD_ASSEMBLY_ARCH == - // QTHREAD_POWERPC32) - /* the following three functions implement variations on atomic increment. It * is done with architecture-specific assembly (on supported architectures, * when possible) and does NOT use FEB's or lock/unlock unless the architecture @@ -692,11 +682,8 @@ uint64_t qthread_cas64_(uint64_t *, uint64_t, uint64_t); * All of these functions return the value of the contents of the operand * *after* incrementing. */ -static QINLINE float qthread_fincr(float *operand, float incr) { /*{{{ */ -#if defined(QTHREAD_MUTEX_INCREMENT) - return qthread_fincr_(operand, incr); - -#elif QTHREAD_ATOMIC_CAS && !defined(HAVE_GCC_INLINE_ASSEMBLY) +static inline float qthread_fincr(float *operand, float incr) { /*{{{ */ +#if QTHREAD_ATOMIC_CAS union { float f; uint32_t i; @@ -713,7 +700,7 @@ static QINLINE float qthread_fincr(float *operand, float incr) { /*{{{ */ #elif !defined(HAVE_GCC_INLINE_ASSEMBLY) #error Qthreads requires either mutex increments, inline assembly, or compiler CAS builtins -#else // if defined(QTHREAD_MUTEX_INCREMENT) +#else // if QTHREAD_ATOMIC_CAS #if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) || \ (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) union { @@ -803,15 +790,11 @@ static QINLINE float qthread_fincr(float *operand, float incr) { /*{{{ */ #error Unsupported assembly architecture for qthread_fincr #endif // if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) || // (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) -#endif // if defined(QTHREAD_MUTEX_INCREMENT) +#endif // if QTHREAD_ATOMIC_CAS } /*}}} */ -static QINLINE double qthread_dincr(double *operand, double incr) { /*{{{ */ -#if defined(QTHREAD_MUTEX_INCREMENT) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) - return qthread_dincr_(operand, incr); - -#elif QTHREAD_ATOMIC_CAS && !defined(HAVE_GCC_INLINE_ASSEMBLY) +static inline double qthread_dincr(double *operand, double incr) { /*{{{ */ +#if QTHREAD_ATOMIC_CAS union { uint64_t i; double d; @@ -828,8 +811,7 @@ static QINLINE double qthread_dincr(double *operand, double incr) { /*{{{ */ #elif !defined(HAVE_GCC_INLINE_ASSEMBLY) #error Qthreads requires either mutex increments, inline assembly, or compiler CAS builtins -#else // if defined(QTHREAD_MUTEX_INCREMENT) || (QTHREAD_ASSEMBLY_ARCH == - // QTHREAD_POWERPC32) +#else // if QTHREAD_ATOMIC_CAS #if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) uint64_t scratch_int; double incremented_value; @@ -997,19 +979,15 @@ static QINLINE double qthread_dincr(double *operand, double incr) { /*{{{ */ #else // if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) #error Unimplemented assembly architecture for qthread_dincr #endif // if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) -#endif // if defined(QTHREAD_MUTEX_INCREMENT) || (QTHREAD_ASSEMBLY_ARCH == - // QTHREAD_POWERPC32) +#endif // if QTHREAD_ATOMIC_CAS } /*}}} */ -static QINLINE uint32_t qthread_incr32(uint32_t *operand, +static inline uint32_t qthread_incr32(uint32_t *operand, uint32_t incr) { /*{{{ */ -#ifdef QTHREAD_MUTEX_INCREMENT - return qthread_incr32_(operand, incr); - -#elif defined(QTHREAD_ATOMIC_INCR) +#if defined(QTHREAD_ATOMIC_INCR) return __sync_fetch_and_add(operand, incr); -#elif !defined(HAVE_GCC_INLINE_ASSEMBLY) && QTHREAD_ATOMIC_CAS +#elif QTHREAD_ATOMIC_CAS uint32_t oldval, newval; do { oldval = *operand; @@ -1020,7 +998,7 @@ static QINLINE uint32_t qthread_incr32(uint32_t *operand, #elif !defined(HAVE_GCC_INLINE_ASSEMBLY) #error Qthreads requires either mutex increments, inline assembly, or compiler atomic builtins -#else // ifdef QTHREAD_MUTEX_INCREMENT +#else // if defined(QTHREAD_ATOMIC_INCR) #if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) || \ (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) uint32_t retval; @@ -1052,19 +1030,15 @@ static QINLINE uint32_t qthread_incr32(uint32_t *operand, #error Unimplemented assembly architecture for qthread_incr32 #endif // if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) || // (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) -#endif // ifdef QTHREAD_MUTEX_INCREMENT +#endif // if defined(QTHREAD_ATOMIC_INCR) } /*}}} */ -static QINLINE uint64_t qthread_incr64(uint64_t *operand, +static inline uint64_t qthread_incr64(uint64_t *operand, uint64_t incr) { /*{{{ */ -#if defined(QTHREAD_MUTEX_INCREMENT) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) - return qthread_incr64_(operand, incr); - -#elif defined(QTHREAD_ATOMIC_INCR) +#if defined(QTHREAD_ATOMIC_INCR) return __sync_fetch_and_add(operand, incr); -#elif !defined(HAVE_GCC_INLINE_ASSEMBLY) && QTHREAD_ATOMIC_CAS +#elif QTHREAD_ATOMIC_CAS uint64_t oldval, newval; do { oldval = *operand; @@ -1075,7 +1049,7 @@ static QINLINE uint64_t qthread_incr64(uint64_t *operand, #elif !defined(HAVE_GCC_INLINE_ASSEMBLY) #error Qthreads requires either mutex increments, inline assembly, or compiler atomic builtins -#else // if defined(QTHREAD_MUTEX_INCREMENT) || QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32 +#else // if defined(QTHREAD_ATOMIC_CAS) #if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) uint64_t retval; uint64_t incrd = incrd; /* no initializing */ @@ -1167,10 +1141,10 @@ static QINLINE uint64_t qthread_incr64(uint64_t *operand, #else // if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) #error Unimplemented assembly architecture for qthread_incr64 #endif // if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) -#endif // if defined(QTHREAD_MUTEX_INCREMENT) || QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32 +#endif // if defined(QTHREAD_ATOMIC_INCR) } /*}}} */ -static QINLINE int64_t qthread_incr_xx(void *addr, +static inline int64_t qthread_incr_xx(void *addr, int64_t incr, size_t length) { /*{{{ */ switch (length) { @@ -1185,15 +1159,10 @@ static QINLINE int64_t qthread_incr_xx(void *addr, uint64_t qthread_syncvar_incrF(syncvar_t *restrict operand, uint64_t inc); -#if !defined(QTHREAD_ATOMIC_CAS) || defined(QTHREAD_MUTEX_INCREMENT) -static QINLINE uint32_t qthread_cas32(uint32_t *operand, +#if !defined(QTHREAD_ATOMIC_CAS) +static inline uint32_t qthread_cas32(uint32_t *operand, uint32_t oldval, uint32_t newval) { /*{{{ */ -#ifdef QTHREAD_MUTEX_INCREMENT // XXX: this is only valid if you don't read - // *operand without the lock - return qthread_cas32_(operand, oldval, newval); - -#else #if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) || \ (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) uint32_t result; @@ -1230,16 +1199,11 @@ static QINLINE uint32_t qthread_cas32(uint32_t *operand, #error Unimplemented assembly architecture for qthread_cas32 #endif // if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) || // (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) -#endif // ifdef QTHREAD_MUTEX_INCREMENT } /*}}} */ -static QINLINE uint64_t qthread_cas64(uint64_t *operand, +static inline uint64_t qthread_cas64(uint64_t *operand, uint64_t oldval, uint64_t newval) { /*{{{ */ -#ifdef QTHREAD_MUTEX_INCREMENT - return qthread_cas64_(operand, oldval, newval); - -#else #if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) uint64_t result; __asm__ __volatile__("A_%=:\n\t" @@ -1322,10 +1286,9 @@ static QINLINE uint64_t qthread_cas64(uint64_t *operand, #else // if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) #error Unimplemented assembly architecture for qthread_cas64 #endif // if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) -#endif // ifdef QTHREAD_MUTEX_INCREMENT } /*}}} */ -static QINLINE aligned_t qthread_cas_xx(aligned_t *addr, +static inline aligned_t qthread_cas_xx(aligned_t *addr, aligned_t oldval, aligned_t newval, size_t length) { /*{{{ */ @@ -1342,7 +1305,7 @@ static QINLINE aligned_t qthread_cas_xx(aligned_t *addr, return 0; /* compiler check */ } /*}}} */ -static QINLINE void * +static inline void * qthread_cas_ptr_(void **addr, void *oldval, void *newval) { /*{{{*/ #if (QTHREAD_BITS == 32) return (void *)(uintptr_t)qthread_cas32( @@ -1397,7 +1360,7 @@ Q_ENDCXX /* */ #ifndef __cplusplus -#if defined(QTHREAD_ATOMIC_INCR) && !defined(QTHREAD_MUTEX_INCREMENT) +#if defined(QTHREAD_ATOMIC_INCR) #define qthread_incr(ADDR, INCVAL) __sync_fetch_and_add(ADDR, INCVAL) #else #define qthread_incr(ADDR, INCVAL) \ diff --git a/include/qthread/syncvar.hpp b/include/qthread/syncvar.hpp index 174040065..6d8bf6d4a 100644 --- a/include/qthread/syncvar.hpp +++ b/include/qthread/syncvar.hpp @@ -9,22 +9,22 @@ class syncvar; class syncvar { public: - QINLINE syncvar(void) noexcept { + inline syncvar(void) noexcept { // Doing it this way because extended initializers (e.g. // SYNCVAR_STATIC_INITIALIZER) are not (yet) supported by C++ the_syncvar_t.u.w = 0; } - QINLINE syncvar(uint64_t const &val) noexcept { + inline syncvar(uint64_t const &val) noexcept { assert(!(val & 0xf000000000000000ull)); the_syncvar_t.u.s.data = val; } - QINLINE syncvar(syncvar const &val) noexcept { + inline syncvar(syncvar const &val) noexcept { the_syncvar_t.u.w = val.the_syncvar_t.u.w; } - QINLINE syncvar(syncvar_t const &val) { the_syncvar_t.u.w = val.u.w; } + inline syncvar(syncvar_t const &val) { the_syncvar_t.u.w = val.u.w; } int empty(void) { return qthread_syncvar_empty(&the_syncvar_t); } diff --git a/include/qthread_innards.h b/include/qthread_innards.h index 4c819e41b..018c12f7a 100644 --- a/include/qthread_innards.h +++ b/include/qthread_innards.h @@ -38,13 +38,7 @@ typedef struct uint64_strip_s { typedef struct qlib_s { unsigned int nshepherds; aligned_t nshepherds_active; -#ifdef QTHREAD_MUTEX_INCREMENT - QTHREAD_FASTLOCK_TYPE nshepherds_active_lock; -#endif aligned_t nworkers_active; -#ifdef QTHREAD_MUTEX_INCREMENT - QTHREAD_FASTLOCK_TYPE nworkers_active_lock; -#endif unsigned int nworkerspershep; struct qthread_shepherd_s *shepherds; qt_threadqueue_t **threadqueues; @@ -109,14 +103,6 @@ typedef struct qlib_s { aligned_t sched_shepherd; QTHREAD_FASTLOCK_TYPE sched_shepherd_lock; -#if defined(QTHREAD_MUTEX_INCREMENT) || QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32 - QTHREAD_FASTLOCK_TYPE *atomic_locks; -#ifdef QTHREAD_COUNT_THREADS - aligned_t *atomic_stripes; - QTHREAD_FASTLOCK_TYPE *atomic_stripes_locks; -#endif -#endif - /*AGG cost method, call method and max cost * defined in qthreads or given by the user at qthread initialization */ diff --git a/scripts/build.pl b/scripts/build.pl deleted file mode 100755 index 158eeb199..000000000 --- a/scripts/build.pl +++ /dev/null @@ -1,378 +0,0 @@ -#!/usr/bin/perl - -use strict; -use warnings; - -use Cwd qw/getcwd/; - -# Setup configuration options -my @default_conf_names = ('compat', 'unpooled', 'opt', 'nemesis', 'lifo', 'mutexfifo', 'slowcontext', 'shep_profile', 'lock_profile', 'steal_profile', 'tc_profile', 'hi_st', 'hi_mt', 'dev'); - -my %config = ( - default => '', - icc => 'CC=icc CXX=icc', - oldgcc => 'CC=gcc34 CXX=g++34', - compat => 'CFLAGS="-m32" CXXFLAGS="-m32" LDFLAGS="-m32" CPPFLAGS="-m32"', - unpooled => '--disable-pooled-memory', - opt => 'CFLAGS="-O3" CXXFLAGS="-O3"', - nemesis => '--with-scheduler=nemesis', - lifo => '--with-scheduler=lifo', - mutexfifo => '--with-scheduler=mutexfifo', - mtsfifo => '--with-scheduler=mtsfifo', - nottingham => '--with-scheduler=nottingham', - slowcontext => '--disable-fastcontext', - shavit => '--with-dict=shavit', - shep_profile => '--enable-profiling=shepherd', - lock_profile => '--enable-profiling=feb', - steal_profile => '--enable-profiling=steal', - tc_profile => '--enable-profiling=threadc', - hi_st => '--disable-hardware-atomics --with-scheduler=nemesis', - hi_mt => '--disable-hardware-atomics --with-scheduler=sherwood', - dev => 'CFLAGS="-g -O0" CXXFLAGS="-g -O0" --enable-debug --enable-guard-pages --enable-asserts --enable-static --disable-shared --enable-valgrind --disable-pooled-memory --enable-aligncheck', - debug => 'CFLAGS="-g -O0" CXXFLAGS="-g -O0" --enable-debug --enable-static --disable-shared', - hwloc => '--with-topology=hwloc', - sinc_stats => '--enable-profiling=sincs', - oversubscription => '--enable-oversubscription', - guard_pages => '--enable-guard-pages', - chapel_default => '--enable-static --disable-shared --enable-condwait-queue --disable-spawn-cache --with-scheduler=nemesis', -); - -my @summaries; - -# Collect command-line options -my @conf_names; -my @check_tests; -my @user_configs; -my $qt_src_dir = ''; -my $qt_bld_dir = ''; -my $qt_install_dir = ''; -my $repeat = 1; -my $make_flags = ''; -my $force_configure = 0; -my $force_clean = 0; -my $print_info = 0; -my $dry_run = 0; -my $quietly = 0; -my $need_help = 0; - -if (scalar @ARGV == 0) { - $need_help = 1; -} else { - while (@ARGV) { - my $flag = shift @ARGV; - - if ($flag =~ m/--configs=(.*)/) { - @conf_names = split(/,/, $1); - } elsif ($flag =~ m/--with-config=(.*)/) { - push @user_configs, $1; - } elsif ($flag =~ m/--source-dir=(.*)/) { - $qt_src_dir = $1; - } elsif ($flag =~ m/--build-dir=(.*)/) { - $qt_bld_dir = $1; - } elsif ($flag =~ m/--install-dir=(.*)/) { - $qt_install_dir = $1; - } elsif ($flag =~ m/--repeat=(.*)/) { - $repeat = int($1); - } elsif ($flag =~ m/--make-flags=(.*)/) { - $make_flags = $1; - } elsif ($flag eq '--force-configure') { - $force_configure = 1; - } elsif ($flag eq '--force-clean') { - $force_clean = 1; - } elsif ($flag eq '--verbose' || $flag eq '-v') { - $print_info = 1; - } elsif ($flag eq '--dry-run') { - $dry_run = 1; - } elsif ($flag eq '--quietly') { - $quietly = 1; - } elsif ($flag =~ m/--tests=(.*)/) { - @check_tests = split(/,/,$1) unless ($1 eq 'all') - } elsif ($flag eq '--help' || $flag eq '-h') { - $need_help = 1; - } else { - print "Unsupported option '$flag'.\n"; - exit(1); - } - } -} - -# Aggregate configuration options -while (@user_configs) { - my $user_config = pop @user_configs; - my $id = scalar @user_configs; - my $name = "userConfig$id"; - - push @conf_names, $name; - $config{$name} = $user_config; -} -if (scalar @conf_names == 0) { push @conf_names, 'default' }; -@conf_names = sort @conf_names; - -if ($need_help) { - print "usage: perl build.pl [options]\n"; - print "Options:\n"; - print "\t--configs= comma-separated list of configurations.\n"; - print "\t configuration options can be concatenated using\n"; - print "\t the '+' operator (e.g., 'conf1+conf2').\n"; - print "\t 'all' may be used as an alias for all known\n"; - print "\t configurations.\n"; - print "\t--with-config= a user-specified string of configuration\n"; - print "\t options. Essentially, this is used to define\n"; - print "\t an unnamed 'config', whereas the previous\n"; - print "\t uses pre-defined, named configs. This option\n"; - print "\t can be used multiple times.\n"; - print "\t--tests= comma-separated list of test suites. Valid\n"; - print "\t test suites are 'basics', 'features', and\n"; - print "\t 'stress'. The default is to run all three.\n"; - print "\t--source-dir= absolute path to Qthreads source.\n"; - print "\t--build-dir= absolute path to target build directory.\n"; - print "\t--install-dir= absolute path to target installation directory.\n"; - print "\t--repeat= run `make check` times per configuration.\n"; - print "\t--make-flags= options to pass to make (e.g. '-j 4').\n"; - print "\t--force-configure run `configure` again.\n"; - print "\t--force-clean run `make clean` before rebuilding.\n"; - print "\t--quietly only report warnings, errors, and summary stats.\n"; - print "\t--verbose\n"; - print "\t--dry-run\n"; - print "\t--help\n"; - - print "Configurations:\n"; - my @names = sort keys %config; - for my $name (@names) { - print "\t$name:\n\t\t'$config{$name}'\n"; - } - - exit(1); -} - -# Clean up and sanity check script options -my $use_all = 0; -foreach my $name (@conf_names) { - if ($name eq 'all') { - $use_all = 1; - } elsif (not exists $config{$name}) { - my @subconf_names = split(/\+/, $name); - my @subconf_profiles = (); - foreach my $subname (@subconf_names) { - if (exists $config{$subname}) { - push @subconf_profiles, $config{$subname}; - } else { - print "Invalid configuration option '$subname'\n"; - exit(1); - } - } - - $config{$name} = join(' ', @subconf_profiles); - } -} -if ($use_all) { - @conf_names = @default_conf_names; -} - -if ($qt_src_dir eq '') { - $qt_src_dir = getcwd; - if ((not -e "$qt_src_dir/README") || - (my_system("grep -q 'QTHREADS!' $qt_src_dir/README") != 0)) { - print "Could not find the source directory; try using --source-dir.\n"; - exit(1); - } -} elsif (not $qt_src_dir =~ m/^\//) { - print "Specify full path for source dir '$qt_src_dir'\n"; - exit(1); -} - -if ($qt_bld_dir eq '') { - $qt_bld_dir = "$qt_src_dir/build"; -} elsif (not $qt_bld_dir =~ m/^\//) { - print "Specify full path for build dir '$qt_bld_dir'\n"; - exit(1); -} - -if ($qt_install_dir eq '') { -} elsif (not $qt_install_dir =~ m/^\//) { - print "Specify full path for installation dir '$qt_install_dir'\n"; - exit(1); -} else { - foreach my $name (@conf_names) { - $config{$name} = join(' ', "--prefix=$qt_install_dir/$name"); - } -} - -# Optionally print information about the configuration -if ($print_info) { - print "Configurations: @conf_names\n"; - print "Source directory: $qt_src_dir\n"; - print "Build directory: $qt_bld_dir\n"; -} -if (not $qt_install_dir eq '') { - print "Install directory: $qt_install_dir\n"; -} - -# Run the test configurations -foreach my $conf_name (@conf_names) { - run_tests($conf_name); -} - -# Print a summary report -print "\n" . '=' x 50; -print "\nSummary:\n"; -foreach my $summary (@summaries) { - print "$summary\n"; -} -print '=' x 50 . "\n"; - -exit(0); - -################################################################################ - -sub run_tests { - my $conf_name = $_[0]; - my $test_dir = "$qt_bld_dir/$conf_name"; - - print "\n### Test: $conf_name\n" unless $quietly; - print "### Build directory: $test_dir\n" unless $quietly; - - # Setup for configuration - if (not -e "$qt_src_dir/configure") { - print "###\tGenerating configure script ...\n" if ($print_info); - my_system("cd $qt_src_dir && sh ./autogen.sh"); - } - - # Setup build space - print "###\tConfiguring '$conf_name' ...\n" unless $quietly; - my $configure_log = "$test_dir/build.configure.log"; - my_system("mkdir -p $test_dir") if (not -e $test_dir); - if (not $qt_install_dir eq '') { - my_system("mkdir -p $qt_install_dir/$conf_name") if (not -e "$qt_install_dir/$conf_name"); - } - my_system("cd $test_dir && $qt_src_dir/configure $config{$conf_name} 2>&1 | tee $configure_log") - if ($force_configure || not -e "$test_dir/config.log"); - print "### Log: $configure_log\n" unless $quietly; - - # Build library - print "###\tBuilding '$conf_name' ...\n" unless $quietly; - my $build_log = "$test_dir/build.make.log"; - my $build_command = "cd $test_dir"; - $build_command .= " && make clean > /dev/null" if ($force_clean); - $build_command .= " && make $make_flags 2>&1 | tee $build_log"; - if (not $qt_install_dir eq '') { - print "###\tInstalling '$conf_name' ...\n" unless $quietly; - $build_command .= " && make $make_flags install 2>&1 | tee $build_log"; - } - my_system($build_command); - if (not $dry_run) { - my $build_warnings = qx/awk '\/warning:\/' $build_log/; - if (length $build_warnings > 0) { - print "Build warnings in config $conf_name! Check log and/or run again with --force-clean and --verbose for more information.\n"; - print $build_warnings; - } - my $build_errors = qx/awk '\/error:\/' $build_log/; - if (length $build_errors > 0) { - print "Build error in config $conf_name! Check log and/or run again with --verbose for more information.\n"; - print $build_errors; - exit(1); - } - } - - # Build testsuite - my %failcounts; - my $failing_tests = 0; - my $passing_tests = 0; - my $pass = 1; - while ($pass <= $repeat) { - print "###\tBuilding and testing '$conf_name' pass $pass ...\n" - unless $quietly; - my $results_log = "$test_dir/build.$pass.results.log"; - print "### Log: $results_log\n" unless $quietly; - print "### Results for '$conf_name'\n" unless $quietly; - my $banner = '=' x 50; - print "$banner\n" unless $quietly; - - my @make_test_suites = ('basics', 'features', 'stress'); - if (scalar @check_tests == 0) { @check_tests = @make_test_suites}; - foreach my $make_test_suite (@check_tests) { - my $check_command = "cd $test_dir"; - $check_command .= " && make clean > /dev/null" if ($force_clean); - $check_command .= " && make $make_flags -C test/$make_test_suite check 2>&1 | tee $results_log"; - my_system($check_command); - if (not $dry_run) { - my $check_warnings = qx/awk '\/warning:\/' $results_log/; - if (length $check_warnings > 0) { - print "Build warnings in config $conf_name! Check log and/or run again with --force-clean and --verbose for more information.\n"; - print $check_warnings; - } - my $check_errors = qx/awk '\/error:\/' $results_log/; - if (length $check_errors > 0) { - print "Build error in config $conf_name! Check log and/or run again with --verbose for more information.\n"; - print $check_errors; - exit(1); - } - - # Display filtered results - my $digest = qx/grep 'tests passed' $results_log/; - my $digest_msg = ''; - if ($digest eq '') { - $digest = qx/grep '# PASS:' $results_log/; - } - if ($digest eq '') { - $digest = qx/grep 'tests failed' $results_log/; chomp($digest); - $digest =~ /([0-9]+) of ([0-9]+) tests failed/; - $failing_tests += $1; - $passing_tests += $2 - $1; - my $fails = qx/awk '\/FAIL\/{print \$2}' $results_log/; - my $fail_list .= join(',', split(/\n/, $fails)); - foreach my $test (split(/\n/, $fails)) { - $failcounts{$test} ++; - } - $digest_msg = $failing_tests . ' tests failed'; - $digest_msg .= " ($fail_list)"; - } else { - chomp $digest; - $digest = qx/grep 'All .* tests passed' $results_log/; - if ($digest eq '') { - $digest = qx/grep 'TOTAL:' $results_log/; - $digest =~ /TOTAL: ([0-9]+)/; - $passing_tests += $1; - } else { - $digest =~ /All ([0-9]+) tests passed/; - $passing_tests += $1; - } - $digest_msg = $passing_tests . ' tests passed'; - } - print "$digest_msg - $make_test_suite\n" unless $quietly; - } - } - print "$banner\n" unless $quietly; - - $pass++; - } - if (not $dry_run) { - my $summary = sprintf("%17s: ", $conf_name); - if ($failing_tests eq 0) { - $summary .= "All $passing_tests tests passed"; - } elsif ($passing_tests eq 0) { - $summary .= "All $failing_tests tests FAILED!!!"; - } else { - $summary .= "$passing_tests test".(($passing_tests!=1)?"s":"")." passed, "; - $summary .= "$failing_tests test".(($failing_tests!=1)?"s":"")." failed ("; - foreach my $test (keys(%failcounts)) { - $summary .= "$test:$failcounts{$test} "; - } - chop($summary); - $summary .= ")"; - } - push @summaries, $summary; - } -} - -sub my_system { - my $command = $_[0]; - - $command .= " > /dev/null" if (not $print_info); - print "\t\$ $command\n" if ($print_info); - - my $status = system($command) if (not $dry_run); - - return $status; -} -# vim:expandtab diff --git a/src/Makefile.am b/src/Makefile.am index 86813209e..0ccb14772 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -53,10 +53,6 @@ if COMPILE_EUREKAS libqthread_la_SOURCES += eurekas.c endif -if COMPILE_COMPAT_ATOMIC -libqthread_la_SOURCES += compat_atomics.c -endif - include qtimer/Makefile.inc include ds/Makefile.inc include patterns/Makefile.inc @@ -87,12 +83,8 @@ endif EXTRA_DIST += \ threadqueues/distrib_threadqueues.c \ - threadqueues/lifo_threadqueues.c \ threadqueues/nemesis_threadqueues.c \ - threadqueues/mutexfifo_threadqueues.c \ - threadqueues/mtsfifo_threadqueues.c \ threadqueues/sherwood_threadqueues.c \ - threadqueues/nottingham_threadqueues.c \ sincs/donecount.c \ sincs/donecount_cas.c \ sincs/original.c \ diff --git a/src/affinity/hwloc_v2.c b/src/affinity/hwloc_v2.c deleted file mode 100644 index a15a1b7c5..000000000 --- a/src/affinity/hwloc_v2.c +++ /dev/null @@ -1,763 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include - -#include "qt_affinity.h" -#include "qt_asserts.h" /* for qassert() */ -#include "qt_debug.h" -#include "qt_envariables.h" -#include "qt_output_macros.h" -#include "qt_subsystems.h" -#include "shufflesheps.h" - -static hwloc_topology_t sys_topo; -static uint32_t initialized = 0; - -static hwloc_cpuset_t mccoy_thread_bindings; - -static unsigned int qt_topology_output_level = 0; - -typedef struct { - int uid; - qthread_shepherd_id_t shep_id; - qthread_worker_id_t worker_id; - hwloc_obj_t bind_obj; -} topo_worker_t; - -typedef struct { - /* Shepherd mapping info */ - int num_sheps; - int shep_level; - hwloc_obj_t shep_obj; - - /* Worker mapping info */ - int num_workers; - int worker_level; - hwloc_obj_t worker_obj; - - /* linear mapping */ - topo_worker_t *worker_map; - - int num_wps; -} qt_topology_t; - -static qt_topology_t qt_topo; - -static void initialize_qt_topo(void) { - qt_topo.num_sheps = -1; - qt_topo.shep_level = -1; - qt_topo.shep_obj = NULL; - qt_topo.num_workers = -1; - qt_topo.worker_level = -1; - qt_topo.worker_obj = NULL; - qt_topo.num_wps = -1; - qt_topo.worker_map = NULL; -} - -#define HIERARCHY_NAME_LEN 128 -static hwloc_obj_type_t *topo_types; -static char (*topo_type_names)[HIERARCHY_NAME_LEN]; -static int num_types; - -static void print_system_view(hwloc_topology_t sys_topo) { - char str[128]; - hwloc_obj_t obj = NULL; - hwloc_const_cpuset_t allowed_cpuset = - hwloc_topology_get_allowed_cpuset(sys_topo); - - obj = hwloc_get_obj_inside_cpuset_by_depth(sys_topo, allowed_cpuset, 0, 0); - while (NULL != obj) { - int const depth = obj->depth; - int const arity = obj->arity; - int const num_objs = - hwloc_get_nbobjs_inside_cpuset_by_depth(sys_topo, allowed_cpuset, depth); - - hwloc_obj_snprintf(str, sizeof(str), sys_topo, obj, "#", 0); - - printf("TOPO: depth: %d\n", depth); - printf("TOPO:\tarity: %d\n", arity); - printf("TOPO:\tnum_objs: %d\n", num_objs); - printf("TOPO:\ttype: %s\n", str); - - obj = obj->first_child; - } - - int const num_cores = hwloc_get_nbobjs_inside_cpuset_by_type( - sys_topo, allowed_cpuset, HWLOC_OBJ_CACHE_UNIFIED); - - printf("TOPO: number of available COREs: %d\n", num_cores); -} - -static void print_logical_view(void) { - char shep_level[128]; - char worker_level[128]; - char str[128]; - - printf("QT_TOPO: shep_level: %d\n", qt_topo.shep_level); - if (NULL != qt_topo.shep_obj) { - hwloc_obj_type_snprintf( - shep_level, sizeof(shep_level), qt_topo.shep_obj, 0); - printf("QT_TOPO: shep type: %s\n", shep_level); - } - - printf("QT_TOPO: worker_level: %d\n", qt_topo.worker_level); - if (NULL != qt_topo.worker_obj) { - hwloc_obj_type_snprintf( - worker_level, sizeof(worker_level), qt_topo.worker_obj, 0); - printf("QT_TOPO: worker type: %s\n", worker_level); - } - - printf("QT_TOPO: #(sheps): %d\n", qt_topo.num_sheps); - printf("QT_TOPO: #(workers): %d\n", qt_topo.num_workers); - printf("QT_TOPO: #(wps): %d\n", qt_topo.num_wps); - - if (!strcmp(worker_level, "Core")) { - hwloc_obj_snprintf(str, - sizeof(str), - sys_topo, - qt_topo.worker_map[0].bind_obj->first_child, - "#", - 0); - printf("worker_map: {%d(%d,%d, Core containing %s)", - qt_topo.worker_map[0].uid, - qt_topo.worker_map[0].shep_id, - qt_topo.worker_map[0].worker_id, - str); - } else { - hwloc_obj_snprintf( - str, sizeof(str), sys_topo, qt_topo.worker_map[0].bind_obj, "#", 0); - printf("worker_map: {%d(%d,%d,%s)", - qt_topo.worker_map[0].uid, - qt_topo.worker_map[0].shep_id, - qt_topo.worker_map[0].worker_id, - str); - } - - for (int i = 1; i < qt_topo.num_workers; i++) { - if (!strcmp(worker_level, "Core")) { - hwloc_obj_snprintf(str, - sizeof(str), - sys_topo, - qt_topo.worker_map[i].bind_obj->first_child, - "#", - 0); - printf(", %d(%d,%d, Core containing %s)", - qt_topo.worker_map[i].uid, - qt_topo.worker_map[i].shep_id, - qt_topo.worker_map[i].worker_id, - str); - } else { - hwloc_obj_snprintf( - str, sizeof(str), sys_topo, qt_topo.worker_map[i].bind_obj, "#", 0); - printf(", %d(%d,%d,%s)", - qt_topo.worker_map[i].uid, - qt_topo.worker_map[i].shep_id, - qt_topo.worker_map[i].worker_id, - str); - } - } - printf("}\n"); -} - -static void fini_type_options(void) { - qthread_debug(AFFINITY_CALLS, "destroying type options\n"); - - qt_free(topo_types); - qt_free(topo_type_names); -} - -static void init_type_options(void) { - qthread_debug(AFFINITY_CALLS, "creating type options\n"); - - hwloc_const_cpuset_t allowed_cpuset = - hwloc_topology_get_allowed_cpuset(sys_topo); - hwloc_obj_t obj = - hwloc_get_obj_inside_cpuset_by_depth(sys_topo, allowed_cpuset, 0, 0); - - /* Walk down tree */ - while (NULL != obj) { - num_types += 1; - - obj = obj->first_child; - } - - topo_types = qt_malloc(num_types * sizeof(hwloc_obj_type_t)); - topo_type_names = qt_malloc(num_types * HIERARCHY_NAME_LEN * sizeof(char)); - assert(NULL != topo_types); - assert(NULL != topo_type_names); - - /* Walk up tree */ - obj = hwloc_get_obj_inside_cpuset_by_depth( - sys_topo, allowed_cpuset, num_types - 1, 0); - int cache_level = 1; - int type_id = num_types - 1; - while (NULL != obj) { - topo_types[type_id] = obj->type; - - if (0 == hwloc_compare_types(HWLOC_OBJ_CACHE_UNIFIED, obj->type)) { - snprintf(topo_type_names[type_id], 8, "L%dcache", cache_level); - cache_level += 1; - } else { - strncpy(topo_type_names[type_id], - hwloc_obj_type_string(obj->type), - HIERARCHY_NAME_LEN); - } - - type_id -= 1; - obj = obj->parent; - } -} - -static void print_type_options(void) { - printf("type_options: {%s", topo_type_names[0]); - for (int i = 1; i < num_types; i++) { printf(", %s", topo_type_names[i]); } - printf("}\n"); -} - -static hwloc_obj_type_t wkr_type = HWLOC_OBJ_CORE; -static int wkr_index = -1; -static int wkr_depth = -1; - -#if HWLOC_API_VERSION < 0x00010100 -#define hwloc_bitmap_weight hwloc_cpuset_weight -#define hwloc_bitmap_asprintf hwloc_cpuset_asprintf -#define hwloc_bitmap_foreach_begin hwloc_cpuset_foreach_begin -#define hwloc_bitmap_foreach_end hwloc_cpuset_foreach_end -#define hwloc_bitmap_alloc hwloc_cpuset_alloc -#define hwloc_bitmap_free hwloc_cpuset_free -#endif /* if HWLOC_API_VERSION < 0x00010100 */ - -static void qt_affinity_internal_hwloc_teardown(void) { /*{{{*/ - DEBUG_ONLY(hwloc_topology_check(sys_topo)); - - fini_type_options(); - - /* Reestablish mccoy thread bindings */ - hwloc_set_cpubind(sys_topo, mccoy_thread_bindings, HWLOC_CPUBIND_THREAD); - hwloc_bitmap_free(mccoy_thread_bindings); - - qt_free(qt_topo.worker_map); - hwloc_topology_destroy(sys_topo); - initialized = 0; -} /*}}}*/ - -/* - * User hints: - * - QT_TOPO_OUTPUT_LEVEL: The amount of topology information to print. Level - * '2' will print a synopsis of the topology reported - * by hwloc. Currently only level '2' is used. - * - QT_SHEPHERD_BOUNDARY: The level in the hierarchy to associate with - * shepherds. - * - QT_WORKER_UNIT: The level in the hierarchy to use for binding - * workers. - * The worker will be bound to the set of COREs under - * this level. - */ -void INTERNAL qt_affinity_init(qthread_shepherd_id_t *nbshepherds, - qthread_worker_id_t *nbworkers, - size_t *hw_par) { /*{{{ */ - qthread_debug(AFFINITY_CALLS, - "nbshepherds=%u, nbworkers=%u, hw_par=%u\n", - *nbshepherds, - *nbworkers, - *hw_par); - - /* Collect user hints */ - qthread_shepherd_id_t num_sheps_hint = *nbshepherds; - qthread_worker_id_t num_wps_hint = *nbworkers; - size_t num_workers_hint = *hw_par; - - /* Set up logical sys_topo */ - initialize_qt_topo(); - - /* Initialize hwloc setup */ - if (qthread_cas(&initialized, 0, 1) == 0) { - qassert(hwloc_topology_init(&sys_topo), 0); - qassert(hwloc_topology_load(sys_topo), 0); - MACHINE_FENCE; - initialized = 2; - } else { - while (initialized == 1) SPINLOCK_BODY(); - } - DEBUG_ONLY(hwloc_topology_check(sys_topo)); - qthread_internal_cleanup(qt_affinity_internal_hwloc_teardown); - - /* Record mccoy thread bindings so we can reestablish during finalization */ - mccoy_thread_bindings = hwloc_bitmap_alloc(); - hwloc_get_cpubind(sys_topo, mccoy_thread_bindings, HWLOC_CPUBIND_THREAD); - - hwloc_const_cpuset_t allowed_cpuset = - hwloc_topology_get_allowed_cpuset(sys_topo); - - /* Collect CORE info */ - hwloc_obj_t core_obj = hwloc_get_obj_inside_cpuset_by_type( - sys_topo, allowed_cpuset, HWLOC_OBJ_CORE, 0); - if (NULL == core_obj) { - print_error("failed to locate CORE#0\n"); - exit(EXIT_FAILURE); - } - int const num_cores = hwloc_get_nbobjs_inside_cpuset_by_depth( - sys_topo, allowed_cpuset, core_obj->depth); - if (0 >= core_obj) { - print_error("failed to count COREs\n"); - exit(EXIT_FAILURE); - } - - /* Process boundary and unit environment variables */ - int shep_type_id = -1; - int worker_type_id = -1; - - /* Build up sys_topo options */ - init_type_options(); - - /* Collect environment variables */ - { - qt_topology_output_level = - qt_internal_get_env_num("TOPO_OUTPUT_LEVEL", 0, 0); - } - { - char const *qsh = qt_internal_get_env_str("SHEPHERD_BOUNDARY", "node"); - if (qsh) { - for (int ti = 0; ti < num_types; ++ti) { - if (!strncasecmp( - topo_type_names[ti], qsh, strlen(topo_type_names[ti]))) { - shep_type_id = ti; - } - } - if (shep_type_id == -1) { - fprintf(stderr, "unparsable shepherd boundary (%s)\n", qsh); - exit(EXIT_FAILURE); - } - } - - qsh = qt_internal_get_env_str("WORKER_UNIT", "pu"); - if (qsh) { - for (int ti = 0; ti < num_types; ++ti) { - if (!strncasecmp( - topo_type_names[ti], qsh, strlen(topo_type_names[ti]))) { - worker_type_id = ti; - } - } - if (worker_type_id == -1) { - fprintf(stderr, "unparsable worker unit (%s)\n", qsh); - exit(EXIT_FAILURE); - } - } - } - - /* Print system view */ - if (2 == qt_topology_output_level) { - print_system_view(sys_topo); - print_type_options(); - } - - /**************************************************************************/ - - qthread_debug(AFFINITY_DETAILS, "HINTS: max-sheps: %d\n", num_sheps_hint); - qthread_debug(AFFINITY_DETAILS, "HINTS: max-wps: %d\n", num_wps_hint); - qthread_debug(AFFINITY_DETAILS, "HINTS: max-workers: %d\n", num_workers_hint); - qthread_debug(AFFINITY_DETAILS, "HINTS: shep-boundary: %d\n", shep_type_id); - qthread_debug(AFFINITY_DETAILS, "HINTS: worker-unit: %d\n", worker_type_id); - - /* Process boundary and units requests */ - if (-1 != worker_type_id) { - /* User specified worker unit */ - - hwloc_obj_t worker_obj = hwloc_get_obj_inside_cpuset_by_type( - sys_topo, allowed_cpuset, topo_types[worker_type_id], 0); - if (NULL == worker_obj) { - print_error("failed to locate worker unit object\n"); - exit(EXIT_FAILURE); - } - - /* Calculate number of these objects */ - int num_objs = hwloc_get_nbobjs_inside_cpuset_by_type( - sys_topo, allowed_cpuset, topo_types[worker_type_id]); - - qthread_debug(AFFINITY_DETAILS, - "found %d %s unit obj(s)\n", - num_objs, - topo_type_names[worker_type_id]); - - /* Update logical sys_topo info */ - qt_topo.worker_obj = worker_obj; - qt_topo.worker_level = worker_obj->depth; - - /* Update hints */ - } - if (-1 != shep_type_id) { - /* User specified shepherd boundary */ - - hwloc_obj_t shep_obj = hwloc_get_obj_inside_cpuset_by_type( - sys_topo, allowed_cpuset, topo_types[shep_type_id], 0); - if (NULL == shep_obj) { - print_error("failed to locate shepherd boundary object\n"); - exit(EXIT_FAILURE); - } - - /* Calculate number of these objects */ - int const num_shep_objs = hwloc_get_nbobjs_inside_cpuset_by_type( - sys_topo, allowed_cpuset, topo_types[shep_type_id]); - - qthread_debug(AFFINITY_DETAILS, - "found %d %s shep obj(s)\n", - num_shep_objs, - topo_type_names[shep_type_id]); - - /* Calculate number of CORE within boundary: this is max num-workers */ - int const num_shep_cores = num_cores / num_shep_objs; - - /* Update logical sys_topo info */ - qt_topo.shep_obj = shep_obj; - qt_topo.shep_level = shep_obj->depth; - - /* Update hints */ - if (0 == num_sheps_hint || num_shep_objs < num_sheps_hint) { - qthread_debug(AFFINITY_DETAILS, - "%s shep obj => max-sheps=%d\n", - topo_type_names[shep_type_id], - num_shep_objs); - num_sheps_hint = num_shep_objs; - } - if (0 == num_wps_hint || num_shep_cores < num_wps_hint) { - qthread_debug(AFFINITY_DETAILS, - "%s shep obj => max-wps=%d\n", - topo_type_names[shep_type_id], - num_shep_cores); - num_wps_hint = num_shep_cores; - } - } - - /* Establish boundary and unit */ - if (NULL == qt_topo.worker_obj) { - qthread_debug(AFFINITY_DETAILS, - "No worker unit hint, choosing Core unit obj\n"); - - /* Assume unit is CORE; handle boundary selection below. */ - qt_topo.worker_obj = core_obj; - qt_topo.worker_level = core_obj->depth; - } - - if (NULL == qt_topo.shep_obj) { - /* Have only unit, must find boundary */ - - /* Policy: - * - If have num-sheps and num-wps hints, then select first shared level - * that accomodates both values. - * - If have only num-wps hint, then select first shared level that - * accomodates that values. - * - If have only num-sheps hint, then select first shared level that - * accomodates that values. - * - Otherwise select first shared level at or above the unit level. */ - - /* Policy: select the first shared level at or above the unit with - * at least num-wps shared COREs (if requested) and at most - * num-sheps boundary objects (if requested); num-wps should - * take precedence over num-sheps */ - hwloc_obj_t shep_obj = core_obj; - - /* Walk up tree counting shared COREs */ - int shared = 1; - while (NULL != shep_obj->parent) { - shared *= (shep_obj->arity == 0) ? 1 : shep_obj->arity; - - if (0 != num_sheps_hint && 0 != num_wps_hint) { - /* Have both num-sheps and num-wps hints */ - int num_shep_objs = hwloc_get_nbobjs_inside_cpuset_by_depth( - sys_topo, allowed_cpuset, shep_obj->depth); - if (num_shep_objs == num_sheps_hint && shared >= num_wps_hint) { - /* Choose this level */ - break; - } - } else if (0 != num_sheps_hint) { - /* Have only num-sheps hint */ - int num_shep_objs = hwloc_get_nbobjs_inside_cpuset_by_depth( - sys_topo, allowed_cpuset, shep_obj->depth); - if (num_shep_objs == num_sheps_hint) { - /* Choose this level */ - break; - } - } else if (0 != num_wps_hint) { - /* Have only num-wps hint */ - if (shared >= num_wps_hint) { - /* Choose this level */ - break; - } - } else { - /* No hints */ - if (shep_obj->depth <= qt_topo.worker_obj->depth && 1 < shared) { - /* Choose this level */ - break; - } - } - - shep_obj = shep_obj->parent; - } - - /* Update logical sys_topo info */ - qt_topo.shep_obj = shep_obj; - qt_topo.shep_level = shep_obj->depth; - - qthread_debug(AFFINITY_DETAILS, - "chose %s shep obj\n", - topo_type_names[qt_topo.shep_level]); - - /* Update hints */ - } - - /* Sanity-check that boundary and units are reasonable */ - assert(NULL != qt_topo.shep_obj && NULL != qt_topo.worker_obj); - if (qt_topo.shep_level > qt_topo.worker_level) { - print_warning("shepherd boundary smaller than worker unit\n"); - } - - /**************************************************************************/ - - /* Establish num-sheps and num-wps */ - if (0 == num_sheps_hint && 0 == num_wps_hint) { - /* No sys_topo hints */ - qt_topo.num_sheps = hwloc_get_nbobjs_inside_cpuset_by_depth( - sys_topo, allowed_cpuset, qt_topo.shep_obj->depth); - qt_topo.num_wps = num_cores / qt_topo.num_sheps; - } else if (0 == num_wps_hint) { - /* Only sheps hint given */ - int num_sheps_objs = hwloc_get_nbobjs_inside_cpuset_by_depth( - sys_topo, allowed_cpuset, qt_topo.shep_obj->depth); - - if (num_sheps_hint <= num_sheps_objs) { - qt_topo.num_sheps = num_sheps_hint; - } else { - print_warning("disregarding num-sheps hint (%d) in favor of maximum " - "number of objects at shepherd boundary level (%d)\n", - num_sheps_hint, - num_sheps_objs); - qt_topo.num_sheps = num_sheps_objs; - } - qt_topo.num_wps = num_cores / qt_topo.num_sheps; - } else if (0 == num_sheps_hint) { - /* Only WPS hint given */ - qt_topo.num_sheps = hwloc_get_nbobjs_inside_cpuset_by_depth( - sys_topo, allowed_cpuset, qt_topo.shep_obj->depth); - qt_topo.num_wps = num_wps_hint; - } else { - /* Both hints */ - qt_topo.num_sheps = num_sheps_hint; - qt_topo.num_wps = num_wps_hint; - } - - /* Establish num-workers */ - if (0 != num_workers_hint) { - /* Adjust num-wps count to account for requested num-workers */ - if ((num_workers_hint / num_sheps_hint) > num_wps_hint) { - print_error("invalid HWPAR, too many workers\n"); - exit(EXIT_FAILURE); - } - if (0 != num_workers_hint % num_sheps_hint) { - print_error("invalid HWPAR, odd workers\n"); - exit(EXIT_FAILURE); - } - - qt_topo.num_workers = num_workers_hint; - qt_topo.num_wps = num_workers_hint / qt_topo.num_sheps; - } else { - qt_topo.num_workers = qt_topo.num_sheps * qt_topo.num_wps; - } - - /* Construct worker map */ - qt_topo.worker_map = qt_malloc(qt_topo.num_workers * sizeof(topo_worker_t)); - assert(qt_topo.worker_map); - - for (int i = 0; i < qt_topo.num_sheps; i++) { - for (int j = 0; j < qt_topo.num_wps; j++) { - int uid = (i * qt_topo.num_wps) + j; - qt_topo.worker_map[uid].uid = uid; - qt_topo.worker_map[uid].shep_id = i; - qt_topo.worker_map[uid].worker_id = j; - - /* Set binding location */ - hwloc_obj_t shep_obj = hwloc_get_obj_inside_cpuset_by_depth( - sys_topo, allowed_cpuset, qt_topo.shep_level, i); - hwloc_obj_t logical_core_obj = hwloc_get_obj_inside_cpuset_by_type( - sys_topo, shep_obj->cpuset, HWLOC_OBJ_CORE, j); - qt_topo.worker_map[uid].bind_obj = hwloc_get_ancestor_obj_by_depth( - sys_topo, qt_topo.worker_obj->depth, logical_core_obj); - } - } - - /* Print logical sys_topo */ - if (2 == qt_topology_output_level) { print_logical_view(); } - - /* Set "outputs" */ - *nbshepherds = qt_topo.num_sheps; - *nbworkers = qt_topo.num_wps; - - wkr_type = qt_topo.worker_obj->type; - wkr_depth = qt_topo.worker_level; - - return; -} /*}}} */ - -void INTERNAL qt_affinity_set(qthread_worker_t *me, - unsigned int nworkerspershep) { /*{{{ */ - ASSERT_ONLY(hwloc_topology_check(sys_topo)); - - hwloc_obj_t target_obj = qt_topo.worker_map[me->unique_id - 1].bind_obj; - if (hwloc_set_cpubind(sys_topo, target_obj->cpuset, HWLOC_CPUBIND_THREAD)) { - char *str; - int i = errno; -#ifdef __APPLE__ - if (i == ENOSYS) { return; } -#endif - hwloc_bitmap_asprintf(&str, target_obj->cpuset); - fprintf(stderr, - "Couldn't bind to cpuset %s because %s (%i)\n", - str, - strerror(i), - i); - FREE(str, strlen(str)); - } -} /*}}} */ - -int INTERNAL qt_affinity_gendists(qthread_shepherd_t *sheps, - qthread_shepherd_id_t nshepherds) { /*{{{ */ - qthread_debug(AFFINITY_CALLS, - "generating distances for %i sheps (%p)\n", - (int)qt_topo.num_sheps, - sheps); - - for (size_t i = 0; i < qt_topo.num_sheps; i++) { - sheps[i].node = i % qt_topo.num_sheps; - sheps[i].sorted_sheplist = - qt_calloc(qt_topo.num_sheps - 1, sizeof(qthread_shepherd_id_t)); - sheps[i].shep_dists = qt_calloc(qt_topo.num_sheps, sizeof(unsigned int)); - } - -#ifdef QTHREAD_HAVE_HWLOC_DISTS - hwloc_const_cpuset_t allowed_cpuset = - hwloc_topology_get_allowed_cpuset(sys_topo); - /* XXX: should this really find the obj closest to the shep level that - * has a distance matrix? */ - const struct hwloc_distances_s *matrix = - hwloc_get_whole_distance_matrix_by_type(sys_topo, HWLOC_OBJ_NODE); - - if (matrix) { - qthread_debug(AFFINITY_DETAILS, - "matrix is %p, type at this depth: %s, relative_depth: " - "%u(%s), nbobj: %u\n", - matrix, - hwloc_obj_type_string(qt_topo.shep_obj->type), - matrix->relative_depth, - hwloc_obj_type_string( - hwloc_get_depth_type(sys_topo, matrix->relative_depth)), - matrix->nbobjs); - assert(matrix->latency); - } else { - qthread_debug(AFFINITY_DETAILS, - "matrix is %p, type at this depth: %s\n", - matrix, - hwloc_obj_type_string(HWLOC_OBJ_NODE)); - } - size_t node_to_NUMAnode[qt_topo.num_sheps]; - for (size_t i = 0; i < qt_topo.num_sheps; ++i) { - hwloc_obj_t node_obj = hwloc_get_obj_inside_cpuset_by_depth( - sys_topo, allowed_cpuset, qt_topo.shep_level, i); - while (node_obj->type > HWLOC_OBJ_NODE) { - node_obj = node_obj->parent; - assert(node_obj); - } - node_to_NUMAnode[i] = node_obj->logical_index; - qthread_debug( - AFFINITY_DETAILS, "obj %i maps to node %i\n", i, node_to_NUMAnode[i]); - } -#endif /* ifdef QTHREAD_HAVE_HWLOC_DISTS */ - - for (size_t i = 0; i < qt_topo.num_sheps; ++i) { - for (size_t j = 0, k = 0; j < qt_topo.num_sheps; ++j) { - if (j != i) { -#ifdef QTHREAD_HAVE_HWLOC_DISTS - if (matrix) { - sheps[i].shep_dists[j] = - matrix->latency[node_to_NUMAnode[sheps[i].node] + - matrix->nbobjs * node_to_NUMAnode[sheps[j].node]] * - 10; - qthread_debug(AFFINITY_DETAILS, - "distance from %i(%i) to %i(%i) is %i\n", - (int)i, - (int)sheps[i].node, - (int)j, - (int)sheps[j].node, - (int)(sheps[i].shep_dists[j])); - } else { - // handle what is fundamentally a bug in old versions of hwloc - sheps[i].shep_dists[j] = 10; - qthread_debug(AFFINITY_DETAILS, - "pretending distance from %i to %i is %i\n", - (int)i, - (int)j, - (int)(sheps[i].shep_dists[j])); - } -#else /* ifdef QTHREAD_HAVE_HWLOC_DISTS */ - sheps[i].shep_dists[j] = 10; - qthread_debug(AFFINITY_DETAILS, - "pretending distance from %i to %i is %i\n", - (int)i, - (int)j, - (int)(sheps[i].shep_dists[j])); -#endif /* ifdef QTHREAD_HAVE_HWLOC_DISTS */ - sheps[i].sorted_sheplist[k++] = j; - } - } - if (qt_topo.num_sheps > 1) { - sort_sheps( - sheps[i].shep_dists, sheps[i].sorted_sheplist, qt_topo.num_sheps); - } - } - /* there does not seem to be a way to extract distances... */ - return QTHREAD_SUCCESS; -} /*}}} */ - -#ifdef QTHREAD_HAVE_MEM_AFFINITY -void INTERNAL qt_affinity_mem_tonode(void *addr, - size_t bytes, - int node) { /*{{{ */ - hwloc_nodeset_t nodeset = hwloc_bitmap_alloc(); - - DEBUG_ONLY(hwloc_topology_check(sys_topo)); - hwloc_bitmap_set(nodeset, node); - hwloc_set_area_membind(sys_topo, - addr, - bytes, - nodeset, - HWLOC_MEMBIND_BIND, - HWLOC_MEMBIND_NOCPUBIND); - hwloc_bitmap_free(nodeset); -} /*}}} */ - -void INTERNAL *qt_affinity_alloc(size_t bytes) { /*{{{ */ - DEBUG_ONLY(hwloc_topology_check(sys_topo)); - return hwloc_alloc(sys_topo, bytes); -} /*}}} */ - -void INTERNAL *qt_affinity_alloc_onnode(size_t bytes, int node) { /*{{{ */ - void *ret; - hwloc_nodeset_t nodeset; - - DEBUG_ONLY(hwloc_topology_check(sys_topo)); - nodeset = hwloc_bitmap_alloc(); - hwloc_bitmap_set(nodeset, node); - ret = hwloc_alloc_membind_nodeset( - sys_topo, bytes, nodeset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_NOCPUBIND); - hwloc_bitmap_free(nodeset); - return ret; -} /*}}} */ - -void INTERNAL qt_affinity_free(void *ptr, size_t bytes) { /*{{{ */ - DEBUG_ONLY(hwloc_topology_check(sys_topo)); - hwloc_free(sys_topo, ptr, bytes); -} /*}}} */ - -#endif /* ifdef QTHREAD_HAVE_MEM_AFFINITY */ - -/* vim:set expandtab: */ diff --git a/src/affinity/lgrp.c b/src/affinity/lgrp.c deleted file mode 100644 index f6a7f804c..000000000 --- a/src/affinity/lgrp.c +++ /dev/null @@ -1,281 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#ifdef HAVE_PROCESSOR_BIND -#include -#include -#include -#ifdef HAVE_SYS_LGRP_USER_H -#include -#endif -#endif -#include /* for perror() */ -#include /* for malloc() */ - -#include "qt_asserts.h" -#include "qt_subsystems.h" -#include "qt_visibility.h" -// #include "qthread_innards.h" -#include "qt_affinity.h" -#include "qt_debug.h" - -#include "shepcomp.h" -#include "shufflesheps.h" - -static lgrp_cookie_t lgrp_cookie; -static lgrp_id_t mccoy_thread_home; -static lgrp_affinity_t mccoy_thread_home_affinity; - -qthread_shepherd_id_t guess_num_shepherds(void); -qthread_worker_id_t -guess_num_workers_per_shep(qthread_shepherd_id_t nshepherds); - -static int lgrp_maxcpus(lgrp_id_t const lgrp, int cpu_max) { /*{{{ */ - int nchildren, - ncpus = lgrp_cpus(lgrp_cookie, lgrp, NULL, 0, LGRP_CONTENT_DIRECT); - - if (ncpus == -1) { - return cpu_max; - } else if ((ncpus > 0) && (ncpus > cpu_max)) { - cpu_max = ncpus; - } - nchildren = lgrp_children(lgrp_cookie, lgrp, NULL, 0); - if (nchildren == -1) { - return cpu_max; - } else if (nchildren > 0) { - int i; - int nchildren_save = nchildren; - lgrp_id_t *children = MALLOC(nchildren * sizeof(lgrp_id_t)); - - nchildren = lgrp_children(lgrp_cookie, lgrp, children, nchildren); - if (nchildren == -1) { - qthread_debug(ALWAYS_OUTPUT, "hardware giving inconsistent answers!\n"); - abort(); - return cpu_max; - } - for (i = 0; i < nchildren; i++) { - cpu_max = lgrp_maxcpus(children[i], cpu_max); - } - FREE(children, nchildren * sizeof(lgrp_id_t)); - } - return cpu_max; -} /*}}} */ - -static int lgrp_walk(lgrp_id_t const lgrp, - processorid_t **cpus, - lgrp_id_t *lgrp_ids, - int cpu_grps) { /*{{{ */ - int nchildren, - ncpus = lgrp_cpus(lgrp_cookie, lgrp, NULL, 0, LGRP_CONTENT_DIRECT); - - if (ncpus == -1) { - return cpu_grps; - } else if (ncpus > 0) { - processorid_t *cpuids = MALLOC((ncpus + 1) * sizeof(processorid_t)); - - ncpus = lgrp_cpus(lgrp_cookie, lgrp, cpuids, ncpus, LGRP_CONTENT_DIRECT); - if (ncpus == -1) { - qthread_debug(ALWAYS_OUTPUT, "hardware giving inconsistent answers!\n"); - abort(); - return cpu_grps; - } - cpuids[ncpus] = -1; - if (cpus) { cpus[cpu_grps] = cpuids; } - if (lgrp_ids) { lgrp_ids[cpu_grps] = lgrp; } - cpu_grps++; - } - nchildren = lgrp_children(lgrp_cookie, lgrp, NULL, 0); - if (nchildren == -1) { - return cpu_grps; - } else if (nchildren > 0) { - int i; - lgrp_id_t *children = MALLOC(nchildren * sizeof(lgrp_id_t)); - - nchildren = lgrp_children(lgrp_cookie, lgrp, children, nchildren); - if (nchildren == -1) { - qthread_debug(ALWAYS_OUTPUT, "hardware giving inconsistent answers!\n"); - abort(); - return cpu_grps; - } - for (i = 0; i < nchildren; i++) { - cpu_grps = lgrp_walk(children[i], cpus, lgrp_ids, cpu_grps); - } - FREE(children, nchildren * sizeof(lgrp_id_t)); - } - return cpu_grps; -} /*}}} */ - -static void qt_affinity_internal_lgrp_teardown(void) { - lgrp_affinity_set( - P_LWPID, P_MYID, mccoy_thread_home, mccoy_thread_home_affinity); -} - -void INTERNAL qt_affinity_init(qthread_shepherd_id_t *nbshepherds, - qthread_worker_id_t *nbworkers, - size_t *hw_par) { /*{{{ */ - lgrp_cookie = lgrp_init(LGRP_VIEW_OS); - mccoy_thread_home = lgrp_home(P_LWPID, P_MYID); - mccoy_thread_home_affinity = - lgrp_affinity_get(P_LWPID, P_MYID, mccoy_thread_home); - qthread_internal_cleanup(qt_affinity_internal_lgrp_teardown); - if (*nbshepherds == 0) { *nbshepherds = guess_num_shepherds(); } - if (*nbworkers == 0) { - *nbworkers = guess_num_workers_per_shep(*nbshepherds); - } -} /*}}} */ - -qthread_shepherd_id_t INTERNAL guess_num_shepherds(void) { /*{{{ */ - qthread_shepherd_id_t guess = 1; - - guess = lgrp_walk(lgrp_root(lgrp_cookie), NULL, NULL, 0); - if (guess <= 0) { guess = 1; } - qthread_debug(AFFINITY_DETAILS, "guessing %i shepherds\n", (int)guess); - return guess; -} /*}}} */ - -void INTERNAL qt_affinity_set(qthread_worker_t *me, - unsigned int Q_UNUSED(nw)) { /*{{{ */ - /* if this seems wrong, first answer: why should workers have more than socket - * affinity? */ - qthread_debug(AFFINITY_DETAILS, - "set shep %i worker %i to lgrp %i\n", - (int)me->shepherd->shepherd_id, - (int)me->worker_id, - (int)me->shepherd->lgrp); - if (lgrp_affinity_set(P_LWPID, P_MYID, me->shepherd->lgrp, LGRP_AFF_STRONG) != - 0) { - perror("lgrp_affinity_set"); - } -} /*}}} */ - -qthread_worker_id_t INTERNAL -guess_num_workers_per_shep(qthread_shepherd_id_t nshepherds) { /*{{{ */ - unsigned int guess = 1; - int tot_nodes = lgrp_walk(lgrp_root(lgrp_cookie), NULL, NULL, 0); - - guess = lgrp_maxcpus(lgrp_root(lgrp_cookie), 0); - - qthread_debug(AFFINITY_DETAILS, - "guessing num workers for %i sheps (nodes:%i max:%i)\n", - (int)nshepherds, - tot_nodes, - (int)guess); - - if (nshepherds > tot_nodes) { guess /= (nshepherds / tot_nodes); } - if (guess == 0) { guess = 1; } - - qthread_debug(AFFINITY_DETAILS, "guessing %i workers per shep\n", (int)guess); - return guess; -} /*}}} */ - -int INTERNAL qt_affinity_gendists(qthread_shepherd_t *sheps, - qthread_shepherd_id_t nshepherds) { /*{{{ */ - unsigned int lgrp_offset; - int lgrp_count_grps; - processorid_t **cpus = NULL; - lgrp_id_t *lgrp_ids = NULL; - - switch (lgrp_cookie) { - case EINVAL: - case ENOMEM: - qthread_debug(AFFINITY_DETAILS, "lgrp_cookie is invalid!\n"); - return QTHREAD_THIRD_PARTY_ERROR; - } - { - size_t max_lgrps = lgrp_nlgrps(lgrp_cookie); - - if (max_lgrps <= 0) { - qthread_debug( - AFFINITY_DETAILS, "max_lgrps is <= zero! (%i)\n", max_lgrps); - return QTHREAD_THIRD_PARTY_ERROR; - } - cpus = qt_calloc(max_lgrps, sizeof(processorid_t *)); - assert(cpus); - lgrp_ids = qt_calloc(max_lgrps, sizeof(lgrp_id_t)); - assert(lgrp_ids); - } - lgrp_count_grps = lgrp_walk(lgrp_root(lgrp_cookie), cpus, lgrp_ids, 0); - if (lgrp_count_grps <= 0) { - qthread_debug( - AFFINITY_DETAILS, "lgrp_count_grps is <= zero ! (%i)\n", lgrp_count_grps); - return QTHREAD_THIRD_PARTY_ERROR; - } - for (qthread_shepherd_id_t i = 0; i < nshepherds; i++) { - /* first, pick a lgrp/node */ - int cpu; - unsigned int first_loff; - - first_loff = lgrp_offset = i % lgrp_count_grps; - sheps[i].node = -1; - sheps[i].lgrp = -1; - /* now pick an available CPU */ - while (1) { - cpu = 0; - /* find an unused one */ - while (cpus[lgrp_offset][cpu] != (processorid_t)(-1)) cpu++; - if (cpu == 0) { - /* if no unused ones... try the next lgrp */ - lgrp_offset++; - lgrp_offset *= (lgrp_offset < lgrp_count_grps); - if (lgrp_offset == first_loff) { break; } - } else { - /* found one! */ - cpu--; - sheps[i].node = cpus[lgrp_offset][cpu]; - sheps[i].lgrp = lgrp_ids[lgrp_offset]; - cpus[lgrp_offset][cpu] = -1; - break; - } - } - } - for (qthread_shepherd_id_t i = 0; i < nshepherds; i++) { - unsigned int const node_i = sheps[i].lgrp; - size_t j; - sheps[i].shep_dists = qt_calloc(nshepherds, sizeof(unsigned int)); - assert(sheps[i].shep_dists); - for (j = 0; j < nshepherds; j++) { - unsigned int const node_j = sheps[j].lgrp; - - if ((node_i != QTHREAD_NO_NODE) && (node_j != QTHREAD_NO_NODE)) { - int ret = - lgrp_latency_cookie(lgrp_cookie, node_i, node_j, LGRP_LAT_CPU_TO_MEM); - - if (ret < 0) { - assert(ret >= 0); - return QTHREAD_THIRD_PARTY_ERROR; - } else { - sheps[i].shep_dists[j] = (unsigned int)ret; - } - } else { - /* XXX too arbitrary */ - if (i == j) { - sheps[i].shep_dists[j] = 12; - } else { - sheps[i].shep_dists[j] = 18; - } - } - } - } - for (qthread_shepherd_id_t i = 0; i < nshepherds; i++) { - size_t j, k = 0; - - sheps[i].sorted_sheplist = - qt_calloc(nshepherds - 1, sizeof(qthread_shepherd_id_t)); - assert(sheps[i].sorted_sheplist); - for (j = 0; j < nshepherds; j++) { - if (j != i) { sheps[i].sorted_sheplist[k++] = j; } - } - if (nshepherds > 1) { - sort_sheps(sheps[i].shep_dists, sheps[i].sorted_sheplist, nshepherds); - } - } - if (cpus) { - for (int i = 0; i < lgrp_count_grps; i++) { qt_free(cpus[i]); } - qt_free(cpus); - } - if (lgrp_ids) { qt_free(lgrp_ids); } - return QTHREAD_SUCCESS; -} /*}}} */ - -/* vim:set expandtab: */ diff --git a/src/affinity/libnuma.c b/src/affinity/libnuma.c deleted file mode 100644 index acfe46c5e..000000000 --- a/src/affinity/libnuma.c +++ /dev/null @@ -1,198 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include - -#include "qt_affinity.h" -#include "qt_asserts.h" -#include "qt_debug.h" -#include "qt_subsystems.h" - -#include "shepcomp.h" -#include "shufflesheps.h" - -static nodemask_t *mccoy_bitmask = NULL; - -static qthread_shepherd_id_t guess_num_shepherds(void); -qthread_worker_id_t -guess_num_workers_per_shep(qthread_shepherd_id_t nshepherds); - -static void qt_affinity_internal_numa_teardown(void) { - numa_run_on_node_mask(mccoy_bitmask); - FREE(mccoy_bitmask, sizeof(nodemask_t)); -} - -void INTERNAL qt_affinity_init(qthread_shepherd_id_t *nbshepherds, - qthread_worker_id_t *nbworkers, - size_t *hw_par) { /*{{{ */ - mccoy_bitmask = MALLOC(sizeof(nodemask_t)); - *mccoy_bitmask = numa_get_run_node_mask(); - qthread_internal_cleanup(qt_affinity_internal_numa_teardown); - if (*nbshepherds == 0) { *nbshepherds = guess_num_shepherds(); } - if (*nbworkers == 0) { - *nbworkers = guess_num_workers_per_shep(*nbshepherds); - } -} /*}}} */ - -void INTERNAL qt_affinity_mem_tonode(void *addr, - size_t bytes, - int node) { /*{{{ */ - numa_tonode_memory(addr, bytes, node); -} /*}}} */ - -void INTERNAL *qt_affinity_alloc(size_t bytes) { /*{{{ */ - return numa_alloc(bytes); -} /*}}} */ - -void INTERNAL *qt_affinity_alloc_onnode(size_t bytes, int node) { /*{{{ */ - return numa_alloc_onnode(bytes, node); -} /*}}} */ - -void INTERNAL qt_affinity_free(void *ptr, size_t bytes) { /*{{{ */ - numa_free(ptr, bytes); -} /*}}} */ - -#define BMASK_WORDS 16 - -static qthread_shepherd_id_t guess_num_shepherds(void) { /*{{{ */ - qthread_shepherd_id_t nshepherds = 1; - - if (numa_available() != 1) { - /* this is (probably) correct if/when we have multithreaded shepherds, - * ... BUT ONLY IF ALL NODES HAVE CPUS!!!!!! */ - nshepherds = numa_max_node() + 1; - qthread_debug( - AFFINITY_DETAILS, "numa_max_node() returned %i\n", nshepherds); - } - if (nshepherds <= 0) { nshepherds = 1; } - return nshepherds; -} /*}}} */ - -void INTERNAL qt_affinity_set(qthread_worker_t *me, - unsigned int Q_UNUSED(nw)) { /*{{{ */ - assert(me); - - qthread_shepherd_t *const myshep = me->shepherd; - - /* It would be nice if we could do something more specific than - * "numa_run_on_node", but because sched_etaffinity() is so dangerous, we - * really can't, in good conscience. */ - qthread_debug(AFFINITY_FUNCTIONS, - "calling numa_run_on_node(%i) for worker %i\n", - myshep->node, - me->packed_worker_id); - int ret = numa_run_on_node(myshep->node); - if (ret != 0) { - numa_error("setting thread affinity"); - abort(); - } - numa_set_localalloc(); -} /*}}} */ - -qthread_worker_id_t INTERNAL -guess_num_workers_per_shep(qthread_shepherd_id_t nshepherds) { /*{{{ */ - size_t cpu_count = 1; - unsigned int guess = 1; - - qthread_debug( - AFFINITY_CALLS, "guessing workers for %i shepherds\n", (int)nshepherds); -#ifdef HAVE_NUMA_NUM_THREAD_CPUS - /* note: not numa_num_configured_cpus(), just in case an - * artificial limit has been imposed. */ - cpu_count = numa_num_thread_cpus(); - qthread_debug( - AFFINITY_DETAILS, "numa_num_thread_cpus returned %i\n", nshepherds); -#elif defined(HAVE_NUMA_BITMASK_NBYTES) - cpu_count = 0; - for (size_t b = 0; b < numa_bitmask_nbytes(numa_all_cpus_ptr) * 8; b++) { - cpu_count += numa_bitmask_isbitset(numa_all_cpus_ptr, b); - } - qthread_debug(AFFINITY_DETAILS, - "after checking through the all_cpus_ptr, I counted %i cpus\n", - (int)cpu_count); -#else /* ifdef HAVE_NUMA_NUM_THREAD_CPUS */ - cpu_count = numa_max_node() + 1; - qthread_debug(AFFINITY_DETAILS, "numa_max_node() returned %i\n", nshepherds); -#endif /* ifdef HAVE_NUMA_NUM_THREAD_CPUS */ - guess = cpu_count / nshepherds; - if (guess == 0) { guess = 1; } - qthread_debug( - AFFINITY_DETAILS, "guessing %i workers per shepherd\n", (int)guess); - return guess; -} /*}}} */ - -int INTERNAL qt_affinity_gendists(qthread_shepherd_t *sheps, - qthread_shepherd_id_t nshepherds) { /*{{{ */ - size_t const num_extant_nodes = numa_max_node() + 1; - nodemask_t bmask; - - qthread_debug(AFFINITY_FUNCTIONS, - "sheps(%p), nshepherds(%u), num_extant_nodes:%u\n", - sheps, - nshepherds, - (unsigned)num_extant_nodes); - if (numa_available() == -1) { return QTHREAD_THIRD_PARTY_ERROR; } - nodemask_zero(&bmask); - /* assign nodes */ - qthread_debug(AFFINITY_DETAILS, "assign nodes...\n"); - for (size_t i = 0; i < nshepherds; ++i) { - sheps[i].node = i % num_extant_nodes; - qthread_debug( - AFFINITY_DETAILS, "set bit %u in bmask\n", i % num_extant_nodes); - nodemask_set(&bmask, i % num_extant_nodes); - } - qthread_debug(AFFINITY_DETAILS, "numa_set_interleave_mask\n"); - numa_set_interleave_mask(&bmask); - qthread_debug(AFFINITY_DETAILS, "querying distances...\n"); - /* truly ancient versions of libnuma (in the changelog, this is - * considered "pre-history") do not have numa_distance() */ - for (qthread_shepherd_id_t i = 0; i < nshepherds; i++) { - qthread_debug(AFFINITY_DETAILS, "i = %u < %u...\n", i, nshepherds); - unsigned int const node_i = sheps[i].node; - size_t j, k; - sheps[i].shep_dists = qt_calloc(nshepherds, sizeof(unsigned int)); - sheps[i].sorted_sheplist = - qt_calloc(nshepherds - 1, sizeof(qthread_shepherd_id_t)); - qthread_debug(AFFINITY_DETAILS, - "allocs %p %p\n", - sheps[i].shep_dists, - sheps[i].sorted_sheplist); - assert(sheps[i].shep_dists); - assert(sheps[i].sorted_sheplist); - for (j = 0; j < nshepherds; j++) { - unsigned int const node_j = sheps[j].node; - -#if QTHREAD_NUMA_DISTANCE_WORKING - if ((node_i != QTHREAD_NO_NODE) && (node_j != QTHREAD_NO_NODE) && - (node_i != node_j)) { - sheps[i].shep_dists[j] = numa_distance(node_i, node_j); - } else { -#endif - /* XXX too arbitrary */ - if (i == j) { - sheps[i].shep_dists[j] = 0; - } else { - sheps[i].shep_dists[j] = 20; - } -#if QTHREAD_NUMA_DISTANCE_WORKING - } -#endif - qthread_debug(AFFINITY_DETAILS, - "shep %u to shep %u distance: %u\n", - i, - j, - sheps[i].shep_dists[j]); - } - k = 0; - for (j = 0; j < nshepherds; j++) { - if (j != i) { sheps[i].sorted_sheplist[k++] = j; } - } - if (nshepherds > 1) { - sort_sheps(sheps[i].shep_dists, sheps[i].sorted_sheplist, nshepherds); - } - } - return QTHREAD_SUCCESS; -} /*}}} */ - -/* vim:set expandtab */ diff --git a/src/affinity/libnumaV2.c b/src/affinity/libnumaV2.c deleted file mode 100644 index 94a611ff6..000000000 --- a/src/affinity/libnumaV2.c +++ /dev/null @@ -1,233 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include -#include - -#include "qt_affinity.h" -#include "qt_asserts.h" -#include "qt_debug.h" -#include "qt_subsystems.h" - -#include "shepcomp.h" -#include "shufflesheps.h" - -static struct bitmask *mccoy_bitmask = NULL; - -qthread_shepherd_id_t guess_num_shepherds(void); -qthread_worker_id_t -guess_num_workers_per_shep(qthread_shepherd_id_t nshepherds); - -static void qt_affinity_internal_numaV2_teardown(void) { - numa_run_on_node_mask(mccoy_bitmask); -} - -void INTERNAL qt_affinity_init(qthread_shepherd_id_t *nbshepherds, - qthread_worker_id_t *nbworkers, - size_t *hw_par) { /*{{{ */ - qthread_debug(AFFINITY_FUNCTIONS, "start\n"); - qassert(numa_available(), 0); - mccoy_bitmask = numa_get_run_node_mask(); - qthread_internal_cleanup(qt_affinity_internal_numaV2_teardown); - if (*nbshepherds == 0) { *nbshepherds = guess_num_shepherds(); } - if (*nbworkers == 0) { - *nbworkers = guess_num_workers_per_shep(*nbshepherds); - } -} /*}}} */ - -void INTERNAL qt_affinity_mem_tonode(void *addr, - size_t bytes, - int node) { /*{{{ */ - numa_tonode_memory(addr, bytes, node); -} /*}}} */ - -void INTERNAL *qt_affinity_alloc(size_t bytes) { /*{{{ */ - return numa_alloc(bytes); -} /*}}} */ - -void INTERNAL *qt_affinity_alloc_onnode(size_t bytes, int node) { /*{{{ */ - return numa_alloc_onnode(bytes, node); -} /*}}} */ - -void INTERNAL qt_affinity_free(void *ptr, size_t bytes) { /*{{{ */ - numa_free(ptr, bytes); -} /*}}} */ - -qthread_shepherd_id_t INTERNAL guess_num_shepherds(void) { /*{{{ */ - qthread_shepherd_id_t nshepherds = 1; - - if (numa_available() != 1) { - qthread_debug(AFFINITY_FUNCTIONS, "numa_available != 1\n"); - /* this is (probably) correct if/when we have multithreaded shepherds, - * ... BUT ONLY IF ALL NODES HAVE CPUS!!!!!! */ - nshepherds = numa_max_node() + 1; - qthread_debug( - AFFINITY_DETAILS, "numa_max_node() returned %i\n", nshepherds); - } - if (nshepherds <= 0) { nshepherds = 1; } - qthread_debug(AFFINITY_FUNCTIONS, "guessing %i shepherds\n", (int)nshepherds); - return nshepherds; -} /*}}} */ - -void INTERNAL qt_affinity_set(qthread_worker_t *me, - unsigned int Q_UNUSED(nw)) { /*{{{ */ - assert(me); - - qthread_shepherd_t *const myshep = me->shepherd; - - /* It would be nice if we could do something more specific than - * "numa_run_on_node", but because sched_setaffinity() is so dangerous, we - * really can't, in good conscience. */ - qthread_debug(AFFINITY_DETAILS, - "calling numa_run_on_node(%i) for worker %i\n", - myshep->node, - me->packed_worker_id); - int ret = numa_run_on_node(myshep->node); - if (ret != 0) { - qthread_debug(ALWAYS_OUTPUT, - "numa_run_on_node() returned an error: %s\n", - strerror(errno)); - abort(); - } - numa_set_localalloc(); -} /*}}} */ - -qthread_worker_id_t INTERNAL -guess_num_workers_per_shep(qthread_shepherd_id_t nshepherds) { /*{{{ */ - size_t cpu_count = 1; - unsigned int guess = 1; - - qthread_debug( - AFFINITY_DETAILS, "guessing workers for %i shepherds\n", (int)nshepherds); -#ifdef HAVE_NUMA_NUM_THREAD_CPUS - /* note: not numa_num_configured_cpus(), just in case an - * artificial limit has been imposed. */ - cpu_count = numa_num_thread_cpus(); - qthread_debug( - AFFINITY_DETAILS, "numa_num_thread_cpus returned %i\n", (int)cpu_count); -#elif defined(HAVE_NUMA_BITMASK_NBYTES) - cpu_count = 0; - for (size_t b = 0; b < numa_bitmask_nbytes(numa_all_cpus_ptr) * 8; b++) { - cpu_count += numa_bitmask_isbitset(numa_all_cpus_ptr, b); - } - qthread_debug(AFFINITY_DETAILS, - "after checking through the all_cpus_ptr, I counted %i cpus\n", - (int)cpu_count); -#else /* ifdef HAVE_NUMA_NUM_THREAD_CPUS */ - cpu_count = numa_max_node() + 1; - qthread_debug( - AFFINITY_DETAILS, "numa_max_node() returned %i\n", (int)cpu_count); -#endif /* ifdef HAVE_NUMA_NUM_THREAD_CPUS */ - guess = cpu_count / nshepherds; - if (guess == 0) { guess = 1; } - qthread_debug( - AFFINITY_FUNCTIONS, "guessing %i workers per shepherd\n", (int)guess); - return guess; -} /*}}} */ - -static void assign_nodes(qthread_shepherd_t *sheps, size_t nsheps) { /*{{{ */ - size_t const num_extant_nodes = numa_max_node() + 1; - struct bitmask *nmask = numa_get_run_node_mask(); - struct bitmask *cmask = numa_allocate_cpumask(); - size_t *cpus_left_per_node = - qt_calloc(num_extant_nodes, - sizeof(size_t)); // handle heterogeneous core counts - int over_subscribing = 0; - - assert(cmask); - assert(nmask); - assert(cpus_left_per_node); - numa_bitmask_clearall(cmask); - /* get the # cpus for each node */ - for (size_t i = 0; i < numa_bitmask_nbytes(nmask) * 8; ++i) { - if (numa_bitmask_isbitset(nmask, i)) { - numa_node_to_cpus(i, cmask); - for (size_t j = 0; j < numa_bitmask_nbytes(cmask) * 8; j++) { - cpus_left_per_node[i] += numa_bitmask_isbitset(cmask, j) ? 1 : 0; - } - qthread_debug(AFFINITY_DETAILS, - "there are %i CPUs on node %i\n", - (int)cpus_left_per_node[i], - (int)i); - } - } - /* assign nodes by iterating over cpus_left_per_node array (which is of - * size num_extant_nodes rather than of size nodes_i_can_use) */ - int node = 0; - for (size_t i = 0; i < nsheps; ++i) { - switch (over_subscribing) { - case 0: { - int count = 0; - while (count < num_extant_nodes && cpus_left_per_node[node] == 0) { - node++; - node *= (node < num_extant_nodes); - count++; - } - if (count < num_extant_nodes) { - cpus_left_per_node[node]--; - break; - } - } - over_subscribing = 1; - } - qthread_debug( - AFFINITY_DETAILS, "setting shep %i to numa node %i\n", (int)i, (int)node); - sheps[i].node = node; - node++; - node *= (node < num_extant_nodes); - } - numa_bitmask_free(nmask); - numa_bitmask_free(cmask); - FREE(cpus_left_per_node, num_extant_nodes * sizeof(size_t)); -} /*}}} */ - -int INTERNAL qt_affinity_gendists(qthread_shepherd_t *sheps, - qthread_shepherd_id_t nshepherds) { /*{{{ */ - qthread_debug(AFFINITY_FUNCTIONS, "sheps:%p, nsheps:%u\n", sheps, nshepherds); - if (numa_available() == -1) { return QTHREAD_THIRD_PARTY_ERROR; } - assign_nodes(sheps, nshepherds); -#ifdef HAVE_NUMA_DISTANCE - qthread_debug(AFFINITY_DETAILS, "querying distances...\n"); - /* truly ancient versions of libnuma (in the changelog, this is - * considered "pre-history") do not have numa_distance() */ - for (unsigned int i = 0; i < nshepherds; i++) { - unsigned int const node_i = sheps[i].node; - size_t j, k; - sheps[i].shep_dists = qt_calloc(nshepherds, sizeof(unsigned int)); - sheps[i].sorted_sheplist = - qt_calloc(nshepherds - 1, sizeof(qthread_shepherd_id_t)); - assert(sheps[i].shep_dists); - assert(sheps[i].sorted_sheplist); - for (j = 0; j < nshepherds; j++) { - unsigned int const node_j = sheps[j].node; - - if ((node_i != QTHREAD_NO_NODE) && (node_j != QTHREAD_NO_NODE)) { - sheps[i].shep_dists[j] = numa_distance(node_i, node_j); - } else { - /* XXX too arbitrary */ - if (i == j) { - sheps[i].shep_dists[j] = 0; - } else { - sheps[i].shep_dists[j] = 20; - } - } - qthread_debug(AFFINITY_DETAILS, - "shep %u to shep %u distance: %u\n", - i, - j, - sheps[i].shep_dists[j]); - } - k = 0; - for (j = 0; j < nshepherds; j++) { - if (j != i) { sheps[i].sorted_sheplist[k++] = j; } - } - if (nshepherds > 1) { - sort_sheps(sheps[i].shep_dists, sheps[i].sorted_sheplist, nshepherds); - } - } -#endif /* ifdef HAVE_NUMA_DISTANCE */ - return QTHREAD_SUCCESS; -} /*}}} */ - -/* vim:set expandtab: */ diff --git a/src/affinity/mach.c b/src/affinity/mach.c deleted file mode 100644 index fac6d82eb..000000000 --- a/src/affinity/mach.c +++ /dev/null @@ -1,108 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include -#ifdef HAVE_SYSCTL -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#ifdef HAVE_SYS_SYSCTL_H -#include -#endif -#endif -#ifdef HAVE_MACH_THREAD_POLICY_H -#include -#include -kern_return_t thread_policy_set(thread_t thread, - thread_policy_flavor_t flavor, - thread_policy_t policy_info, - mach_msg_type_number_t count); -kern_return_t thread_policy_get(thread_t thread, - thread_policy_flavor_t flavor, - thread_policy_t policy_info, - mach_msg_type_number_t *count, - boolean_t *get_default); -#endif /* ifdef HAVE_MACH_THREAD_POLICY_H */ - -#include "qt_affinity.h" -#include "qt_asserts.h" -#include "shufflesheps.h" - -qthread_shepherd_id_t INTERNAL guess_num_shepherds(void); -qthread_worker_id_t INTERNAL -guess_num_workers_per_shep(qthread_shepherd_id_t nshepherds); - -void INTERNAL qt_affinity_init(qthread_shepherd_id_t *nbshepherds, - qthread_worker_id_t *nbworkers, - size_t *hw_par) { /*{{{ */ - if (*nbshepherds == 0) { *nbshepherds = guess_num_shepherds(); } - if (*nbworkers == 0) { - *nbworkers = guess_num_workers_per_shep(*nbshepherds); - } -} /*}}} */ - -qthread_shepherd_id_t INTERNAL guess_num_shepherds(void) { /*{{{ */ - qthread_shepherd_id_t nshepherds = 1; - -#if defined(HAVE_SYSCTL) && defined(CTL_HW) && defined(HW_NCPU) - int name[2] = {CTL_HW, HW_NCPU}; - uint32_t oldv; - size_t oldvlen = sizeof(oldv); - if (sysctl(name, 2, &oldv, &oldvlen, NULL, 0) < 0) { - /* sysctl is the official query mechanism on Macs, so if it failed, - * we want to know */ - perror("sysctl"); - } else { - assert(oldvlen == sizeof(oldv)); - nshepherds = (qthread_shepherd_id_t)oldv; - } -#endif /* if defined(HAVE_SYSCTL) && defined(CTL_HW) && defined(HW_NCPU) */ - if (nshepherds <= 0) { nshepherds = 1; } - return nshepherds; -} /*}}} */ - -void INTERNAL qt_affinity_set(qthread_worker_t *me, - unsigned int Q_UNUSED(nw)) { /*{{{ */ - mach_msg_type_number_t Count = THREAD_AFFINITY_POLICY_COUNT; - thread_affinity_policy_data_t mask[THREAD_AFFINITY_POLICY_COUNT]; - - memset(mask, - 0, - sizeof(thread_affinity_policy_data_t) * THREAD_AFFINITY_POLICY_COUNT); - mask[0].affinity_tag = me->packed_worker_id + 1; - Count = 1; - if (thread_policy_set(mach_thread_self(), - THREAD_AFFINITY_POLICY, - (thread_policy_t)&mask, - Count) != KERN_SUCCESS) { - fprintf(stderr, "ERROR! Cannot SET affinity for some reason\n"); - } -} /*}}} */ - -qthread_worker_id_t INTERNAL -guess_num_workers_per_shep(qthread_shepherd_id_t nshepherds) { /*{{{ */ - return 1; -} /*}}} */ - -int INTERNAL qt_affinity_gendists(qthread_shepherd_t *sheps, - qthread_shepherd_id_t nshepherds) { /*{{{ */ - /* there is no native way to detect distances, so unfortunately we must assume - * that they're all equidistant */ - for (size_t i = 0; i < nshepherds; ++i) { - sheps[i].sorted_sheplist = - qt_calloc(nshepherds - 1, sizeof(qthread_shepherd_id_t)); - sheps[i].shep_dists = qt_calloc(nshepherds, sizeof(unsigned int)); - for (size_t j = 0, k = 0; j < nshepherds; ++j) { - if (j != i) { - sheps[i].shep_dists[j] = 10; - sheps[i].sorted_sheplist[k++] = j; - } - } - // no need to sort; they're all equidistant - shuffle_sheps(sheps[i].sorted_sheplist, nshepherds - 1); - } - return QTHREAD_SUCCESS; -} /*}}} */ - -/* vim:set expandtab: */ diff --git a/src/affinity/plpa.c b/src/affinity/plpa.c deleted file mode 100644 index 3e5b19baf..000000000 --- a/src/affinity/plpa.c +++ /dev/null @@ -1,97 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#ifdef HAVE_SYSCTL -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#ifdef HAVE_SYS_SYSCTL_H -#include -#endif -#endif -#if defined(HAVE_SYSCONF) && defined(HAVE_UNISTD_H) -#include -#endif - -#include - -#include "qt_affinity.h" -#include "qt_asserts.h" -#include "shufflesheps.h" - -qthread_shepherd_id_t guess_num_shepherds(void); -qthread_worker_id_t -guess_num_workers_per_shep(qthread_shepherd_id_t nshepherds); - -void INTERNAL qt_affinity_init(qthread_shepherd_id_t *nbshepherds, - qthread_worker_id_t *nbworkers, - size_t *hw_par) { /*{{{ */ - if (*nbshepherds == 0) { - *nbshepherds = guess_num_shepherds(); - if (*nbshepherds <= 0) { *nbshepherds = 1; } - } - if (*nbworkers == 0) { - *nbworkers = guess_num_workers_per_shep(*nbshepherds); - if (*nbworkers <= 0) { *nbworkers = 1; } - } -} /*}}} */ - -qthread_shepherd_id_t INTERNAL guess_num_shepherds(void) { /*{{{ */ - qthread_shepherd_id_t nshepherds = 1; - -#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF) /* Linux */ - long ret = sysconf(_SC_NPROCESSORS_CONF); - nshepherds = (ret > 0) ? ret : 1; -#elif defined(HAVE_SYSCTL) && defined(CTL_HW) && defined(HW_NCPU) - int name[2] = {CTL_HW, HW_NCPU}; - uint32_t oldv; - size_t oldvlen = sizeof(oldv); - if (sysctl(name, 2, &oldv, &oldvlen, NULL, 0) >= 0) { - assert(oldvlen == sizeof(oldv)); - nshepherds = (int)oldv; - } -#endif /* if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF) */ - return nshepherds; -} /*}}} */ - -void INTERNAL qt_affinity_set(qthread_worker_t *me, - unsigned int Q_UNUSED(nw)) { /*{{{ */ - plpa_cpu_set_t *cpuset = (plpa_cpu_set_t *)MALLOC(sizeof(plpa_cpu_set_t)); - - PLPA_CPU_ZERO(cpuset); - PLPA_CPU_SET(me->packed_worker_id, cpuset); - if ((plpa_sched_setaffinity(0, sizeof(plpa_cpu_set_t), cpuset) < 0) && - (errno != EINVAL)) { - perror("plpa setaffinity"); - } - FREE(cpuset, sizeof(plpa_cpu_set_t)); -} /*}}} */ - -qthread_worker_id_t INTERNAL -guess_num_workers_per_shep(qthread_shepherd_id_t nshepherds) { /*{{{ */ - return 1; -} /*}}} */ - -int INTERNAL qt_affinity_gendists(qthread_shepherd_t *sheps, - qthread_shepherd_id_t nshepherds) { /*{{{*/ - for (size_t i = 0; i < nshepherds; ++i) { - sheps[i].node = i * qlib->nworkerspershep; - sheps[i].shep_dists = qt_calloc(nshepherds, sizeof(unsigned int)); - sheps[i].sorted_sheplist = - qt_calloc(nshepherds - 1, sizeof(qthread_shepherd_id_t)); - for (size_t j = 0, k = 0; j < nshepherds; ++j) { - if (j != i) { - sheps[i].shep_dists[j] = 10; - sheps[i].sorted_sheplist[k++] = j; - } - } - // no need to sort; they're all equidistant - shuffle_sheps(sheps[i].sorted_sheplist, nshepherds - 1); - } - /* there is no inherent way to detect distances, so unfortunately we must - * assume that they're all equidistant */ - return QTHREAD_SUCCESS; -} /*}}}*/ - -/* vim:set expandtab: */ diff --git a/src/affinity/sys.c b/src/affinity/sys.c deleted file mode 100644 index 2f5ab5127..000000000 --- a/src/affinity/sys.c +++ /dev/null @@ -1,112 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#if defined(HAVE_SYSCONF) && defined(HAVE_SC_NPROCESSORS_CONF) /* Linux */ -#include -#elif defined(HAVE_SYSCTL) && defined(HAVE_HW_NCPU) -#include -#include -#endif - -#include "qt_affinity.h" -#include "qt_asserts.h" -#include "qt_debug.h" -#include "shufflesheps.h" - -qthread_shepherd_id_t guess_num_shepherds(void); -qthread_worker_id_t -guess_num_workers_per_shep(qthread_shepherd_id_t nshepherds); - -void INTERNAL qt_affinity_init(qthread_shepherd_id_t *nbshepherds, - qthread_worker_id_t *nbworkers, - size_t *hw_par) { /*{{{ */ - if (*nbshepherds == 0) { - *nbshepherds = guess_num_shepherds(); - if (*nbshepherds <= 0) { *nbshepherds = 1; } - } - if (*nbworkers == 0) { - *nbworkers = guess_num_workers_per_shep(*nbshepherds); - if (*nbworkers <= 0) { *nbworkers = 1; } - } -} /*}}} */ - -qthread_shepherd_id_t INTERNAL guess_num_shepherds(void) { /*{{{ */ -#if defined(HAVE_SYSCONF) && defined(HAVE_SC_NPROCESSORS_CONF) /* Linux */ - long ret = sysconf(_SC_NPROCESSORS_CONF); - qthread_debug( - AFFINITY_CALLS, "based on sysconf(), guessing %i shepherds\n", (int)ret); - return (ret > 0) ? ret : 1; - -#elif defined(HAVE_SYSCTL) && defined(HAVE_HW_NCPU) - int name[2] = {CTL_HW, HW_NCPU}; - uint32_t oldv; - size_t oldvlen = sizeof(oldv); - if (sysctl(name, 2, &oldv, &oldvlen, NULL, 0) >= 0) { - assert(oldvlen == sizeof(oldv)); - qthread_debug( - AFFINITY_CALLS, "based on sysctl(), guessing %i shepherds\n", (int)oldv); - return oldv; - } else { - qthread_debug(AFFINITY_CALLS, - "sysctl() returned an error, assuming 1 shepherd\n"); - return 1; - } -#endif /* if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF) */ - qthread_debug(AFFINITY_CALLS, - "no useful interfaces present; assuming a single shepherd\n"); - return 1; -} /*}}} */ - -void INTERNAL qt_affinity_set(qthread_worker_t *me, unsigned int Q_UNUSED(nw)) { -} - -qthread_worker_id_t INTERNAL -guess_num_workers_per_shep(qthread_shepherd_id_t nshepherds) { /*{{{ */ - size_t num_procs = 1; - size_t guess = 1; - - qthread_debug( - AFFINITY_CALLS, "guessing workers for %i shepherds\n", (int)nshepherds); -#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF) /* Linux */ - long ret = sysconf(_SC_NPROCESSORS_CONF); - qthread_debug(AFFINITY_DETAILS, "sysconf() says %i processors\n", (int)ret); - num_procs = (ret > 0) ? (size_t)ret : 1; -#elif defined(HAVE_SYSCTL) && defined(CTL_HW) && defined(HW_NCPU) - int name[2] = {CTL_HW, HW_NCPU}; - uint32_t oldv; - size_t oldvlen = sizeof(oldv); - if (sysctl(name, 2, &oldv, &oldvlen, NULL, 0) >= 0) { - assert(oldvlen == sizeof(oldv)); - qthread_debug(AFFINITY_DETAILS, "sysctl() says %i CPUs\n", (int)oldv); - num_procs = (size_t)oldv; - } -#endif /* if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF) */ - guess = num_procs / nshepherds; - if (guess == 0) { guess = 1; } - qthread_debug( - AFFINITY_DETAILS, "guessing %i workers per shepherd\n", (int)guess); - return (qthread_shepherd_id_t)guess; -} /*}}} */ - -int INTERNAL qt_affinity_gendists(qthread_shepherd_t *sheps, - qthread_shepherd_id_t nshepherds) { /*{{{ */ - qthread_debug(AFFINITY_CALLS, "start (%p, %i)\n", sheps, (int)nshepherds); - for (size_t i = 0; i < nshepherds; ++i) { - sheps[i].sorted_sheplist = - qt_calloc(nshepherds - 1, sizeof(qthread_shepherd_id_t)); - sheps[i].shep_dists = qt_calloc(nshepherds, sizeof(unsigned int)); - for (size_t j = 0, k = 0; j < nshepherds; ++j) { - if (j != i) { - assert(k < (nshepherds - 1)); - sheps[i].shep_dists[j] = 10; - sheps[i].sorted_sheplist[k++] = j; - } - } - // no need to sort; they're all equidistant - shuffle_sheps(sheps[i].sorted_sheplist, nshepherds - 1); - } - return QTHREAD_SUCCESS; -} /*}}} */ - -/* vim:set expandtab: */ diff --git a/src/alloc/base.c b/src/alloc/base.c index 451cfa30e..dec404402 100644 --- a/src/alloc/base.c +++ b/src/alloc/base.c @@ -12,7 +12,7 @@ #ifdef HAVE_GETPAGESIZE #include #else -static QINLINE int getpagesize() { return 4096; } +static inline int getpagesize() { return 4096; } #endif /* Internal Headers */ diff --git a/src/alloc/chapel.c b/src/alloc/chapel.c index 66a7c9ed1..135f42dc1 100644 --- a/src/alloc/chapel.c +++ b/src/alloc/chapel.c @@ -9,7 +9,7 @@ #ifdef HAVE_GETPAGESIZE #include #else -static QINLINE int getpagesize() { return 4096; } +static inline int getpagesize() { return 4096; } #endif #include "chpl-linefile-support.h" diff --git a/src/compat_atomics.c b/src/compat_atomics.c deleted file mode 100644 index 09b5eb3bd..000000000 --- a/src/compat_atomics.c +++ /dev/null @@ -1,116 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include -#endif - -#include - -#include "qt_asserts.h" -#include "qt_initialized.h" -#include "qt_profiling.h" -#include "qthread_innards.h" - -extern unsigned int QTHREAD_LOCKING_STRIPES; -#define QTHREAD_CHOOSE_STRIPE(addr) \ - (((size_t)addr >> 4) & (QTHREAD_LOCKING_STRIPES - 1)) - -#if defined(QTHREAD_MUTEX_INCREMENT) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) -uint32_t qthread_incr32_(uint32_t *op, int32_t const incr) { /*{{{ */ - unsigned int stripe = QTHREAD_CHOOSE_STRIPE(op); - uint32_t retval; - - QTHREAD_FEB_TIMER_DECLARATION(incr); - - assert(qthread_library_initialized); - - QTHREAD_COUNT_THREADS_BINCOUNTER(atomic, stripe); - QTHREAD_FEB_UNIQUERECORD(incr, op, qthread_internal_self()); - QTHREAD_FEB_TIMER_START(incr); - QTHREAD_FASTLOCK_LOCK(&(qlib->atomic_locks[stripe])); - retval = *op; - *op += incr; - QTHREAD_FASTLOCK_UNLOCK(&(qlib->atomic_locks[stripe])); - QTHREAD_FEB_TIMER_STOP(incr, qthread_internal_self()); - return retval; -} /*}}} */ - -uint64_t qthread_incr64_(uint64_t *op, int64_t const incr) { /*{{{ */ - unsigned int stripe = QTHREAD_CHOOSE_STRIPE(op); - uint64_t retval; - - QTHREAD_FEB_TIMER_DECLARATION(incr); - - assert(qthread_library_initialized); - - QTHREAD_COUNT_THREADS_BINCOUNTER(atomic, stripe); - QTHREAD_FEB_UNIQUERECORD(incr, op, qthread_internal_self()); - QTHREAD_FEB_TIMER_START(incr); - QTHREAD_FASTLOCK_LOCK(&(qlib->atomic_locks[stripe])); - retval = *op; - *op += incr; - QTHREAD_FASTLOCK_UNLOCK(&(qlib->atomic_locks[stripe])); - QTHREAD_FEB_TIMER_STOP(incr, qthread_internal_self()); - return retval; -} /*}}} */ - -double qthread_dincr_(double *op, double const incr) { /*{{{ */ - unsigned int stripe = QTHREAD_CHOOSE_STRIPE(op); - double retval; - - assert(qthread_library_initialized); - - QTHREAD_FASTLOCK_LOCK(&(qlib->atomic_locks[stripe])); - retval = *op; - *op += incr; - QTHREAD_FASTLOCK_UNLOCK(&(qlib->atomic_locks[stripe])); - return retval; -} /*}}} */ - -float qthread_fincr_(float *op, float const incr) { /*{{{ */ - unsigned int stripe = QTHREAD_CHOOSE_STRIPE(op); - float retval; - - assert(qthread_library_initialized); - - QTHREAD_FASTLOCK_LOCK(&(qlib->atomic_locks[stripe])); - retval = *op; - *op += incr; - QTHREAD_FASTLOCK_UNLOCK(&(qlib->atomic_locks[stripe])); - return retval; -} /*}}} */ - -uint32_t qthread_cas32_(uint32_t *operand, - uint32_t const oldval, - uint32_t const newval) { /*{{{ */ - uint32_t retval; - unsigned int stripe = QTHREAD_CHOOSE_STRIPE(operand); - - assert(qthread_library_initialized); - - QTHREAD_FASTLOCK_LOCK(&(qlib->atomic_locks[stripe])); - retval = *operand; - if (retval == oldval) { *operand = newval; } - QTHREAD_FASTLOCK_UNLOCK(&(qlib->atomic_locks[stripe])); - return retval; -} /*}}} */ - -uint64_t qthread_cas64_(uint64_t *operand, - uint64_t const oldval, - uint64_t const newval) { /*{{{ */ - uint64_t retval; - unsigned int stripe = QTHREAD_CHOOSE_STRIPE(operand); - - assert(qthread_library_initialized); - - QTHREAD_FASTLOCK_LOCK(&(qlib->atomic_locks[stripe])); - retval = *operand; - if (retval == oldval) { *operand = newval; } - QTHREAD_FASTLOCK_UNLOCK(&(qlib->atomic_locks[stripe])); - return retval; -} /*}}} */ - -#else /* if defined(QTHREAD_MUTEX_INCREMENT) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) */ -#error Building this file erroneously. -#endif /* if defined(QTHREAD_MUTEX_INCREMENT) || (QTHREAD_ASSEMBLY_ARCH == \ - QTHREAD_POWERPC32) */ -/* vim:set expandtab: */ diff --git a/src/ds/qarray.c b/src/ds/qarray.c index 3329e04f6..e83fed748 100644 --- a/src/ds/qarray.c +++ b/src/ds/qarray.c @@ -32,7 +32,7 @@ static aligned_t *chunk_distribution_tracker = NULL; /* this function is for DIST *ONLY*; it returns a pointer to the location that * the bookkeeping data is stored (i.e. the record of where this segment is * stored) */ -static QINLINE qthread_shepherd_id_t * +static inline qthread_shepherd_id_t * qarray_internal_segment_shep(qarray const *a, void const *segment_head) { /*{{{ */ char *ptr = (((char *)segment_head) + (a->segment_size * a->unit_size)); @@ -47,7 +47,7 @@ qarray_internal_segment_shep(qarray const *a, return (qthread_shepherd_id_t *)ptr; } /*}}} */ -static QINLINE qthread_shepherd_id_t qarray_internal_segment_shep_read( +static inline qthread_shepherd_id_t qarray_internal_segment_shep_read( qarray const *a, void const *segment_head) { /*{{{*/ qthread_shepherd_id_t retval; qthread_shepherd_id_t *ptr = qarray_internal_segment_shep(a, segment_head); @@ -58,7 +58,7 @@ static QINLINE qthread_shepherd_id_t qarray_internal_segment_shep_read( return retval; } /*}}}*/ -static QINLINE void +static inline void qarray_internal_segment_shep_write(qarray const *a, void const *segment_head, qthread_shepherd_id_t shep) { /*{{{*/ @@ -317,9 +317,6 @@ static qarray *qarray_create_internal(size_t const count, break; } if (ret->base_ptr == NULL) { -#ifdef QTHREAD_HAVE_LIBNUMA - numa_error("allocating qarray body"); -#endif } #else /* ifdef QTHREAD_HAVE_MEM_AFFINITY */ /* For speed, we want page-aligned memory, if we can get it */ diff --git a/src/ds/qdqueue.c b/src/ds/qdqueue.c index 884e34a2b..c170c78e6 100644 --- a/src/ds/qdqueue.c +++ b/src/ds/qdqueue.c @@ -8,10 +8,6 @@ #include #endif -#ifdef QTHREAD_HAVE_LIBNUMA -#include -#endif - #include #include #include diff --git a/src/feb.c b/src/feb.c index ae0c4ba19..852e06266 100644 --- a/src/feb.c +++ b/src/feb.c @@ -38,9 +38,6 @@ static qt_hash *FEBs; #ifdef QTHREAD_COUNT_THREADS aligned_t *febs_stripes; -#ifdef QTHREAD_MUTEX_INCREMENT -QTHREAD_FASTLOCK_TYPE *febs_stripes_locks; -#endif #endif /******************************************************************** @@ -73,19 +70,19 @@ typedef struct { /******************************************************************** * Local Prototypes *********************************************************************/ -static QINLINE void qthread_gotlock_fill(qthread_shepherd_t *shep, +static inline void qthread_gotlock_fill(qthread_shepherd_t *shep, qthread_addrstat_t *m, void *maddr); -static QINLINE void +static inline void qthread_gotlock_fill_inner(qthread_shepherd_t *shep, qthread_addrstat_t *m, void *maddr, uint_fast8_t const recursive, qthread_addrres_t **precond_tasks); -static QINLINE void qthread_gotlock_empty(qthread_shepherd_t *shep, +static inline void qthread_gotlock_empty(qthread_shepherd_t *shep, qthread_addrstat_t *m, void *maddr); -static QINLINE void +static inline void qthread_gotlock_empty_inner(qthread_shepherd_t *shep, qthread_addrstat_t *m, void *maddr, @@ -119,18 +116,11 @@ static void qt_feb_subsystem_shutdown(void) { #ifdef QTHREAD_COUNT_THREADS print_status( "bin %i used %u times for FEBs\n", i, (unsigned int)febs_stripes[i]); -#ifdef QTHREAD_MUTEX_INCREMENT - QTHREAD_FASTLOCK_DESTROY(febs_stripes_locks[i]); -#endif #endif } FREE(FEBs, sizeof(qt_hash) * QTHREAD_LOCKING_STRIPES); #ifdef QTHREAD_COUNT_THREADS FREE(febs_stripes, sizeof(aligned_t) * QTHREAD_LOCKING_STRIPES); -#ifdef QTHREAD_MUTEX_INCREMENT - FREE(febs_stripes_locks, - sizeof(QTHREAD_FASTLOCK_TYPE) * QTHREAD_LOCKING_STRIPES); -#endif #endif #if !defined(UNPOOLED_ADDRSTAT) && !defined(UNPOOLED) qt_mpool_destroy(generic_addrstat_pool); @@ -156,18 +146,10 @@ void INTERNAL qt_feb_subsystem_init(uint_fast8_t need_sync) { #ifdef QTHREAD_COUNT_THREADS febs_stripes = MALLOC(sizeof(aligned_t) * QTHREAD_LOCKING_STRIPES); assert(febs_stripes); -#ifdef QTHREAD_MUTEX_INCREMENT - febs_stripes_locks = - MALLOC(sizeof(QTHREAD_FASTLOCK_TYPE) * QTHREAD_LOCKING_STRIPES); - assert(febs_stripes_locks); -#endif #endif /* ifdef QTHREAD_COUNT_THREADS */ for (unsigned i = 0; i < QTHREAD_LOCKING_STRIPES; i++) { #ifdef QTHREAD_COUNT_THREADS febs_stripes[i] = 0; -#ifdef QTHREAD_MUTEX_INCREMENT - QTHREAD_FASTLOCK_INIT(febs_stripes_locks[i]); -#endif #endif FEBs[i] = qt_hash_create(need_sync); assert(FEBs[i]); @@ -317,7 +299,7 @@ int API_FUNC qthread_feb_status(aligned_t const *addr) { /*{{{ */ /* this function removes the FEB data structure for the address maddr from the * hash table */ -static QINLINE void qthread_FEB_remove(void *maddr) { /*{{{ */ +static inline void qthread_FEB_remove(void *maddr) { /*{{{ */ qthread_addrstat_t *m; int const lockbin = QTHREAD_CHOOSE_STRIPE2(maddr); @@ -406,7 +388,7 @@ static QINLINE void qthread_FEB_remove(void *maddr) { /*{{{ */ } } /*}}} */ -static QINLINE void +static inline void qthread_precond_launch(qthread_shepherd_t *shep, qthread_addrres_t *precond_tasks) { /*{{{*/ qthread_addrres_t *precond_tail = @@ -435,7 +417,7 @@ qthread_precond_launch(qthread_shepherd_t *shep, } } /*}}}*/ -static QINLINE void +static inline void qthread_gotlock_empty_inner(qthread_shepherd_t *shep, qthread_addrstat_t *m, void *maddr, @@ -484,7 +466,7 @@ qthread_gotlock_empty_inner(qthread_shepherd_t *shep, } } /*}}} */ -static QINLINE void qthread_gotlock_empty(qthread_shepherd_t *shep, +static inline void qthread_gotlock_empty(qthread_shepherd_t *shep, qthread_addrstat_t *m, void *maddr) { qthread_addrres_t *tmp = NULL; @@ -492,7 +474,7 @@ static QINLINE void qthread_gotlock_empty(qthread_shepherd_t *shep, qthread_gotlock_empty_inner(shep, m, maddr, 0, &tmp); } -static QINLINE void +static inline void qthread_gotlock_fill_inner(qthread_shepherd_t *shep, qthread_addrstat_t *m, void *maddr, @@ -645,7 +627,7 @@ qthread_gotlock_fill_inner(qthread_shepherd_t *shep, } } /*}}} */ -static QINLINE void qthread_gotlock_fill(qthread_shepherd_t *shep, +static inline void qthread_gotlock_fill(qthread_shepherd_t *shep, qthread_addrstat_t *m, void *maddr) { qthread_addrres_t *tmp = NULL; diff --git a/src/mpool.c b/src/mpool.c index bc26992a4..dd9a7ed5d 100644 --- a/src/mpool.c +++ b/src/mpool.c @@ -108,7 +108,7 @@ void INTERNAL qt_mpool_subsystem_init(void) { } /* local funcs */ -static QINLINE void * +static inline void * qt_mpool_internal_aligned_alloc(size_t alloc_size, size_t alignment) { /*{{{ */ void *ret = qt_internal_aligned_alloc(alloc_size, alignment); @@ -116,7 +116,7 @@ qt_mpool_internal_aligned_alloc(size_t alloc_size, size_t alignment) { /*{{{ */ return ret; } /*}}} */ -static QINLINE void qt_mpool_internal_aligned_free(void *freeme, +static inline void qt_mpool_internal_aligned_free(void *freeme, size_t alignment) { /*{{{ */ qt_internal_aligned_free(freeme, alignment); } /*}}} */ diff --git a/src/qloop.c b/src/qloop.c index fa320501c..c4a6cf046 100644 --- a/src/qloop.c +++ b/src/qloop.c @@ -41,7 +41,7 @@ struct qloop_wrapper_args { void *sync; }; -static QINLINE void qt_loop_balance_inner(size_t const start, +static inline void qt_loop_balance_inner(size_t const start, size_t const stop, qt_loop_f const func, void *argptr, @@ -320,7 +320,7 @@ void API_FUNC qt_loop_sinc(size_t start, #define QT_LOOP_BALANCE_SIMPLE (1 << 0) -static QINLINE void qt_loop_balance_inner(size_t const start, +static inline void qt_loop_balance_inner(size_t const start, size_t const stop, qt_loop_f const func, void *argptr, @@ -561,7 +561,7 @@ static aligned_t qloopaccum_wrapper(void *restrict arg_void) { /*{{{ */ return 0; } /*}}} */ -static QINLINE void qt_loopaccum_balance_inner(size_t const start, +static inline void qt_loopaccum_balance_inner(size_t const start, size_t const stop, size_t const size, void *restrict out, @@ -798,7 +798,7 @@ static int qqloop_get_iterations_guided( } } /*}}} */ -static QINLINE int qqloop_get_iterations_factored( +static inline int qqloop_get_iterations_factored( qqloop_iteration_queue_t *restrict const iq, struct qqloop_static_args *restrict const sa, struct qqloop_wrapper_range *restrict const range) { /*{{{ */ @@ -856,7 +856,7 @@ static QINLINE int qqloop_get_iterations_factored( } } /*}}} */ -static QINLINE int qqloop_get_iterations_chunked( +static inline int qqloop_get_iterations_chunked( qqloop_iteration_queue_t *restrict const iq, struct qqloop_static_args *restrict const sa, struct qqloop_wrapper_range *restrict const range) { /*{{{ */ @@ -878,7 +878,7 @@ static QINLINE int qqloop_get_iterations_chunked( return retval; } /*}}} */ -static QINLINE int qqloop_get_iterations_timed( +static inline int qqloop_get_iterations_timed( qqloop_iteration_queue_t *restrict const iq, struct qqloop_static_args *restrict const sa, struct qqloop_wrapper_range *restrict const range) { /*{{{ */ @@ -936,7 +936,7 @@ static QINLINE int qqloop_get_iterations_timed( } } /*}}} */ -static QINLINE qqloop_iteration_queue_t * +static inline qqloop_iteration_queue_t * qqloop_create_iq(size_t const startat, size_t const stopat, size_t const step, @@ -971,7 +971,7 @@ qqloop_create_iq(size_t const startat, return iq; } /*}}} */ -static QINLINE void qqloop_destroy_iq(qqloop_iteration_queue_t *iq) { /*{{{ */ +static inline void qqloop_destroy_iq(qqloop_iteration_queue_t *iq) { /*{{{ */ assert(iq); switch (iq->type) { case TIMED: { diff --git a/src/qthread.c b/src/qthread.c index 819dcacf7..ee9fdd42d 100644 --- a/src/qthread.c +++ b/src/qthread.c @@ -83,14 +83,6 @@ #include "qt_output_macros.h" #include "qt_subsystems.h" -#if !(defined(HAVE_GCC_INLINE_ASSEMBLY) && \ - (QTHREAD_SIZEOF_ALIGNED_T == 4 || \ - QTHREAD_ASSEMBLY_ARCH != QTHREAD_POWERPC32)) && \ - !defined(QTHREAD_ATOMIC_CAS) && !defined(QTHREAD_MUTEX_INCREMENT) -#warning QTHREAD_MUTEX_INCREMENT not defined. It probably should be. -#define QTHREAD_MUTEX_INCREMENT 1 -#endif - #ifdef QTHREAD_PERFORMANCE #define WKR_DBG 1 #include "qthread/logging.h" @@ -133,13 +125,13 @@ static void qthread_wrapper(unsigned int high, unsigned int low); static void qthread_wrapper(void *ptr); #endif -static QINLINE void qthread_makecontext(qt_context_t *const c, +static inline void qthread_makecontext(qt_context_t *const c, void *const stack, size_t const stacksize, void (*func)(void), void const *const arg, qt_context_t *const returnc); -static QINLINE qthread_t *qthread_thread_new(qthread_f f, +static inline qthread_t *qthread_thread_new(qthread_f f, void const *arg, size_t arg_size, void *ret, @@ -175,7 +167,7 @@ qt_mpool generic_big_qthread_pool = NULL; #if defined(UNPOOLED_STACKS) || defined(UNPOOLED) #ifdef QTHREAD_GUARD_PAGES -static QINLINE void *ALLOC_STACK(void) { /*{{{ */ +static inline void *ALLOC_STACK(void) { /*{{{ */ if (GUARD_PAGES) { uint8_t *tmp = qt_internal_aligned_alloc( qlib->qthread_stack_size + sizeof(struct qthread_runtime_data_s) + @@ -203,7 +195,7 @@ static QINLINE void *ALLOC_STACK(void) { /*{{{ */ } } /*}}} */ -static QINLINE void FREE_STACK(void *t) { /*{{{ */ +static inline void FREE_STACK(void *t) { /*{{{ */ if (GUARD_PAGES) { uint8_t *tmp = t; @@ -234,7 +226,7 @@ static QINLINE void FREE_STACK(void *t) { /*{{{ */ #else /* if defined(UNPOOLED_STACKS) || defined(UNPOOLED) */ static qt_mpool generic_stack_pool = NULL; #ifdef QTHREAD_GUARD_PAGES -static QINLINE void *ALLOC_STACK(void) { /*{{{ */ +static inline void *ALLOC_STACK(void) { /*{{{ */ if (GUARD_PAGES) { uint8_t *tmp = qt_mpool_alloc(generic_stack_pool); @@ -254,7 +246,7 @@ static QINLINE void *ALLOC_STACK(void) { /*{{{ */ } } /*}}} */ -static QINLINE void FREE_STACK(void *t) { /*{{{ */ +static inline void FREE_STACK(void *t) { /*{{{ */ if (GUARD_PAGES) { assert(t); t = (uint8_t *)t - getpagesize(); @@ -350,7 +342,7 @@ int qthread_library_initialized = 0; void *shep0arg = NULL; #endif -static QINLINE void alloc_rdata(qthread_shepherd_t *me, qthread_t *t) { /*{{{*/ +static inline void alloc_rdata(qthread_shepherd_t *me, qthread_t *t) { /*{{{*/ void *stack = NULL; struct qthread_runtime_data_s *rdata; @@ -911,16 +903,6 @@ int API_FUNC qthread_initialize(void) { /*{{{ */ qlib = (qlib_t)MALLOC(sizeof(struct qlib_s)); qassert_ret(qlib, QTHREAD_MALLOC_ERROR); -#if defined(QTHREAD_MUTEX_INCREMENT) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) - qlib->atomic_locks = - MALLOC(sizeof(QTHREAD_FASTLOCK_TYPE) * QTHREAD_LOCKING_STRIPES); - qassert_ret(qlib->atomic_locks, QTHREAD_MALLOC_ERROR); - for (i = 0; i < QTHREAD_LOCKING_STRIPES; i++) { - QTHREAD_FASTLOCK_INIT(qlib->atomic_locks[i]); - } -#endif - qt_internal_alignment_init(); qt_hash_initialize_subsystem(); @@ -970,10 +952,6 @@ int API_FUNC qthread_initialize(void) { /*{{{ */ (qt_threadqueue_t **)MALLOC(nshepherds * sizeof(qt_threadqueue_t *)); #endif /* ifdef QTHREAD_LOCAL_PRIORITY */ qassert_ret(qlib->shepherds, QTHREAD_MALLOC_ERROR); -#ifdef QTHREAD_MUTEX_INCREMENT - QTHREAD_FASTLOCK_INIT(qlib->nshepherds_active_lock); - QTHREAD_FASTLOCK_INIT(qlib->nworkers_active_lock); -#endif qt_mpool_subsystem_init(); @@ -1112,9 +1090,6 @@ int API_FUNC qthread_initialize(void) { /*{{{ */ qlib->local_priority_queues[i] = qlib->shepherds[i].local_priority_queue; #endif /* ifdef QTHREAD_LOCAL_PRIORITY */ #ifdef QTHREAD_FEB_PROFILING -#ifdef QTHREAD_MUTEX_INCREMENT - qlib->shepherds[i].uniqueincraddrs = qt_hash_create(need_sync); -#endif qlib->shepherds[i].uniquelockaddrs = qt_hash_create(need_sync); qlib->shepherds[i].uniquefebaddrs = qt_hash_create(need_sync); #endif @@ -1348,7 +1323,7 @@ int API_FUNC qthread_initialize(void) { /*{{{ */ /* This initializes a context (c) to run the function (func) with a single * argument (arg). This is just a wrapper around makecontext that isolates some * of the portability garbage. */ -static QINLINE void qthread_makecontext(qt_context_t *const c, +static inline void qthread_makecontext(qt_context_t *const c, void *const stack, size_t const stacksize, void (*func)(void), @@ -1701,11 +1676,6 @@ void API_FUNC qthread_finalize(void) { /*{{{ */ shep->idle_maxtime); #endif #ifdef QTHREAD_FEB_PROFILING -#ifdef QTHREAD_MUTEX_INCREMENT - QTHREAD_ACCUM_MAX(shep0->incr_maxtime, shep->incr_maxtime); - shep0->incr_time += shep->incr_time; - shep0->incr_count += shep->incr_count; -#endif QTHREAD_ACCUM_MAX(shep0->aquirelock_maxtime, shep->aquirelock_maxtime); shep0->aquirelock_time += shep->aquirelock_time; shep0->aquirelock_count += shep->aquirelock_count; @@ -1724,11 +1694,6 @@ void API_FUNC qthread_finalize(void) { /*{{{ */ shep0->empty_time += shep->empty_time; shep0->empty_count += shep->empty_count; qthread_debug(CORE_DETAILS, "destroying hashes\n"); -#ifdef QTHREAD_MUTEX_INCREMENT - qt_hash_callback( - shep->uniqueincraddrs, qthread_unique_collect, shep0->uniqueincraddrs); - qt_hash_destroy(shep->uniqueincraddrs); -#endif qt_hash_callback( shep->uniquelockaddrs, qthread_unique_collect, shep0->uniquelockaddrs); qt_hash_destroy(shep->uniquelockaddrs); @@ -1751,15 +1716,6 @@ void API_FUNC qthread_finalize(void) { /*{{{ */ } #ifdef QTHREAD_FEB_PROFILING -#ifdef QTHREAD_MUTEX_INCREMENT - print_status( - "%llu increments performed (%ld unique), average %g secs, max %g secs\n", - (unsigned long long)shep0->incr_count, - qt_hash_count(shep0->uniqueincraddrs), - (shep0->incr_count == 0) ? 0 : (shep0->incr_time / shep0->incr_count), - shep0->incr_maxtime); - qt_hash_destroy(shep0->uniqueincraddrs); -#endif print_status("%ld unique addresses used with FEB, blocked %g secs\n", qt_hash_count(shep0->uniquefebaddrs), (shep0->febblock_count == 0) ? 0 : shep0->febblock_time); @@ -1787,16 +1743,6 @@ void API_FUNC qthread_finalize(void) { /*{{{ */ #ifdef LOCK_FREE_FEBS extern unsigned int QTHREAD_LOCKING_STRIPES; QTHREAD_LOCKING_STRIPES = 1; -#elif defined(QTHREAD_MUTEX_INCREMENT) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) - extern unsigned int QTHREAD_LOCKING_STRIPES; - for (i = 0; i < QTHREAD_LOCKING_STRIPES; i++) { - QTHREAD_FASTLOCK_DESTROY(qlib->atomic_locks[i]); - } -#endif -#ifdef QTHREAD_MUTEX_INCREMENT - QTHREAD_FASTLOCK_DESTROY(qlib->nshepherds_active_lock); - QTHREAD_FASTLOCK_DESTROY(qlib->nworkers_active_lock); #endif #ifdef QTHREAD_COUNT_THREADS print_status("spawned %lu threads, max realized concurrency %lu, avg " @@ -1838,11 +1784,6 @@ void API_FUNC qthread_finalize(void) { /*{{{ */ tmp->func(); FREE(tmp, sizeof(struct qt_cleanup_funcs_s)); } -#if defined(QTHREAD_MUTEX_INCREMENT) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) - FREE((void *)qlib->atomic_locks, - sizeof(QTHREAD_FASTLOCK_TYPE) * QTHREAD_LOCKING_STRIPES); -#endif for (i = 0; i < qlib->nshepherds; ++i) { qthread_debug( @@ -2130,7 +2071,7 @@ aligned_t API_FUNC *qthread_retloc(void) { /*{{{ */ /************************************************************/ /* functions to manage thread stack allocation/deallocation */ /************************************************************/ -static QINLINE qthread_t *qthread_thread_new(qthread_f const f, +static inline qthread_t *qthread_thread_new(qthread_f const f, void const *arg, size_t arg_size, void *ret, diff --git a/src/syncvar.c b/src/syncvar.c index 3bb69967e..f3df80fe7 100644 --- a/src/syncvar.c +++ b/src/syncvar.c @@ -31,15 +31,15 @@ #endif /* QTHREAD_USE_EUREKAS */ /* Internal Prototypes */ -static QINLINE void qthread_syncvar_gotlock_fill(qthread_shepherd_t *shep, +static inline void qthread_syncvar_gotlock_fill(qthread_shepherd_t *shep, qthread_addrstat_t *m, syncvar_t *maddr, uint64_t const ret); -static QINLINE void qthread_syncvar_gotlock_empty(qthread_shepherd_t *shep, +static inline void qthread_syncvar_gotlock_empty(qthread_shepherd_t *shep, qthread_addrstat_t *m, syncvar_t *maddr, uint64_t const ret); -static QINLINE void qthread_syncvar_remove(void *maddr); +static inline void qthread_syncvar_remove(void *maddr); /* Internal Structs */ typedef struct { @@ -77,9 +77,6 @@ typedef struct { static qt_hash *syncvars; #ifdef QTHREAD_COUNT_THREADS extern aligned_t *febs_stripes; -#ifdef QTHREAD_MUTEX_INCREMENT -extern QTHREAD_FASTLOCK_TYPE *febs_stripes_locks; -#endif #endif extern unsigned int QTHREAD_LOCKING_STRIPES; @@ -1018,7 +1015,7 @@ int API_FUNC qthread_syncvar_readFE_nb(uint64_t *restrict dest, return QTHREAD_SUCCESS; } /*}}} */ -static QINLINE void qthread_syncvar_schedule(qthread_t *waiter, +static inline void qthread_syncvar_schedule(qthread_t *waiter, qthread_shepherd_t *shep) { /*{{{*/ assert(waiter); assert(shep); @@ -1037,7 +1034,7 @@ static QINLINE void qthread_syncvar_schedule(qthread_t *waiter, } } /*}}}*/ -static QINLINE void qthread_syncvar_remove(void *maddr) { /*{{{*/ +static inline void qthread_syncvar_remove(void *maddr) { /*{{{*/ int const lockbin = QTHREAD_CHOOSE_STRIPE(maddr); qthread_addrstat_t *m; @@ -1114,7 +1111,7 @@ static QINLINE void qthread_syncvar_remove(void *maddr) { /*{{{*/ } } /*}}}*/ -static QINLINE void qthread_syncvar_gotlock_empty(qthread_shepherd_t *shep, +static inline void qthread_syncvar_gotlock_empty(qthread_shepherd_t *shep, qthread_addrstat_t *m, syncvar_t *maddr, uint64_t const sf) { /*{{{ */ @@ -1145,7 +1142,7 @@ static QINLINE void qthread_syncvar_gotlock_empty(qthread_shepherd_t *shep, if (removeable) { qthread_syncvar_remove(maddr); } } /*}}} */ -static QINLINE void qthread_syncvar_gotlock_fill(qthread_shepherd_t *shep, +static inline void qthread_syncvar_gotlock_fill(qthread_shepherd_t *shep, qthread_addrstat_t *m, syncvar_t *maddr, uint64_t const ret) { /*{{{ */ diff --git a/src/threadqueues/distrib_threadqueues.c b/src/threadqueues/distrib_threadqueues.c index 5b0b7a169..ec4d02c03 100644 --- a/src/threadqueues/distrib_threadqueues.c +++ b/src/threadqueues/distrib_threadqueues.c @@ -100,18 +100,18 @@ static void free_threadqueue(qt_threadqueue_t *t) { qt_mpool_free(generic_threadqueue_pools.queues, t); } -static QINLINE qt_threadqueue_node_t *alloc_tqnode(void) { +static inline qt_threadqueue_node_t *alloc_tqnode(void) { return (qt_threadqueue_node_t *)qt_mpool_alloc( generic_threadqueue_pools.nodes); } -static QINLINE void free_tqnode(qt_threadqueue_node_t *t) { +static inline void free_tqnode(qt_threadqueue_node_t *t) { qt_mpool_free(generic_threadqueue_pools.nodes, t); } extern qt_mpool generic_qthread_pool; -static QINLINE void free_qthread(qthread_t *t) { +static inline void free_qthread(qthread_t *t) { return qt_mpool_free(generic_qthread_pool, t); } diff --git a/src/threadqueues/lifo_threadqueues.c b/src/threadqueues/lifo_threadqueues.c deleted file mode 100644 index b23e7f59f..000000000 --- a/src/threadqueues/lifo_threadqueues.c +++ /dev/null @@ -1,349 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/* System Headers */ -#include -#include -#include - -/* Internal Headers */ -#include "qt_asserts.h" -#include "qt_atomics.h" -#include "qt_debug.h" -#include "qt_macros.h" -#include "qt_prefetch.h" -#include "qt_qthread_struct.h" -#include "qt_threadqueues.h" -#include "qt_visibility.h" -#include "qthread/qthread.h" -#include "qthread_innards.h" /* for qlib */ -#ifdef QTHREAD_USE_EUREKAS -#include "qt_eurekas.h" -#endif /* QTHREAD_USE_EUREKAS */ -#include "qt_subsystems.h" - -/* Note: this queue is SAFE to use with multiple de-queuers, with the caveat - * that if you have multiple dequeuer's, you'll need to solve the ABA problem. - * (single dequeuer == no ABA problem) Also, yielding changes behavior a bit in - * the multiple-dequeuer case. - */ - -/* Data Structures */ -struct _qt_threadqueue_node { - struct _qt_threadqueue_node *next; - qthread_t *thread; -}; - -struct _qt_threadqueue { - qt_threadqueue_node_t *stack; - /* the following is for estimating a queue's "busy" level, and is not - * guaranteed accurate (that would be a race condition) */ - saligned_t advisory_queuelen; -#ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE - uint32_t frustration; - QTHREAD_COND_DECL(trigger) -#endif -} /* qt_threadqueue_t */; - -/* Memory Management */ -#if defined(UNPOOLED_QUEUES) || defined(UNPOOLED) -#define ALLOC_THREADQUEUE() (qt_threadqueue_t *)MALLOC(sizeof(qt_threadqueue_t)) -#define FREE_THREADQUEUE(t) FREE(t, sizeof(qt_threadqueue_t)) -#define ALLOC_TQNODE() \ - (qt_threadqueue_node_t *)MALLOC(sizeof(qt_threadqueue_node_t)) -#define FREE_TQNODE(t) FREE(t, sizeof(qt_threadqueue_node_t)) - -void INTERNAL qt_threadqueue_subsystem_init(void) {} -#else /* if defined(UNPOOLED_QUEUES) || defined(UNPOOLED) */ -qt_threadqueue_pools_t generic_threadqueue_pools = {NULL, NULL}; -#define ALLOC_THREADQUEUE() \ - (qt_threadqueue_t *)qt_mpool_alloc(generic_threadqueue_pools.queues) -#define FREE_THREADQUEUE(t) qt_mpool_free(generic_threadqueue_pools.queues, t) -#define ALLOC_TQNODE() \ - (qt_threadqueue_node_t *)qt_mpool_alloc(generic_threadqueue_pools.nodes) -#define FREE_TQNODE(t) qt_mpool_free(generic_threadqueue_pools.nodes, t) - -static void qt_threadqueue_subsystem_shutdown(void) { - qt_mpool_destroy(generic_threadqueue_pools.queues); - qt_mpool_destroy(generic_threadqueue_pools.nodes); -} - -void INTERNAL qt_threadqueue_subsystem_init(void) { - generic_threadqueue_pools.queues = qt_mpool_create(sizeof(qt_threadqueue_t)); - generic_threadqueue_pools.nodes = - qt_mpool_create_aligned(sizeof(qt_threadqueue_node_t), sizeof(void *)); - qthread_internal_cleanup(qt_threadqueue_subsystem_shutdown); -} -#endif /* if defined(UNPOOLED_QUEUES) || defined(UNPOOLED) */ - -/* Thankfully, a basic LIFO stack does not suffer from the ABA problem. */ - -qt_threadqueue_t INTERNAL *qt_threadqueue_new(void) { /*{{{*/ - qt_threadqueue_t *q = ALLOC_THREADQUEUE(); - - qassert_ret(q != NULL, NULL); - - q->stack = NULL; - q->advisory_queuelen = 0; -#ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE - q->frustration = 0; - QTHREAD_COND_INIT(q->trigger); -#endif /* ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE */ - - return q; -} /*}}}*/ - -static qthread_t *qt_threadqueue_dequeue(qt_threadqueue_t *q) { /*{{{*/ - qt_threadqueue_node_t *retval = q->stack; - - if (retval != NULL) { - qt_threadqueue_node_t *old, *new; - -#ifdef QTHREAD_LIFO_MULTI_DEQUEUER -# error This dequeue function is not safe! retval may be freed before we dereference it to find the next ptr. Need to use hazardptrs. -#endif - do { - old = retval; - new = retval->next; - retval = qthread_cas_ptr(&q->stack, old, new); - } while (retval != old && retval != NULL); - } - if (retval != NULL) { - qthread_t *t = retval->thread; - FREE_TQNODE(retval); - (void)qthread_incr(&(q->advisory_queuelen), -1); - return t; - } else { - return NULL; - } -} /*}}}*/ - -void INTERNAL qt_threadqueue_free(qt_threadqueue_t *q) { /*{{{*/ - assert(q); - while (qt_threadqueue_dequeue(q)); -#ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE - QTHREAD_COND_DESTROY(q->trigger); -#endif - FREE_THREADQUEUE(q); -} /*}}}*/ - -#ifdef QTHREAD_USE_SPAWNCACHE -qthread_t INTERNAL * -qt_threadqueue_private_dequeue(qt_threadqueue_private_t *c) { /*{{{*/ - return NULL; -} /*}}}*/ - -int INTERNAL -qt_threadqueue_private_enqueue(qt_threadqueue_private_t *restrict pq, - qt_threadqueue_t *restrict q, - qthread_t *restrict t) { /*{{{*/ - return 0; -} /*}}}*/ - -int INTERNAL qt_threadqueue_private_enqueue_yielded( - qt_threadqueue_private_t *restrict q, qthread_t *restrict t) { /*{{{*/ - return 0; -} /*}}}*/ - -void INTERNAL qt_threadqueue_enqueue_cache(qt_threadqueue_t *q, - qt_threadqueue_private_t *cache) {} - -void INTERNAL qt_threadqueue_private_filter( - qt_threadqueue_private_t *restrict c, qt_threadqueue_filter_f f) {} -#endif /* ifdef QTHREAD_USE_SPAWNCACHE */ - -void INTERNAL qt_threadqueue_enqueue(qt_threadqueue_t *restrict q, - qthread_t *restrict t) { /*{{{*/ - qt_threadqueue_node_t *old, *new; - qt_threadqueue_node_t *node; - - assert(q); - assert(t); - - qthread_debug(THREADQUEUE_CALLS, "q(%p), t(%p->%u)\n", q, t, t->thread_id); - - node = ALLOC_TQNODE(); - assert(node != NULL); - node->thread = t; - node->next = NULL; - - old = q->stack; /* should be an atomic read */ - do { - node->next = old; - new = qthread_cas_ptr(&(q->stack), old, node); - if (new != old) { - old = new; - } else { - break; - } - } while (1); - (void)qthread_incr(&(q->advisory_queuelen), 1); - - /* awake waiter */ -#ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE - if (q->frustration) { - QTHREAD_COND_LOCK(q->trigger); - if (q->frustration) { - q->frustration = 0; - QTHREAD_COND_SIGNAL(q->trigger); - } - QTHREAD_COND_UNLOCK(q->trigger); - } -#endif -} /*}}}*/ - -void INTERNAL qt_threadqueue_enqueue_yielded(qt_threadqueue_t *restrict q, - qthread_t *restrict t) { /*{{{*/ - assert(q); - assert(t); - -#ifdef QTHREAD_LIFO_MULTI_DEQUEUER - qthread_t *top = qt_threadqueue_dequeue(q); - qt_threadqueue_enqueue(q, t); - if (top) { qt_threadqueue_enqueue(q, top); } -#else - /* THIS is not safe for multiple dequeuers */ - qt_threadqueue_node_t *cursor = q->stack; - if (cursor) { - qt_threadqueue_node_t *node; - while (cursor->next) { cursor = cursor->next; } - assert(cursor->next == NULL); - /* alloc the node */ - node = ALLOC_TQNODE(); - assert(node != NULL); - node->thread = t; - node->next = NULL; - /* append the node */ - cursor->next = node; - (void)qthread_incr(&(q->advisory_queuelen), 1); - } else { - qt_threadqueue_enqueue(q, t); - } -#endif /* ifdef QTHREAD_LIFO_MULTI_DEQUEUER */ -} /*}}}*/ - -ssize_t INTERNAL qt_threadqueue_advisory_queuelen(qt_threadqueue_t *q) { /*{{{*/ - assert(q); - return q->advisory_queuelen; -} /*}}}*/ - -qthread_t INTERNAL * -qt_scheduler_get_thread(qt_threadqueue_t *q, - qt_threadqueue_private_t *Q_UNUSED(qc), - uint_fast8_t Q_UNUSED(active)) { /*{{{*/ -#ifdef QTHREAD_USE_EUREKAS - qt_eureka_disable(); -#endif /* QTHREAD_USE_EUREKAS */ - qthread_t *retval = qt_threadqueue_dequeue(q); - - qthread_debug(THREADQUEUE_CALLS, "q(%p)\n", q); - if (retval == NULL) { -#ifdef QTHREAD_USE_EUREKAS - qt_eureka_check(0); -#endif /* QTHREAD_USE_EUREKAS */ - while (q->stack == NULL) { -#ifndef QTHREAD_CONDWAIT_BLOCKING_QUEUE - SPINLOCK_BODY(); -#else - COMPILER_FENCE; - if (qthread_incr(&q->frustration, 1) > 1000) { - QTHREAD_COND_LOCK(q->trigger); - if (q->frustration > 1000) { QTHREAD_COND_WAIT(q->trigger); } - QTHREAD_COND_UNLOCK(q->trigger); - } -#endif /* ifdef USE_HARD_POLLING */ - } -#ifdef QTHREAD_USE_EUREKAS - qt_eureka_disable(); -#endif /* QTHREAD_USE_EUREKAS */ - retval = qt_threadqueue_dequeue(q); - } - assert(retval); - qthread_debug(THREADQUEUE_BEHAVIOR, - "found thread %u (%p); q(%p)\n", - retval->thread_id, - retval, - q); - return retval; -} /*}}}*/ - -/* walk queue removing all tasks matching this description */ -void INTERNAL qt_threadqueue_filter(qt_threadqueue_t *q, - qt_threadqueue_filter_f f) { /*{{{*/ - qt_threadqueue_node_t *curs, **ptr; - - assert(q != NULL); - - curs = q->stack; - ptr = &q->stack; - while (curs) { - qthread_t *t = curs->thread; - switch (f(t)) { - case IGNORE_AND_CONTINUE: // ignore, move on - ptr = &curs->next; - curs = curs->next; - break; - case IGNORE_AND_STOP: // ignore, stop looking - return; - - case REMOVE_AND_CONTINUE: // remove, move on - { - qt_threadqueue_node_t *freeme = curs; - -#ifdef QTHREAD_USE_EUREKAS - qthread_internal_assassinate(t); -#endif /* QTHREAD_USE_EUREKAS */ - *ptr = curs->next; - curs = curs->next; - FREE_TQNODE(freeme); - break; - } - case REMOVE_AND_STOP: // remove, stop looking; -#ifdef QTHREAD_USE_EUREKAS - qthread_internal_assassinate(t); -#endif /* QTHREAD_USE_EUREKAS */ - *ptr = curs->next; - FREE_TQNODE(curs); - return; - } - } -} /*}}}*/ - -/* some place-holder functions */ -void INTERNAL qthread_steal_stat(void) {} - -void INTERNAL qthread_steal_enable(void) {} - -void INTERNAL qthread_steal_disable(void) {} - -void INTERNAL qthread_cas_steal_stat(void) {} - -qthread_shepherd_id_t INTERNAL -qt_threadqueue_choose_dest(qthread_shepherd_t *curr_shep) { - qthread_shepherd_id_t dest_shep_id = 0; - - if (curr_shep) { - dest_shep_id = curr_shep->sched_shepherd++; - curr_shep->sched_shepherd *= (qlib->nshepherds > (dest_shep_id + 1)); - } else { - dest_shep_id = (qthread_shepherd_id_t)qthread_internal_incr_mod( - &qlib->sched_shepherd, qlib->nshepherds, &qlib->sched_shepherd_lock); - } - - return dest_shep_id; -} - -qthread_t INTERNAL *qt_threadqueue_dequeue_specific(qt_threadqueue_t *q, - void *value) { - return NULL; -} - -size_t INTERNAL qt_threadqueue_policy(const enum threadqueue_policy policy) { - switch (policy) { - case SINGLE_WORKER: return THREADQUEUE_POLICY_TRUE; - default: return THREADQUEUE_POLICY_UNSUPPORTED; - } -} - -/* vim:set expandtab: */ diff --git a/src/threadqueues/mtsfifo_threadqueues.c b/src/threadqueues/mtsfifo_threadqueues.c deleted file mode 100644 index 7ed8f93fc..000000000 --- a/src/threadqueues/mtsfifo_threadqueues.c +++ /dev/null @@ -1,484 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/* System Headers */ -#include - -/* Internal Headers */ -#include "qt_asserts.h" -#include "qt_atomics.h" -#include "qt_debug.h" -#include "qt_macros.h" -#include "qt_prefetch.h" -#include "qt_qthread_struct.h" -#include "qt_shepherd_innards.h" -#include "qt_threadqueues.h" -#include "qt_visibility.h" -#include "qthread/qthread.h" -#include "qthread_innards.h" /* for qlib */ -#if defined(UNPOOLED_QUEUES) || defined(UNPOOLED) -#include "qt_alloc.h" -#endif -#ifdef QTHREAD_USE_EUREKAS -#include "qt_eurekas.h" -#endif /* QTHREAD_USE_EUREKAS */ -#include "qt_subsystems.h" - -/* Data Structures */ -struct _qt_threadqueue_node { - struct _qt_threadqueue_node *next; - qthread_t *value; -} /* qt_threadqueue_node_t */; - -struct _qt_threadqueue { - qt_threadqueue_node_t *head; - qt_threadqueue_node_t *tail; -#ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE - aligned_t fruitless; - QTHREAD_COND_DECL(trigger); -#endif /* CONDWAIT */ - /* the following is for estimating a queue's "busy" level, and is not - * guaranteed accurate (that would be a race condition) */ - saligned_t advisory_queuelen; -} /* qt_threadqueue_t */; - -/* Memory Management */ -#if defined(UNPOOLED_QUEUES) || defined(UNPOOLED) -#define ALLOC_THREADQUEUE() (qt_threadqueue_t *)MALLOC(sizeof(qt_threadqueue_t)) -#define FREE_THREADQUEUE(t) FREE(t, sizeof(qt_threadqueue_t)) - -static QINLINE void ALLOC_TQNODE(qt_threadqueue_node_t **ret) { /*{{{ */ - *ret = (qt_threadqueue_node_t *)qt_internal_aligned_alloc( - sizeof(qt_threadqueue_node_t), 16); - if (*ret != NULL) { memset(*ret, 0, sizeof(qt_threadqueue_node_t)); } -} /*}}} */ - -static void FREE_TQNODE(void *p) { - FREE_SCRIBBLE(p, sizeof(qt_threadqueue_node_t)); - qt_internal_aligned_free(p, 16); -} - -void INTERNAL qt_threadqueue_subsystem_init(void) {} -#else /* if defined(UNPOOLED_QUEUES) || defined(UNPOOLED) */ -qt_threadqueue_pools_t generic_threadqueue_pools; -#define ALLOC_THREADQUEUE() \ - (qt_threadqueue_t *)qt_mpool_alloc(generic_threadqueue_pools.queues) -#define FREE_THREADQUEUE(t) qt_mpool_free(generic_threadqueue_pools.queues, t) - -static QINLINE void ALLOC_TQNODE(qt_threadqueue_node_t **ret) { /*{{{ */ - *ret = - (qt_threadqueue_node_t *)qt_mpool_alloc(generic_threadqueue_pools.nodes); - if (*ret != NULL) { memset(*ret, 0, sizeof(qt_threadqueue_node_t)); } -} /*}}} */ - -static void FREE_TQNODE(void *p) { - qt_mpool_free(generic_threadqueue_pools.nodes, p); -} - -static void qt_threadqueue_subsystem_shutdown(void) { - qt_mpool_destroy(generic_threadqueue_pools.nodes); - qt_mpool_destroy(generic_threadqueue_pools.queues); -} - -void INTERNAL qt_threadqueue_subsystem_init(void) { - generic_threadqueue_pools.nodes = - qt_mpool_create_aligned(sizeof(qt_threadqueue_node_t), 16); - generic_threadqueue_pools.queues = qt_mpool_create(sizeof(qt_threadqueue_t)); - qthread_internal_cleanup(qt_threadqueue_subsystem_shutdown); -} - -#endif /* if defined(UNPOOLED_QUEUES) || defined(UNPOOLED) */ - -ssize_t INTERNAL qt_threadqueue_advisory_queuelen(qt_threadqueue_t *q) { /*{{{*/ - return qthread_internal_atomic_read_s(&q->advisory_queuelen, - &q->advisory_queuelen_m); -} /*}}}*/ - -/*****************************************/ -/* functions to manage the thread queues */ -/*****************************************/ - -// This lock-free algorithm borrowed from -// http://www.research.ibm.com/people/m/michael/podc-1996.pdf -// ... and modified to use hazard ptrs according to -// http://www.research.ibm.com/people/m/michael/ieeetpds-2004.pdf - -qt_threadqueue_t INTERNAL *qt_threadqueue_new(void) { /*{{{ */ - qt_threadqueue_t *q = ALLOC_THREADQUEUE(); - - if (q != NULL) { -#ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE - q->fruitless = 0; - QTHREAD_COND_INIT(q->trigger); -#endif /* ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE */ - ALLOC_TQNODE(((qt_threadqueue_node_t **)&(q->head))); - assert(q->head != NULL); - if (q->head == NULL) { // if we're not using asserts, fail nicely -#ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE - QTHREAD_COND_DESTROY(q->trigger); -#endif - FREE_THREADQUEUE(q); - q = NULL; - } - q->tail = q->head; - q->tail->next = NULL; - } - return q; -} /*}}} */ - -static qthread_t *qt_threadqueue_dequeue(qt_threadqueue_t *q) { /*{{{ */ - qthread_t *p = NULL; - - qt_threadqueue_node_t *head; - qt_threadqueue_node_t *tail; - qt_threadqueue_node_t *next_ptr; - - assert(q != NULL); - while (1) { - head = q->head; - - hazardous_ptr(0, head); - if (head != q->head) { - continue; // are head, tail, and next consistent? - } - - tail = q->tail; - next_ptr = head->next; - - hazardous_ptr(1, next_ptr); - - if (next_ptr == NULL) { - return NULL; // queue is empty - } - if (head == tail) { // tail is falling behind! - (void)qt_cas((void **)&(q->tail), - (void *)tail, - next_ptr); // advance tail ptr - continue; - } - // read value before CAS, otherwise another dequeue might free the next node - p = next_ptr->value; - if (qt_cas((void **)&(q->head), (void *)head, next_ptr) == head) { - break; // success! - } - } - hazardous_release_node(FREE_TQNODE, head); - if (p != NULL) { - Q_PREFETCH(p); - (void)qthread_internal_incr_s( - &q->advisory_queuelen, &q->advisory_queuelen_m, -1); - } - return p; -} /*}}} */ - -void INTERNAL qt_threadqueue_free(qt_threadqueue_t *q) { /*{{{ */ - while (q->head != q->tail) { qt_threadqueue_dequeue(q); } - assert(q->head == q->tail); -#ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE - QTHREAD_COND_DESTROY(q->trigger); -#endif - FREE_TQNODE((qt_threadqueue_node_t *)q->head); - FREE_THREADQUEUE(q); -} /*}}} */ - -#ifdef QTHREAD_USE_SPAWNCACHE -qthread_t INTERNAL * -qt_threadqueue_private_dequeue(qt_threadqueue_private_t *c) { - return NULL; -} - -int INTERNAL -qt_threadqueue_private_enqueue(qt_threadqueue_private_t *restrict pq, - qt_threadqueue_t *restrict q, - qthread_t *restrict t) { - return 0; -} - -int INTERNAL qt_threadqueue_private_enqueue_yielded( - qt_threadqueue_private_t *restrict q, qthread_t *restrict t) { - return 0; -} - -void INTERNAL qt_threadqueue_enqueue_cache(qt_threadqueue_t *q, - qt_threadqueue_private_t *cache) {} - -void INTERNAL qt_threadqueue_private_filter( - qt_threadqueue_private_t *restrict c, qt_threadqueue_filter_f f) {} -#endif /* ifdef QTHREAD_USE_SPAWNCACHE */ - -void INTERNAL qt_threadqueue_enqueue(qt_threadqueue_t *restrict q, - qthread_t *restrict t) { /*{{{ */ - qt_threadqueue_node_t *tail; - qt_threadqueue_node_t *next; - qt_threadqueue_node_t *node; - - assert(t != NULL); - assert(q != NULL); - qthread_debug(THREADQUEUE_CALLS, - "q(%p), t(%p:%i): began head:%p tail:%p\n", - q, - t, - t->thread_id, - q->head, - q->tail); - - ALLOC_TQNODE(&node); - assert(node != NULL); - - node->value = t; - node->next = NULL; - - while (1) { - qthread_debug(THREADQUEUE_DETAILS, - "q(%p), t(%p:%i): reading q->tail\n", - q, - t, - t->thread_id); - tail = q->tail; - - hazardous_ptr(0, tail); - if (tail != q->tail) { - continue; // are tail and next consistent? - } - - next = tail->next; - if (next != NULL) { // tail not pointing to last node - (void)qt_cas((void **)&(q->tail), - (void *)tail, - next); // ABA hazard (mitigated by QCOMPOSE) - continue; - } - // tail must be pointing to the last node - if (qt_cas((void **)&(tail->next), (void *)next, node) == next) { - break; // success! - } - } - (void)qt_cas((void **)&(q->tail), (void *)tail, node); - qthread_debug(THREADQUEUE_DETAILS, - "q(%p), t(%p:%i): appended head:%p nextptr:%p tail:%p\n", - q, - t, - t->thread_id, - q->head, - q->head ? q->head->next : NULL, - q->tail); - - (void)qthread_incr(&q->advisory_queuelen, 1); -#ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE - if (q->fruitless) { - QTHREAD_COND_LOCK(q->trigger); - if (q->fruitless) { - q->fruitless = 0; - QTHREAD_BCAST(q->trigger); - } - QTHREAD_COND_UNLOCK(q->trigger); - } -#endif - hazardous_ptr(0, - NULL); // release the ptr (avoid hazardptr resource exhaustion) -} /*}}} */ - -void qt_threadqueue_enqueue_yielded(qt_threadqueue_t *restrict q, - qthread_t *restrict t) { /*{{{*/ - qt_threadqueue_enqueue(q, t); -} /*}}}*/ - -qthread_t INTERNAL * -qt_scheduler_get_thread(qt_threadqueue_t *q, - qt_threadqueue_private_t *Q_UNUSED(qc), - uint_fast8_t Q_UNUSED(active)) { /*{{{ */ - qthread_t *p = NULL; - - qt_threadqueue_node_t *head; - qt_threadqueue_node_t *tail; - qt_threadqueue_node_t *next_ptr; - - assert(q != NULL); - qthread_debug(THREADQUEUE_CALLS, "q(%p): began\n", q); -#ifdef QTHREAD_USE_EUREKAS - qt_eureka_disable(); -#endif /* QTHREAD_USE_EUREKAS */ - qthread_debug(THREADQUEUE_DETAILS, - "q(%p): head=%p next_ptr=%p tail=%p\n", - q, - q->head, - q->head ? q->head->next : NULL, - q->tail); - while (1) { - head = q->head; - - hazardous_ptr(0, head); - if (head != q->head) { - continue; // are head, tail, and next consistent? - } - - tail = q->tail; - next_ptr = head->next; - - hazardous_ptr(1, next_ptr); - - if (next_ptr == NULL) { // queue is empty -#ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE - if (qthread_internal_incr(&q->fruitless, &q->fruitless_m, 1) > 1000) { -#ifdef QTHREAD_USE_EUREKAS - qt_eureka_check(0); -#endif /* QTHREAD_USE_EUREKAS */ - QTHREAD_COND_LOCK(q->trigger); - while (q->fruitless > 1000) { QTHREAD_COND_WAIT(q->trigger); } - QTHREAD_COND_UNLOCK(q->trigger); -#ifdef QTHREAD_USE_EUREKAS - qt_eureka_disable(); -#endif /* QTHREAD_USE_EUREKAS */ - } else { -#ifdef QTHREAD_USE_EUREKAS - qt_eureka_check(0); -#endif /* QTHREAD_USE_EUREKAS */ -#ifdef HAVE_PTHREAD_YIELD - pthread_yield(); -#elif HAVE_SHED_YIELD - sched_yield(); -#endif -#ifdef QTHREAD_USE_EUREKAS - qt_eureka_disable(); -#endif /* QTHREAD_USE_EUREKAS */ - } -#else /* ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE */ -#ifdef QTHREAD_USE_EUREKAS - qt_eureka_check(1); -#endif /* QTHREAD_USE_EUREKAS */ - SPINLOCK_BODY(); -#endif /* ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE */ - continue; - } - qthread_debug(THREADQUEUE_DETAILS, "q(%p): next_ptr = %p\n", q, next_ptr); - if (head == tail) { // tail is falling behind - (void)qt_cas((void **)&(q->tail), - (void *)tail, - next_ptr); // advance tail ptr - } - // read value before CAS, otherwise another dequeue might free the next node - p = next_ptr->value; - if (qt_cas((void **)&(q->head), (void *)head, next_ptr) == head) { - break; // success! - } - } - qthread_debug(THREADQUEUE_DETAILS, - "q(%p): found a thread! p=%p:%i\n", - q, - p, - p->thread_id); - hazardous_release_node(FREE_TQNODE, head); - if (p != NULL) { - (void)qthread_internal_incr_s( - &q->advisory_queuelen, &q->advisory_queuelen_m, -1); - } - return p; -} /*}}} */ - -/* walk queue removing all tasks matching this description */ -void INTERNAL qt_threadqueue_filter(qt_threadqueue_t *q, - qt_threadqueue_filter_f f) { /*{{{*/ - qt_threadqueue_node_t *curs, **ptr; - - qthread_debug(THREADQUEUE_CALLS, - "q(%p), f(%p): began head:%p next:%p tail:%p\n", - q, - f, - q->head, - q->head ? q->head->next : NULL, - q->tail); - - assert(q != NULL); - do { - curs = q->head; - if (curs == NULL) { return; } - hazardous_ptr(0, curs); - COMPILER_FENCE; - } while (curs != q->head); - ptr = &curs->next; - curs = curs->next; - hazardous_ptr(1, curs); - while (curs) { - qthread_t *t = curs->value; - switch (f(t)) { - case IGNORE_AND_CONTINUE: // ignore, move on - hazardous_ptr(0, curs); - ptr = &curs->next; - curs = curs->next; - hazardous_ptr(1, curs); - continue; - case IGNORE_AND_STOP: // ignore, stop looking - return; - - case REMOVE_AND_CONTINUE: // remove, move on - { - qt_threadqueue_node_t *freeme = curs; - -#ifdef QTHREAD_USE_EUREKAS - qthread_internal_assassinate(t); -#endif /* QTHREAD_USE_EUREKAS */ - if (curs->next == NULL) { - /* this is clever: since 'next' is the first field, its - * address is the address of the entire structure */ - q->tail = (qt_threadqueue_node_t *)ptr; - } - *ptr = curs->next; - curs = curs->next; - hazardous_ptr(1, curs); - hazardous_release_node(FREE_TQNODE, freeme); - } - continue; - case REMOVE_AND_STOP: // remove, stop looking -#ifdef QTHREAD_USE_EUREKAS - qthread_internal_assassinate(t); -#endif /* QTHREAD_USE_EUREKAS */ - if (curs->next == NULL) { - /* this is clever: since 'next' is the first field, its - * address is the address of the entire structure */ - q->tail = (qt_threadqueue_node_t *)ptr; - } - *ptr = curs->next; - hazardous_release_node(FREE_TQNODE, curs); - return; - } - } -} /*}}}*/ - -/* some place-holder functions */ -void INTERNAL qthread_steal_stat(void) {} - -void INTERNAL qthread_steal_enable(void) {} - -void INTERNAL qthread_steal_disable(void) {} - -void INTERNAL qthread_cas_steal_stat(void) {} - -qthread_shepherd_id_t INTERNAL -qt_threadqueue_choose_dest(qthread_shepherd_t *curr_shep) { - qthread_shepherd_id_t dest_shep_id = 0; - - if (curr_shep) { - dest_shep_id = curr_shep->sched_shepherd++; - curr_shep->sched_shepherd *= (qlib->nshepherds > (dest_shep_id + 1)); - } else { - dest_shep_id = (qthread_shepherd_id_t)qthread_internal_incr_mod( - &qlib->sched_shepherd, qlib->nshepherds, &qlib->sched_shepherd_lock); - } - - return dest_shep_id; -} - -qthread_t INTERNAL *qt_threadqueue_dequeue_specific(qt_threadqueue_t *q, - void *value) { - return NULL; -} - -size_t INTERNAL qt_threadqueue_policy(const enum threadqueue_policy policy) { - switch (policy) { - case SINGLE_WORKER: return THREADQUEUE_POLICY_TRUE; - default: return THREADQUEUE_POLICY_UNSUPPORTED; - } -} - -/* vim:set expandtab: */ diff --git a/src/threadqueues/mutexfifo_threadqueues.c b/src/threadqueues/mutexfifo_threadqueues.c deleted file mode 100644 index 950e51bc8..000000000 --- a/src/threadqueues/mutexfifo_threadqueues.c +++ /dev/null @@ -1,317 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/* System Headers */ -#include -#include -#include -#include - -/* API Headers */ -#include "qthread/qthread.h" - -/* Internal Headers */ -#include "qt_asserts.h" -#include "qt_debug.h" -#include "qt_macros.h" -#include "qt_prefetch.h" -#include "qt_qthread_struct.h" -#include "qt_shepherd_innards.h" -#include "qt_threadqueues.h" -#include "qt_visibility.h" -#include "qthread_innards.h" /* for qlib */ -#ifdef QTHREAD_USE_EUREKAS -#include "qt_eurekas.h" -#endif /* QTHREAD_USE_EUREKAS */ -#include "qt_subsystems.h" - -/* Data Structures */ -struct _qt_threadqueue_node { - struct _qt_threadqueue_node *next; - qthread_t *value; -} /* qt_threadqueue_node_t */; - -struct _qt_threadqueue { - qt_threadqueue_node_t *head; - qt_threadqueue_node_t *tail; - QTHREAD_FASTLOCK_TYPE head_lock; - QTHREAD_FASTLOCK_TYPE tail_lock; - QTHREAD_FASTLOCK_TYPE advisory_queuelen_m; - /* the following is for estimating a queue's "busy" level, and is not - * guaranteed accurate (that would be a race condition) */ - saligned_t advisory_queuelen; -} /* qt_threadqueue_t */; - -/* Memory Management */ -#if defined(UNPOOLED_QUEUES) || defined(UNPOOLED) -#define ALLOC_THREADQUEUE() (qt_threadqueue_t *)MALLOC(sizeof(qt_threadqueue_t)) -#define FREE_THREADQUEUE(t) FREE(t, sizeof(qt_threadqueue_t)) -#define ALLOC_TQNODE() \ - (qt_threadqueue_node_t *)MALLOC(sizeof(qt_threadqueue_node_t)) -#define FREE_TQNODE(t) FREE(t, sizeof(qt_threadqueue_node_t)) - -void INTERNAL qt_threadqueue_subsystem_init(void) {} -#else /* if defined(UNPOOLED_QUEUES) || defined(UNPOOLED) */ -qt_threadqueue_pools_t generic_threadqueue_pools; -#define ALLOC_THREADQUEUE() \ - (qt_threadqueue_t *)qt_mpool_alloc(generic_threadqueue_pools.queues) -#define FREE_THREADQUEUE(t) qt_mpool_free(generic_threadqueue_pools.queues, t) -#define ALLOC_TQNODE() \ - (qt_threadqueue_node_t *)qt_mpool_alloc(generic_threadqueue_pools.nodes) -#define FREE_TQNODE(t) qt_mpool_free(generic_threadqueue_pools.nodes, t) - -static void qt_threadqueue_subsystem_shutdown(void) { /*{{{*/ - qt_mpool_destroy(generic_threadqueue_pools.nodes); - qt_mpool_destroy(generic_threadqueue_pools.queues); -} /*}}}*/ - -void INTERNAL qt_threadqueue_subsystem_init(void) { /*{{{*/ - generic_threadqueue_pools.nodes = - qt_mpool_create(sizeof(qt_threadqueue_node_t)); - generic_threadqueue_pools.queues = qt_mpool_create(sizeof(qt_threadqueue_t)); - qthread_internal_cleanup(qt_threadqueue_subsystem_shutdown); -} /*}}}*/ -#endif /* if defined(UNPOOLED_QUEUES) || defined(UNPOOLED) */ - -ssize_t INTERNAL qt_threadqueue_advisory_queuelen(qt_threadqueue_t *q) { /*{{{*/ - return qthread_internal_atomic_read_s(&q->advisory_queuelen, - &q->advisory_queuelen_m); -} /*}}}*/ - -#define QTHREAD_INITLOCK(l) \ - do { \ - if (pthread_mutex_init(l, NULL) != 0) { return QTHREAD_PTHREAD_ERROR; } \ - } while (0) -#define QTHREAD_LOCK(l) qassert(pthread_mutex_lock(l), 0) -#define QTHREAD_UNLOCK(l) qassert(pthread_mutex_unlock(l), 0) -// #define QTHREAD_DESTROYLOCK(l) do { int __ret__ = pthread_mutex_destroy(l); -// if (__ret__ != 0) fprintf(stderr, "pthread_mutex_destroy(%p) returned %i -// (%s)\n", l, __ret__, strerror(__ret__)); assert(__ret__ == 0); } while (0) -#define QTHREAD_DESTROYLOCK(l) qassert(pthread_mutex_destroy(l), 0) -#define QTHREAD_DESTROYCOND(l) qassert(pthread_cond_destroy(l), 0) - -/*****************************************/ -/* functions to manage the thread queues */ -/*****************************************/ - -qt_threadqueue_t INTERNAL *qt_threadqueue_new(void) { /*{{{ */ - qt_threadqueue_t *q = ALLOC_THREADQUEUE(); - - if (q != NULL) { - QTHREAD_FASTLOCK_INIT(q->head_lock); - QTHREAD_FASTLOCK_INIT(q->tail_lock); - QTHREAD_FASTLOCK_INIT(q->advisory_queuelen_m); - q->advisory_queuelen = 0; - q->head = ALLOC_TQNODE(); - assert(q->head != NULL); - if (q->head == NULL) { - QTHREAD_FASTLOCK_DESTROY(q->advisory_queuelen_m); - QTHREAD_FASTLOCK_DESTROY(q->head_lock); - QTHREAD_FASTLOCK_DESTROY(q->tail_lock); - FREE_THREADQUEUE(q); - q = NULL; - } else { - q->tail = q->head; - q->head->next = NULL; - q->head->value = NULL; - } - } - return q; -} /*}}} */ - -static qthread_t *qt_threadqueue_dequeue(qt_threadqueue_t *q) { /*{{{ */ - qthread_t *p = NULL; - - qt_threadqueue_node_t *node, *new_head; - - assert(q != NULL); - QTHREAD_FASTLOCK_LOCK(&q->head_lock); - { - node = q->head; - new_head = node->next; - if (new_head != NULL) { - p = new_head->value; - q->head = new_head; - } - } - QTHREAD_FASTLOCK_UNLOCK(&q->head_lock); - if (p != NULL) { - Q_PREFETCH(p); - (void)qthread_internal_incr_s( - &q->advisory_queuelen, &q->advisory_queuelen_m, -1); - } - return p; -} /*}}} */ - -void INTERNAL qt_threadqueue_free(qt_threadqueue_t *q) { /*{{{ */ - while (q->head != q->tail) { qt_threadqueue_dequeue(q); } - QTHREAD_FASTLOCK_DESTROY(q->head_lock); - QTHREAD_FASTLOCK_DESTROY(q->tail_lock); - QTHREAD_FASTLOCK_DESTROY(q->advisory_queuelen_m); - FREE_TQNODE((qt_threadqueue_node_t *)(q->head)); - FREE_THREADQUEUE(q); -} /*}}} */ - -#ifdef QTHREAD_USE_SPAWNCACHE -qthread_t INTERNAL * -qt_threadqueue_private_dequeue(qt_threadqueue_private_t *c) { /*{{{*/ - return NULL; -} /*}}}*/ - -int INTERNAL -qt_threadqueue_private_enqueue(qt_threadqueue_private_t *restrict pq, - qt_threadqueue_t *restrict q, - qthread_t *restrict t) { /*{{{*/ - return 0; -} /*}}}*/ - -int INTERNAL qt_threadqueue_private_enqueue_yielded( - qt_threadqueue_private_t *restrict q, qthread_t *restrict t) { /*{{{*/ - return 0; -} /*}}}*/ - -void INTERNAL qt_threadqueue_enqueue_cache(qt_threadqueue_t *q, - qt_threadqueue_private_t *cache) {} - -void INTERNAL qt_threadqueue_private_filter( - qt_threadqueue_private_t *restrict c, qt_threadqueue_filter_f f) {} -#endif /* ifdef QTHREAD_USE_SPAWNCACHE */ - -void INTERNAL qt_threadqueue_enqueue(qt_threadqueue_t *restrict q, - qthread_t *restrict t) { /*{{{ */ - qt_threadqueue_node_t *node; - - node = ALLOC_TQNODE(); - assert(node != NULL); - node->value = t; - node->next = NULL; - QTHREAD_FASTLOCK_LOCK(&q->tail_lock); - { - q->tail->next = node; - q->tail = node; - } - QTHREAD_FASTLOCK_UNLOCK(&q->tail_lock); - (void)qthread_internal_incr_s( - &q->advisory_queuelen, &q->advisory_queuelen_m, 1); -} /*}}} */ - -void qt_threadqueue_enqueue_yielded(qt_threadqueue_t *restrict q, - qthread_t *restrict t) { /*{{{*/ - qt_threadqueue_enqueue(q, t); -} /*}}}*/ - -/* this function is amusing, but the point is to avoid unnecessary bus traffic - * by allowing idle shepherds to sit for a while while still allowing for - * low-overhead for busy shepherds. This is a hybrid approach: normally, it - * functions as a spinlock, but if it spins too much, it waits for a signal */ -qthread_t INTERNAL * -qt_scheduler_get_thread(qt_threadqueue_t *q, - qt_threadqueue_private_t *Q_UNUSED(qc), - uint_fast8_t Q_UNUSED(active)) { /*{{{ */ - qthread_t *p = NULL; - -#ifdef QTHREAD_USE_EUREKAS - qt_eureka_disable(); -#endif /* QTHREAD_USE_EUREKAS */ - while ((p = qt_threadqueue_dequeue(q)) == NULL) { -#ifdef QTHREAD_USE_EUREKAS - qt_eureka_check(1); -#endif /* QTHREAD_USE_EUREKAS */ - SPINLOCK_BODY(); - } - return p; -} /*}}} */ - -/* walk queue removing all tasks matching this description */ -void INTERNAL qt_threadqueue_filter(qt_threadqueue_t *q, - qt_threadqueue_filter_f f) { /*{{{*/ - QTHREAD_FASTLOCK_LOCK(&q->head_lock); - { - qt_threadqueue_node_t *curs = q->head->next; - qt_threadqueue_node_t **ptr = &q->head->next; - - while (curs) { - qthread_t *t = curs->value; - switch (f(t)) { - case IGNORE_AND_CONTINUE: // ignore, move on - ptr = &curs->next; - curs = curs->next; - break; - case IGNORE_AND_STOP: // ignore, stop looking - curs = NULL; - continue; - case REMOVE_AND_CONTINUE: // remove, move on - { - qt_threadqueue_node_t *tmp = curs; -#ifdef QTHREAD_USE_EUREKAS - qthread_internal_assassinate(t); -#endif /* QTHREAD_USE_EUREKAS */ - if (curs->next == NULL) { - /* this is clever: since 'next' is the first field, its - * address is the address of the entire structure */ - q->tail = (qt_threadqueue_node_t *)ptr; - } - *ptr = curs->next; - curs = curs->next; - FREE_TQNODE(tmp); - break; - } - case REMOVE_AND_STOP: // remove, stop looking - { -#ifdef QTHREAD_USE_EUREKAS - qthread_internal_assassinate(t); -#endif /* QTHREAD_USE_EUREKAS */ - if (curs->next == NULL) { - /* this is clever: since 'next' is the first field, its - * address is the address of the entire structure */ - q->tail = (qt_threadqueue_node_t *)ptr; - } - *ptr = curs->next; - FREE_TQNODE(curs); - curs = NULL; - continue; - } - } - } - } - QTHREAD_FASTLOCK_UNLOCK(&q->head_lock); -} /*}}}*/ - -/* some place-holder functions */ -void INTERNAL qthread_steal_stat(void) {} - -void INTERNAL qthread_steal_enable(void) {} - -void INTERNAL qthread_steal_disable(void) {} - -void INTERNAL qthread_cas_steal_stat(void) {} - -qthread_shepherd_id_t INTERNAL -qt_threadqueue_choose_dest(qthread_shepherd_t *curr_shep) { - qthread_shepherd_id_t dest_shep_id = 0; - - if (curr_shep) { - dest_shep_id = curr_shep->sched_shepherd++; - curr_shep->sched_shepherd *= (qlib->nshepherds > (dest_shep_id + 1)); - } else { - dest_shep_id = (qthread_shepherd_id_t)qthread_internal_incr_mod( - &qlib->sched_shepherd, qlib->nshepherds, &qlib->sched_shepherd_lock); - } - - return dest_shep_id; -} - -qthread_t INTERNAL *qt_threadqueue_dequeue_specific(qt_threadqueue_t *q, - void *value) { - return NULL; -} - -size_t INTERNAL qt_threadqueue_policy(const enum threadqueue_policy policy) { - switch (policy) { - case SINGLE_WORKER: return THREADQUEUE_POLICY_TRUE; - default: return THREADQUEUE_POLICY_UNSUPPORTED; - } -} - -/* vim:set expandtab: */ diff --git a/src/threadqueues/nottingham_threadqueues.c b/src/threadqueues/nottingham_threadqueues.c deleted file mode 100644 index 23dfc62d7..000000000 --- a/src/threadqueues/nottingham_threadqueues.c +++ /dev/null @@ -1,963 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/* System Headers */ -#include -#include -#include -#include -#include - -/* Internal Headers */ -#include "56reader-rwlock.h" -#include "qt_asserts.h" -#include "qt_envariables.h" -#include "qt_macros.h" -#include "qt_prefetch.h" -#include "qt_qthread_struct.h" -#include "qt_shepherd_innards.h" -#include "qt_threadqueues.h" -#include "qt_visibility.h" -#include "qthread/qthread.h" -#include "qthread_innards.h" /* for qlib */ - -#ifndef NOINLINE -#define NOINLINE __attribute__((noinline)) -#endif - -/* Data Structures */ -struct uint128 { - uint64_t lo; - uint64_t hi; -}; - -typedef struct uint128 uint128_t; - -struct _qt_threadqueue_entry { - qthread_t *value; - uint32_t index; - uint32_t counter; -}; - -typedef struct _qt_threadqueue_entry qt_threadqueue_entry_t; - -typedef __m128i m128i; - -typedef union { - m128i sse; - qt_threadqueue_entry_t entry; -} qt_threadqueue_union_t; - -struct _qt_threadqueue { - m128i top; - m128i blanks[3]; - m128i *base; - uint32_t size; - uint32_t bottom; - rwlock_t *rwlock; - QTHREAD_FASTLOCK_TYPE spinlock; - - /* used for the work stealing queue implementation */ - m128i flush[1]; - uint32_t empty; - uint32_t stealing; - uint32_t steal_disable; -} /* qt_threadqueue_t */; - -// Forward declarations - -void INTERNAL qt_threadqueue_enqueue_multiple(qt_threadqueue_t *q, - int stealcount, - qthread_t **stealbuffer, - qthread_shepherd_t *shep); - -INTERNAL int qt_threadqueue_dequeue_steal(qt_threadqueue_t *q, - qthread_t **nostealbuffer, - qthread_t **stealbuffer); - -void INTERNAL qt_threadqueue_resize_and_enqueue(qt_threadqueue_t *q, - qthread_t *t); - -int static QINLINE qt_threadqueue_stealable(qthread_t *t); - -qthread_t static QINLINE *qt_threadqueue_dequeue_helper(qt_threadqueue_t *q); - -void INTERNAL qt_threadqueue_enqueue_unstealable(qt_threadqueue_t *q, - qthread_t **nostealbuffer, - int amtNotStolen); - -void INTERNAL qt_threadqueue_subsystem_init(void) {} - -#ifdef CAS_STEAL_PROFILE -static void cas_profile_update(int id, int retries) { - uint64_strip_t *cas_steal_profile = qlib->cas_steal_profile; - - if (cas_steal_profile == NULL) { return; } - if (retries >= CAS_STEAL_PROFILE_LENGTH) { - cas_steal_profile[id].fields[CAS_STEAL_PROFILE_LENGTH - 1]++; - } else { - cas_steal_profile[id].fields[retries]++; - } -} - -#else /* ifdef CAS_STEAL_PROFILE */ -#define cas_profile_update(x, y) \ - do { \ - } while (0) -#endif /* ifdef CAS_STEAL_PROFILE */ - -ssize_t INTERNAL qt_threadqueue_advisory_queuelen(qt_threadqueue_t *q) { /*{{{*/ - return 0; -} /*}}}*/ - -/*****************************************/ -/* functions to manage the thread queues */ -/*****************************************/ - -static QINLINE long qthread_steal_chunksize(void); -static QINLINE qthread_t *qthread_steal(qt_threadqueue_t *thiefq); - -qt_threadqueue_t INTERNAL *qt_threadqueue_new(void) { /*{{{*/ - qt_threadqueue_t *q; - - posix_memalign((void **)&q, 64, sizeof(qt_threadqueue_t)); - - if (q != NULL) { - qt_threadqueue_union_t top; - top.entry.value = NULL; - top.entry.index = 0; - top.entry.counter = 0; - - q->top = top.sse; - q->size = 1024; - q->bottom = 0; - q->empty = 1; - q->stealing = 0; - QTHREAD_FASTLOCK_INIT(q->spinlock); - posix_memalign((void **)&(q->base), 64, q->size * sizeof(m128i)); - posix_memalign((void **)&(q->rwlock), 64, sizeof(rwlock_t)); - rwlock_init(q->rwlock); - memset(q->base, 0, q->size * sizeof(m128i)); - } - return q; -} /*}}}*/ - -void INTERNAL qt_threadqueue_free(qt_threadqueue_t *q) { /*{{{*/ - // mspiegel: is it necessary to drain the queue? - /* while (q->head != q->tail) { - * qt_scheduler_get_thread(q, 1); - * } */ - qt_free((void *)q->base); - qt_free((void *)q); -} /*}}}*/ - -static QINLINE int -qt_threadqueue_cas128(uint128_t *src, uint128_t *cmp, uint128_t *with) { /*{{{*/ - char result; - - // (AT&T syntax) - __asm__ __volatile__("lock; cmpxchg16b (%6);" - "setz %7; " - : "=a"(cmp->lo), "=d"(cmp->hi) - : "0"(cmp->lo), - "1"(cmp->hi), - "b"(with->lo), - "c"(with->hi), - "r"(src), - "m"(result) - : "cc", "memory"); - /* (Intel syntax) - * __asm__ __volatile__ - * ( - * "lock cmpxchg16b oword ptr %1\n\t" - * "setz %0" - * : "=q" ( result ) - * , "+m" ( *src ) - * , "+d" ( cmp->hi ) - * , "+a" ( cmp->lo ) - * : "c" ( with->hi ) - * , "b" ( with->lo ) - * : "cc" - * ); */ - return result; -} /*}}}*/ - -static QINLINE void -qt_threadqueue_finish(qt_threadqueue_t *q, - qt_threadqueue_entry_t top_entry) { /*{{{*/ - qt_threadqueue_union_t snapshot, oldnode; - - uint32_t index = top_entry.index; - - snapshot.sse = q->base[index]; - - oldnode.entry.value = snapshot.entry.value; - oldnode.entry.counter = top_entry.counter - 1; - oldnode.entry.index = top_entry.index = 0; - - if (snapshot.entry.counter > oldnode.entry.counter) { return; } - if (snapshot.entry.counter == top_entry.counter) { return; } - - qt_threadqueue_cas128((uint128_t *)q->base + index, - (uint128_t *)&oldnode, - (uint128_t *)&top_entry); -} /*}}}*/ - -#ifdef QTHREAD_USE_SPAWNCACHE -qthread_t INTERNAL * -qt_threadqueue_private_dequeue(qt_threadqueue_private_t *c) { - return NULL; -} - -int INTERNAL -qt_threadqueue_private_enqueue(qt_threadqueue_private_t *restrict pq, - qt_threadqueue_t *restrict q, - qthread_t *restrict t) { - return 0; -} - -int INTERNAL qt_threadqueue_private_enqueue_yielded( - qt_threadqueue_private_t *restrict q, qthread_t *restrict t) { - return 0; -} - -#endif /* ifdef QTHREAD_USE_SPAWNCACHE */ - -/* enqueue at tail */ -void INTERNAL qt_threadqueue_enqueue(qt_threadqueue_t *restrict q, - qthread_t *restrict t) { /*{{{*/ - qt_threadqueue_union_t oldtop, snapshot, lastchance; - qt_threadqueue_entry_t newtop; - uint32_t nextindex; - -#ifdef CAS_STEAL_PROFILE - int cycles = 0; -#endif - - int id = qthread_worker_unique(NULL); - - rwlock_rdlock(q->rwlock, id); - - oldtop.sse = q->top; - - while (1) { -#ifdef CAS_STEAL_PROFILE - cycles++; -#endif - - qt_threadqueue_finish(q, oldtop.entry); - - nextindex = (oldtop.entry.index + 1) % q->size; - - if (nextindex == q->bottom) { - // Pthread reader-writer locks will deadlock - // on lock promotion attempts. - rwlock_rdunlock(q->rwlock, id); - qt_threadqueue_resize_and_enqueue(q, t); - cas_profile_update(id, cycles - 1); - return; - } - - snapshot.sse = q->base[nextindex]; - newtop.value = t; - newtop.index = nextindex; - newtop.counter = snapshot.entry.counter + 1; - - lastchance.sse = q->top; - - if (lastchance.entry.counter != oldtop.entry.counter) { - oldtop.entry = lastchance.entry; - continue; - } - - if (qt_threadqueue_cas128( - (uint128_t *)&(q->top), (uint128_t *)&oldtop, (uint128_t *)&newtop)) { - break; - } - } - - q->empty = 0; - - rwlock_rdunlock(q->rwlock, id); - - cas_profile_update(id, cycles - 1); -} /*}}}*/ - -/* enqueue multiple (from steal) */ -void INTERNAL -qt_threadqueue_enqueue_multiple(qt_threadqueue_t *q, - int stealcount, - qthread_t **stealbuffer, - qthread_shepherd_t *shep) { /*{{{*/ - /* save element 0 for the thief */ - for (int i = 1; i < stealcount; i++) { - qthread_t *t = stealbuffer[i]; - t->target_shepherd = shep->shepherd_id; - qt_threadqueue_enqueue(q, t); - } -} /*}}}*/ - -/* This function is called when the queue is full. - * Either a thread is enqueuing at the tail, - * or a thread is enqueuing at the head. - * In both cases, we need to grow the array. - * PRECONDITION: the writer lock must be held. */ -static QINLINE void qt_threadqueue_resize(qt_threadqueue_t *q) { - // TODO: error checking has not been performed. - // If oldsize == UINT32_MAX, then indicate an error. - // If memory allocation returns NULL, then indicate an error. - - uint32_t oldsize = q->size, bottom = q->bottom; - uint32_t newsize = (oldsize > (UINT32_MAX / 2)) ? UINT32_MAX : oldsize * 2; - m128i *newloc; - qt_threadqueue_union_t top; - - qassert(posix_memalign((void **)&(newloc), 64, newsize * sizeof(m128i)), 0); - - assert(newsize > oldsize); - assert(newloc != NULL); - - uint32_t len1 = oldsize - bottom; - uint32_t len2 = bottom; - - m128i *dest1 = newloc; - m128i *dest2 = newloc + len1; - - m128i *src1 = q->base + bottom; - m128i *src2 = q->base; - - memcpy(dest1, src1, len1 * sizeof(m128i)); - memcpy(dest2, src2, len2 * sizeof(m128i)); - memset(newloc + oldsize, 0, (newsize - oldsize) * sizeof(m128i)); - - top.sse = q->top; - top.entry.counter = top.entry.counter + 1; - top.entry.index = oldsize - 1; - q->top = top.sse; - - q->base = newloc; - q->size = newsize; - q->bottom = 0; -} - -void INTERNAL qt_threadqueue_resize_and_enqueue(qt_threadqueue_t *q, - qthread_t *t) { /*{{{*/ - int id = qthread_worker_unique(NULL); - - rwlock_wrlock(q->rwlock, id); - - qt_threadqueue_union_t top, newtop, snapshot; - uint32_t nextindex, oldsize, bottom; - - top.sse = q->top; - - qt_threadqueue_finish(q, top.entry); - - oldsize = q->size; - bottom = q->bottom; - nextindex = (top.entry.index + 1) % oldsize; - - if (nextindex == bottom) { - qt_threadqueue_resize(q); - nextindex = oldsize; - } - - snapshot.sse = q->base[nextindex]; - - newtop.entry.value = t; - newtop.entry.index = nextindex; - newtop.entry.counter = snapshot.entry.counter + 1; - - snapshot.entry.value = t; - snapshot.entry.counter = snapshot.entry.counter + 1; - - q->top = newtop.sse; - q->base[nextindex] = snapshot.sse; - - q->empty = 0; - - rwlock_wrunlock(q->rwlock); -} /*}}}*/ - -/* yielded threads enqueue at head */ -void INTERNAL qt_threadqueue_enqueue_yielded(qt_threadqueue_t *restrict q, - qthread_t *restrict t) { /*{{{*/ - int id = qthread_worker_unique(NULL); - - rwlock_wrlock(q->rwlock, id); - - qt_threadqueue_union_t top; - - top.sse = q->top; - - qt_threadqueue_finish(q, top.entry); - - /* Three cases to consider: - * (a) The queue is empty. Move the - * new thread into q->top and return. - * (b) The queue is full. Resize the - * queue and then continue to part (c). - * (c) otherwise. - */ - if (top.entry.index == q->bottom) { - qt_threadqueue_union_t snapshot, newtop; - - uint32_t nextindex = (top.entry.index + 1) % q->size; - snapshot.sse = q->base[nextindex]; - uint32_t nextcounter = snapshot.entry.counter + 1; - - newtop.entry.index = nextindex; - newtop.entry.value = t; - newtop.entry.counter = nextcounter; - - snapshot.entry.value = t; - snapshot.entry.counter = nextcounter; - - q->top = newtop.sse; - q->base[nextindex] = snapshot.sse; - rwlock_wrunlock(q->rwlock); - return; - } else if ((top.entry.index + 1) % q->size == q->bottom) { - qt_threadqueue_resize(q); - } - - uint32_t bot = q->bottom, size = q->size; - uint32_t newbot = (bot - 1) % size; - - qt_threadqueue_union_t bottom, newbottom; - - bottom.sse = q->base[bot]; - newbottom.sse = q->base[newbot]; - bottom.entry.counter += 1; - newbottom.entry.counter += 1; - bottom.entry.value = t; - newbottom.entry.value = NULL; - - q->base[bot] = bottom.sse; - q->base[newbot] = newbottom.sse; - q->bottom = newbot; - - q->empty = 0; - - rwlock_wrunlock(q->rwlock); -} /*}}}*/ - -qthread_t static QINLINE *qt_threadqueue_dequeue_helper(qt_threadqueue_t *q) { - qthread_t *t = NULL; - - q->stealing = 1; - - QTHREAD_FASTLOCK_LOCK(&q->spinlock); - if (!(q->steal_disable) && (q->stealing)) { t = qthread_steal(q); } - QTHREAD_FASTLOCK_UNLOCK(&q->spinlock); - - return (t); -} - -/* dequeue at tail, unlike original qthreads implementation */ -qthread_t INTERNAL * -qt_scheduler_get_thread(qt_threadqueue_t *q, - qt_threadqueue_private_t *Q_UNUSED(qc), - uint_fast8_t active) { /*{{{*/ - qthread_t *t = NULL; - rwlock_t *rwlock = q->rwlock; - qt_threadqueue_union_t oldtop, lastchance; - -#ifdef CAS_STEAL_PROFILE - int cycles = 0; -#endif - - int id = qthread_worker_unique(NULL); - - assert(q != NULL); - - rwlock_rdlock(rwlock, id); - - oldtop.sse = q->top; - - while (1) { -#ifdef CAS_STEAL_PROFILE - cycles++; -#endif - - if (oldtop.entry.index == q->bottom) { - rwlock_rdunlock(rwlock, id); - if (active) { - t = qt_threadqueue_dequeue_helper(q); - if (t != NULL) { - cas_profile_update(id, cycles - 1); - return (t); - } - } - rwlock_rdlock(rwlock, id); - oldtop.sse = q->top; - } else { - t = oldtop.entry.value; - - if ((t->flags & QTHREAD_REAL_MCCOY)) { // only needs to be on worker 0 for - // termination - switch (qthread_worker(NULL)) { - case NO_WORKER: // only happens during termination -- keep trying - rwlock_rdunlock(rwlock, id); // release lock and get new value - rwlock_rdlock(rwlock, id); - oldtop.sse = q->top; - continue; - case 0: break; - default: - /* McCoy thread can only run on worker 0 */ - rwlock_rdunlock(rwlock, id); - if (active) { - t = qt_threadqueue_dequeue_helper(q); - if (t != NULL) { - cas_profile_update(id, cycles - 1); - return (t); - } - } - rwlock_rdlock(rwlock, id); - oldtop.sse = q->top; - continue; - } - } - - qt_threadqueue_finish(q, oldtop.entry); - - uint32_t previndex = (oldtop.entry.index - 1) % q->size; - - qt_threadqueue_entry_t newtop; - qt_threadqueue_union_t belowtop; - belowtop.sse = q->base[previndex]; - newtop.index = previndex; - newtop.value = belowtop.entry.value; - newtop.counter = belowtop.entry.counter + 1; - - lastchance.sse = q->top; - - if (lastchance.entry.counter != oldtop.entry.counter) { - oldtop.entry = lastchance.entry; - continue; - } - - if (qt_threadqueue_cas128((uint128_t *)&(q->top), - (uint128_t *)&oldtop, - (uint128_t *)&newtop)) { - rwlock_rdunlock(rwlock, id); - assert(t != NULL); - cas_profile_update(id, cycles - 1); - return (t); - } - } - } -} /*}}}*/ - -int static QINLINE qt_threadqueue_stealable(qthread_t *t) { - return (t->thread_state != QTHREAD_STATE_YIELDED && - t->thread_state != QTHREAD_STATE_TERM_SHEP && - !(t->flags & QTHREAD_UNSTEALABLE)); -} - -void INTERNAL qt_threadqueue_enqueue_unstealable(qt_threadqueue_t *q, - qthread_t **nostealbuffer, - int amtNotStolen) { - if (amtNotStolen == 0) { return; } - - uint32_t bottom = q->bottom; - - qt_threadqueue_union_t top; - top.sse = q->top; - - if (top.entry.index == bottom) { - top.entry.value = nostealbuffer[amtNotStolen - 1]; - q->top = top.sse; - } - - qt_threadqueue_union_t snapshot; - for (int i = amtNotStolen - 1; i >= 0; i--) { - snapshot.sse = q->base[bottom]; - snapshot.entry.value = nostealbuffer[i]; - q->base[bottom] = snapshot.sse; - bottom = (bottom - 1) % q->size; - } - q->bottom = bottom; -} - -/* dequeue stolen threads at head, skip yielded threads */ -INTERNAL int qt_threadqueue_dequeue_steal(qt_threadqueue_t *q, - qthread_t **nostealbuffer, - qthread_t **stealbuffer) { /*{{{ */ - assert(q != NULL); - - int amtStolen = 0, amtNotStolen = 0; - - int id = qthread_worker_unique(NULL); - - if (q->empty) { return (0); } - - rwlock_wrlock(q->rwlock, id); - - qt_threadqueue_union_t top; - - top.sse = q->top; - qt_threadqueue_finish(q, top.entry); - - uint32_t bottom = q->bottom; - uint32_t current = (bottom + 1) % q->size; - - qt_threadqueue_union_t snapshot; - - while (amtStolen < qthread_steal_chunksize()) { - snapshot.sse = q->base[current]; - - /* Three cases to consider: - * (a) The queue is empty. - * (b) The queue contains a single element. - * (c) Otherwise. - */ - if (bottom == top.entry.index) { - q->empty = 1; - break; - } else if (current == top.entry.index) { - qthread_t *candidate = top.entry.value; - if (qt_threadqueue_stealable(candidate)) { - stealbuffer[amtStolen++] = candidate; - snapshot.entry.value = NULL; - snapshot.entry.counter = snapshot.entry.counter + 1; - top.entry.value = NULL; - top.entry.counter = snapshot.entry.counter; - q->base[current] = snapshot.sse; - q->top = top.sse; - bottom = current; - } - q->empty = 1; - break; - } else { - qthread_t *candidate = snapshot.entry.value; - if (qt_threadqueue_stealable(candidate)) { - stealbuffer[amtStolen++] = candidate; - } else if (amtNotStolen == STEAL_BUFFER_LENGTH) { - abort(); // should never happen - } else { - nostealbuffer[amtNotStolen++] = candidate; - } - snapshot.entry.value = NULL; - snapshot.entry.counter = snapshot.entry.counter + 1; - q->base[current] = snapshot.sse; - bottom = current; - current = (current + 1) % q->size; - } - } - q->bottom = bottom; - - qt_threadqueue_enqueue_unstealable(q, nostealbuffer, amtNotStolen); - - rwlock_wrunlock(q->rwlock); - -#ifdef STEAL_PROFILE // should give mechanism to make steal profiling optional - qthread_incr(&q->steal_amount_stolen, amtStolen); -#endif - - return (amtStolen); -} /*}}} */ - -/* Returns the number of tasks to steal per steal operation (chunk size) */ -static QINLINE long qthread_steal_chunksize(void) { /*{{{*/ - static long chunksize = 0; - - if (chunksize == 0) { - chunksize = - qt_internal_get_env_num("STEAL_CHUNKSIZE", qlib->nworkerspershep, 1); - } - - return chunksize; -} /*}}}*/ - -/* Steal work from another shepherd's queue - * Returns the amount of work stolen - * PRECONDITION: the readlock must be aquired. - */ -static QINLINE qthread_t *qthread_steal(qt_threadqueue_t *thiefq) { /*{{{*/ - int i; - - extern TLS_DECL(qthread_shepherd_t *, shepherd_structs); - qthread_shepherd_t *victim_shepherd; - qthread_worker_t *worker = (qthread_worker_t *)TLS_GET(shepherd_structs); - qthread_shepherd_t *thief_shepherd = (qthread_shepherd_t *)worker->shepherd; - qthread_t **nostealbuffer = worker->nostealbuffer; - qthread_t **stealbuffer = worker->stealbuffer; - -#ifdef STEAL_PROFILE // should give mechanism to make steal profiling optional - qthread_incr(&thief_shepherd->steal_called, 1); -#endif -#ifdef QTHREAD_OMP_AFFINITY - if (thief_shepherd->stealing_mode == QTHREAD_STEAL_ON_ALL_IDLE) { - for (i = 0; i < qlib->nworkerspershep; i++) - if (thief_shepherd->workers[i].current != NULL) { - thiefq->stealing = 0; - return (NULL); - } - thief_shepherd->stealing_mode = QTHREAD_STEAL_ON_ANY_IDLE; - } -#endif -#ifdef STEAL_PROFILE // should give mechanism to make steal profiling optional - qthread_incr(&thief_shepherd->steal_attempted, 1); -#endif - int shepherd_offset = qthread_worker(NULL) % qlib->nshepherds; - for (i = 1; i < qlib->nshepherds; i++) { - shepherd_offset = (shepherd_offset + 1) % qlib->nshepherds; - if (shepherd_offset == thief_shepherd->shepherd_id) { - shepherd_offset = (shepherd_offset + 1) % qlib->nshepherds; - } - victim_shepherd = &qlib->shepherds[shepherd_offset]; - if (victim_shepherd->ready->empty) { continue; } - int amtStolen = qt_threadqueue_dequeue_steal( - victim_shepherd->ready, nostealbuffer, stealbuffer); - if (amtStolen > 0) { -#ifdef STEAL_PROFILE // should give mechanism to make steal profiling optional - qthread_incr(&thief_shepherd->steal_successful, 1); -#endif - qt_threadqueue_enqueue_multiple( - thiefq, amtStolen, stealbuffer, thief_shepherd); - thiefq->stealing = 0; - return (stealbuffer[0]); - } -#ifdef STEAL_PROFILE // should give mechanism to make steal profiling optional - else { - qthread_incr(&thief_shepherd->steal_failed, 1); - } -#endif - } - thiefq->stealing = 0; - return (NULL); -} /*}}}*/ - -#ifdef CAS_STEAL_PROFILE -void INTERNAL qthread_cas_steal_stat(void) { - int i, j; - uint64_strip_t accum; - uint64_t total = 0; - double weighted_sum = 0.0; - - for (j = 0; j < CAS_STEAL_PROFILE_LENGTH; j++) { accum.fields[j] = 0; } - for (i = 0; i < qlib->nshepherds * qlib->nworkerspershep; i++) { - for (j = 0; j < CAS_STEAL_PROFILE_LENGTH; j++) { - accum.fields[j] += qlib->cas_steal_profile[i].fields[j]; - } - } - for (j = 0; j < CAS_STEAL_PROFILE_LENGTH; j++) { - total += accum.fields[j]; - weighted_sum += (accum.fields[j] * j); - } - - fprintf(stdout, "threadqueue distribution of CAS retries\n"); - for (j = 0; j < (CAS_STEAL_PROFILE_LENGTH - 1); j++) { - fprintf( - stdout, "%d - %4.2f%%\n", j, ((double)accum.fields[j]) / total * 100.0); - } - fprintf( - stdout, "%d+ - %4.2f%%\n", j, ((double)accum.fields[j]) / total * 100.0); - fprintf(stdout, "approximate mean is %4.2f \n", weighted_sum / total); - fprintf(stdout, "\n"); -} - -#endif /* ifdef CAS_STEAL_PROFILE */ - -#ifdef STEAL_PROFILE // should give mechanism to make steal profiling optional -void INTERNAL qthread_steal_stat(void) { - int i; - - assert(qlib); - for (i = 0; i < qlib->nshepherds; i++) { - fprintf(stdout, - "shepherd %d - steals called %ld attempted %ld failed %ld " - "successful %ld work stolen %ld\n", - qlib->shepherds[i].shepherd_id, - qlib->shepherds[i].steal_called, - qlib->shepherds[i].steal_attempted, - qlib->shepherds[i].steal_failed, - qlib->shepherds[i].steal_successful, - qlib->shepherds[i].steal_amount_stolen); - } -} - -#endif /* ifdef STEAL_PROFILE */ - -/* walk queue looking for a specific value -- if found remove it (and start - * it running) -- if not return NULL - */ -qthread_t INTERNAL *qt_threadqueue_dequeue_specific(qt_threadqueue_t *q, - void *value) { /*{{{*/ - int id = qthread_worker_unique(NULL); - - assert(q != NULL); - - rwlock_wrlock(q->rwlock, id); - - qt_threadqueue_union_t top; - - top.sse = q->top; - qt_threadqueue_finish(q, top.entry); - - if (q->bottom == top.entry.index) { - rwlock_wrunlock(q->rwlock); - return (NULL); - } - - if (top.entry.value->ret == value) { - qt_threadqueue_union_t snapshot; - - qthread_t *retval = top.entry.value; - uint32_t previndex = (top.entry.index - 1) % q->size; - snapshot.sse = q->base[previndex]; - - top.entry.index = previndex; - top.entry.value = snapshot.entry.value; - top.entry.counter = snapshot.entry.counter; - - q->top = top.sse; - - rwlock_wrunlock(q->rwlock); - return (retval); - } else { - uint32_t current = (q->bottom + 1) % q->size; - uint32_t size = q->size; - uint32_t bottom = q->bottom; - while (current != top.entry.index) { - qt_threadqueue_union_t snapshot; - - snapshot.sse = q->base[current]; - qthread_t *t = snapshot.entry.value; - if (t->ret == value) { - /* Two cases: - * (i) The current index is below the top - * index in the array. It is easier to - * slide the elements above current, and - * decrement the top index. - * (ii) The current index is above the top - * index in the array. It is easier to - * slide the elements below current, and - * increment the bottom index. - */ - if (current < top.entry.index) { - memmove((void *)(q->base + current), - (void const *)(q->base + current + 1), - (top.entry.index - current - 1) * - sizeof(qt_threadqueue_entry_t)); - top.entry.index = (top.entry.index - 1) % q->size; - q->top = top.sse; - } else { - memmove((void *)(q->base + bottom + 1), - (void const *)(q->base + bottom), - (current - bottom) * sizeof(qt_threadqueue_entry_t)); - q->bottom = (bottom + 1) % q->size; - } - rwlock_wrunlock(q->rwlock); - return (t); - } - current = (current + 1) % size; - } - } - - rwlock_wrunlock(q->rwlock); - return (NULL); -} /*}}}*/ - -void INTERNAL qthread_steal_enable() { /*{{{*/ - qt_threadqueue_t *q; - size_t i; - size_t numSheps = qthread_num_shepherds(); - - for (i = 0; i < numSheps; i++) { - q = qlib->threadqueues[i]; - q->steal_disable = 0; - } -} /*}}}*/ - -void INTERNAL qthread_steal_disable() { /*{{{*/ - qt_threadqueue_t *q; - size_t i; - size_t numSheps = qthread_num_shepherds(); - - for (i = 0; i < numSheps; i++) { - q = qlib->threadqueues[i]; - q->steal_disable = 1; - } -} /*}}}*/ - -#if 0 // begin test code, because this function - // can't go in the test suite as it calls internal functions - -#include - -int qt_threadqueue_test() -{ - printf("Initializing test\n"); - qthread_initialize(); - qt_threadqueue_t *threadqueue = qt_threadqueue_new(&(qlib->shepherds[0])); - - assert(threadqueue->empty == 1); - if(threadqueue->empty != 1) { - fprintf(stderr, "Threadqueue was initialized with empty bit set to FALSE\n"); - return -1; - } - - qthread_t *task = qthread_thread_new(NULL, NULL, 0, NULL, 0); - - printf("Enqueueing task.\n"); - qt_threadqueue_enqueue(threadqueue, task); - - assert(threadqueue->empty != 1); - - if(threadqueue->empty == 1) { - fprintf(stderr, "Threadqueue empty bit set after enqueue operation\n"); - return -1; - } - - printf("Dequeueing task\n"); - qthread_t *result = qt_scheduler_get_thread(threadqueue, 1); - - assert(result == task); - - if(result != task) { - fprintf(stderr, "Task enqueued to stack is not identical to task dequeued from stack\n"); - return -1; - } - - uint32_t size = threadqueue->size; - printf("Queue size is %d\n", size); - printf("Enqueueing the same task %d times\n", size); - - for(int i = 0; i < size; i++) { - qt_threadqueue_enqueue(threadqueue, task); - } - - uint32_t newsize = threadqueue->size; - printf("Queue size is %d\n", newsize); - - assert(newsize == 2 * size); - if (newsize != 2 * size) { - fprintf(stderr, "New size is not equal to twice the old size\n"); - return -1; - } - - return 0; -} - -// end test code -#endif /* if 0 */ - -qthread_shepherd_id_t INTERNAL -qt_threadqueue_choose_dest(qthread_shepherd_t *curr_shep) { - if (curr_shep) { - return curr_shep->shepherd_id; - } else { - return (qthread_shepherd_id_t)0; - } -} - -size_t INTERNAL qt_threadqueue_policy(const enum threadqueue_policy policy) { - switch (policy) { - default: return THREADQUEUE_POLICY_UNSUPPORTED; - } -} - -/* vim:set expandtab: */ diff --git a/src/threadqueues/sherwood_threadqueues.c b/src/threadqueues/sherwood_threadqueues.c index 642937475..86483252e 100644 --- a/src/threadqueues/sherwood_threadqueues.c +++ b/src/threadqueues/sherwood_threadqueues.c @@ -198,7 +198,7 @@ qt_threadqueue_pools_t generic_threadqueue_pools; (qt_threadqueue_t *)qt_mpool_alloc(generic_threadqueue_pools.queues) #define FREE_THREADQUEUE(t) qt_mpool_free(generic_threadqueue_pools.queues, t) -static QINLINE qt_threadqueue_node_t *ALLOC_TQNODE(void) { /*{{{ */ +static inline qt_threadqueue_node_t *ALLOC_TQNODE(void) { /*{{{ */ return (qt_threadqueue_node_t *)qt_mpool_alloc( generic_threadqueue_pools.nodes); } /*}}} */ @@ -244,7 +244,7 @@ ssize_t INTERNAL qt_threadqueue_advisory_queuelen(qt_threadqueue_t *q) { /*{{{*/ /* functions to manage the thread queues */ /*****************************************/ -static QINLINE qt_threadqueue_node_t * +static inline qt_threadqueue_node_t * qthread_steal(qthread_shepherd_t *thief_shepherd); qt_threadqueue_t INTERNAL *qt_threadqueue_new(void) { /*{{{*/ @@ -306,7 +306,7 @@ void INTERNAL qt_threadqueue_free(qt_threadqueue_t *q) { /*{{{*/ FREE_THREADQUEUE(q); } /*}}}*/ -static QINLINE int qt_threadqueue_isstealable(qthread_t *t) { /*{{{*/ +static inline int qt_threadqueue_isstealable(qthread_t *t) { /*{{{*/ return ((atomic_load_explicit(&t->flags, memory_order_relaxed) & QTHREAD_UNSTEALABLE) == 0) ? 1 @@ -1148,7 +1148,7 @@ qt_threadqueue_dequeue_steal(qt_threadqueue_t *h, /* Steal work from another shepherd's queue * Returns the work stolen */ -static QINLINE qt_threadqueue_node_t * +static inline qt_threadqueue_node_t * qthread_steal(qthread_shepherd_t *thief_shepherd) { /*{{{*/ qt_threadqueue_node_t *stolen = NULL; diff --git a/test/basics/Makefile.am b/test/basics/Makefile.am index adc017daa..8035ce924 100644 --- a/test/basics/Makefile.am +++ b/test/basics/Makefile.am @@ -51,10 +51,6 @@ check_PROGRAMS = $(TESTS) TESTS_ENVIRONMENT = -if HAVE_PROG_TIMELIMIT -TESTS_ENVIRONMENT += @timelimit_path@ -T 1 -endif - if WANT_SINGLE_WORKER_SCHEDULER TESTS_ENVIRONMENT += env QT_NUM_SHEPHERDS=2 QT_NUM_WORKERS_PER_SHEPHERD=1 endif diff --git a/test/benchmarks/pmea09/time_qpool.c b/test/benchmarks/pmea09/time_qpool.c index 29912e797..86b5c6029 100644 --- a/test/benchmarks/pmea09/time_qpool.c +++ b/test/benchmarks/pmea09/time_qpool.c @@ -9,9 +9,6 @@ #include #include #include -#ifdef QTHREAD_HAVE_LIBNUMA -#include -#endif #include "argparsing.h" #define ELEMENT_COUNT 10000 @@ -140,11 +137,7 @@ int main(int argc, char *argv[]) { numa_size = iterations * 48 / numshep; iprintf("numa_size = %i\n", (int)numa_size); for (i = 0; i < numshep; i++) { -#ifdef QTHREAD_HAVE_LIBNUMA - numa_allocs[i] = numa_alloc_onnode(numa_size, i); -#else numa_allocs[i] = malloc(numa_size); -#endif pthread_mutex_init(ptr_lock + i, NULL); } memcpy(numa_pools, numa_allocs, sizeof(void *) * numshep); @@ -171,11 +164,7 @@ int main(int argc, char *argv[]) { iterations, qtimer_secs(timer)); for (i = 0; i < numshep; i++) { -#ifdef QTHREAD_HAVE_LIBNUMA - numa_free(numa_pools[i], numa_size); -#else free(numa_pools[i]); -#endif } free(numa_pools); free(numa_allocs); diff --git a/test/features/Makefile.am b/test/features/Makefile.am index 70ad31824..eb5eedf41 100644 --- a/test/features/Makefile.am +++ b/test/features/Makefile.am @@ -46,10 +46,6 @@ check_PROGRAMS = $(TESTS) TESTS_ENVIRONMENT = -if HAVE_PROG_TIMELIMIT -TESTS_ENVIRONMENT += @timelimit_path@ -T 2 -endif - if WANT_SINGLE_WORKER_SCHEDULER TESTS_ENVIRONMENT += env QT_NUM_SHEPHERDS=2 QT_NUM_WORKERS_PER_SHEPHERD=1 endif diff --git a/test/stress/Makefile.am b/test/stress/Makefile.am index c68d7d499..712673da1 100644 --- a/test/stress/Makefile.am +++ b/test/stress/Makefile.am @@ -25,10 +25,6 @@ check_PROGRAMS = $(TESTS) TESTS_ENVIRONMENT = -if HAVE_PROG_TIMELIMIT -TESTS_ENVIRONMENT += @timelimit_path@ -T 30 -endif - if WANT_SINGLE_WORKER_SCHEDULER TESTS_ENVIRONMENT += env QT_NUM_SHEPHERDS=2 QT_NUM_WORKERS_PER_SHEPHERD=1 endif