From 0198c922726f87e08048b43e54463427c3a044fc Mon Sep 17 00:00:00 2001 From: Ian Henriksen Date: Wed, 18 Sep 2024 12:03:13 -0600 Subject: [PATCH 01/11] Remove old unused/untested threadqueues. --- configure.ac | 18 +- src/Makefile.am | 4 - src/threadqueues/lifo_threadqueues.c | 349 -------- src/threadqueues/mtsfifo_threadqueues.c | 484 ----------- src/threadqueues/mutexfifo_threadqueues.c | 317 ------- src/threadqueues/nottingham_threadqueues.c | 963 --------------------- 6 files changed, 4 insertions(+), 2131 deletions(-) delete mode 100644 src/threadqueues/lifo_threadqueues.c delete mode 100644 src/threadqueues/mtsfifo_threadqueues.c delete mode 100644 src/threadqueues/mutexfifo_threadqueues.c delete mode 100644 src/threadqueues/nottingham_threadqueues.c diff --git a/configure.ac b/configure.ac index c875aadb..bfe0e6f7 100644 --- a/configure.ac +++ b/configure.ac @@ -335,10 +335,9 @@ AC_ARG_ENABLE([third-party-benchmarks], AC_ARG_WITH([scheduler], [AS_HELP_STRING([--with-scheduler=[[type]]], [Specify the scheduler. Options when using - single-threaded shepherds are: nemesis (default), - lifo, mdlifo, mutexfifo, and mtsfifo. Options - when using multi-threaded shepherds are: sherwood - (default), distrib and nottingham. Details on + single-threaded shepherds are: nemesis (default). + Options when using multi-threaded shepherds are: + sherwood (default), and distrib. Details on these options are in the SCHEDULING file.])]) AC_ARG_WITH([sinc], @@ -670,18 +669,9 @@ AS_IF([test "x$with_scheduler" != "x"], default) [with_scheduler="sherwood"] ;; - sherwood|nemesis|lifo|mutexfifo|mtsfifo|distrib) + sherwood|nemesis|distrib) # all valid options that require no additional configuration ;; - mdlifo) - [with_scheduler=lifo] - [using_mdlifo=yes] - AC_DEFINE([QTHREAD_LIFO_MULTI_DEQUEUER], [1], [Enable multiple-dequeuer support for lifo scheduler]) - ;; - nottingham) - AS_IF([test "x$qthread_cv_atomic_CAS128" != "xyes"], - [AC_MSG_ERROR([The $with_scheduler scheduler requires a 128-bit CAS.])]) - ;; *) AC_MSG_ERROR([Unknown scheduler option]) ;; diff --git a/src/Makefile.am b/src/Makefile.am index 86813209..389e5556 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -87,12 +87,8 @@ endif EXTRA_DIST += \ threadqueues/distrib_threadqueues.c \ - threadqueues/lifo_threadqueues.c \ threadqueues/nemesis_threadqueues.c \ - threadqueues/mutexfifo_threadqueues.c \ - threadqueues/mtsfifo_threadqueues.c \ threadqueues/sherwood_threadqueues.c \ - threadqueues/nottingham_threadqueues.c \ sincs/donecount.c \ sincs/donecount_cas.c \ sincs/original.c \ diff --git a/src/threadqueues/lifo_threadqueues.c b/src/threadqueues/lifo_threadqueues.c deleted file mode 100644 index b23e7f59..00000000 --- a/src/threadqueues/lifo_threadqueues.c +++ /dev/null @@ -1,349 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/* System Headers */ -#include -#include -#include - -/* Internal Headers */ -#include "qt_asserts.h" -#include "qt_atomics.h" -#include "qt_debug.h" -#include "qt_macros.h" -#include "qt_prefetch.h" -#include "qt_qthread_struct.h" -#include "qt_threadqueues.h" -#include "qt_visibility.h" -#include "qthread/qthread.h" -#include "qthread_innards.h" /* for qlib */ -#ifdef QTHREAD_USE_EUREKAS -#include "qt_eurekas.h" -#endif /* QTHREAD_USE_EUREKAS */ -#include "qt_subsystems.h" - -/* Note: this queue is SAFE to use with multiple de-queuers, with the caveat - * that if you have multiple dequeuer's, you'll need to solve the ABA problem. 
- * (single dequeuer == no ABA problem) Also, yielding changes behavior a bit in - * the multiple-dequeuer case. - */ - -/* Data Structures */ -struct _qt_threadqueue_node { - struct _qt_threadqueue_node *next; - qthread_t *thread; -}; - -struct _qt_threadqueue { - qt_threadqueue_node_t *stack; - /* the following is for estimating a queue's "busy" level, and is not - * guaranteed accurate (that would be a race condition) */ - saligned_t advisory_queuelen; -#ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE - uint32_t frustration; - QTHREAD_COND_DECL(trigger) -#endif -} /* qt_threadqueue_t */; - -/* Memory Management */ -#if defined(UNPOOLED_QUEUES) || defined(UNPOOLED) -#define ALLOC_THREADQUEUE() (qt_threadqueue_t *)MALLOC(sizeof(qt_threadqueue_t)) -#define FREE_THREADQUEUE(t) FREE(t, sizeof(qt_threadqueue_t)) -#define ALLOC_TQNODE() \ - (qt_threadqueue_node_t *)MALLOC(sizeof(qt_threadqueue_node_t)) -#define FREE_TQNODE(t) FREE(t, sizeof(qt_threadqueue_node_t)) - -void INTERNAL qt_threadqueue_subsystem_init(void) {} -#else /* if defined(UNPOOLED_QUEUES) || defined(UNPOOLED) */ -qt_threadqueue_pools_t generic_threadqueue_pools = {NULL, NULL}; -#define ALLOC_THREADQUEUE() \ - (qt_threadqueue_t *)qt_mpool_alloc(generic_threadqueue_pools.queues) -#define FREE_THREADQUEUE(t) qt_mpool_free(generic_threadqueue_pools.queues, t) -#define ALLOC_TQNODE() \ - (qt_threadqueue_node_t *)qt_mpool_alloc(generic_threadqueue_pools.nodes) -#define FREE_TQNODE(t) qt_mpool_free(generic_threadqueue_pools.nodes, t) - -static void qt_threadqueue_subsystem_shutdown(void) { - qt_mpool_destroy(generic_threadqueue_pools.queues); - qt_mpool_destroy(generic_threadqueue_pools.nodes); -} - -void INTERNAL qt_threadqueue_subsystem_init(void) { - generic_threadqueue_pools.queues = qt_mpool_create(sizeof(qt_threadqueue_t)); - generic_threadqueue_pools.nodes = - qt_mpool_create_aligned(sizeof(qt_threadqueue_node_t), sizeof(void *)); - qthread_internal_cleanup(qt_threadqueue_subsystem_shutdown); -} -#endif /* if defined(UNPOOLED_QUEUES) || defined(UNPOOLED) */ - -/* Thankfully, a basic LIFO stack does not suffer from the ABA problem. */ - -qt_threadqueue_t INTERNAL *qt_threadqueue_new(void) { /*{{{*/ - qt_threadqueue_t *q = ALLOC_THREADQUEUE(); - - qassert_ret(q != NULL, NULL); - - q->stack = NULL; - q->advisory_queuelen = 0; -#ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE - q->frustration = 0; - QTHREAD_COND_INIT(q->trigger); -#endif /* ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE */ - - return q; -} /*}}}*/ - -static qthread_t *qt_threadqueue_dequeue(qt_threadqueue_t *q) { /*{{{*/ - qt_threadqueue_node_t *retval = q->stack; - - if (retval != NULL) { - qt_threadqueue_node_t *old, *new; - -#ifdef QTHREAD_LIFO_MULTI_DEQUEUER -# error This dequeue function is not safe! retval may be freed before we dereference it to find the next ptr. Need to use hazardptrs. 
-#endif - do { - old = retval; - new = retval->next; - retval = qthread_cas_ptr(&q->stack, old, new); - } while (retval != old && retval != NULL); - } - if (retval != NULL) { - qthread_t *t = retval->thread; - FREE_TQNODE(retval); - (void)qthread_incr(&(q->advisory_queuelen), -1); - return t; - } else { - return NULL; - } -} /*}}}*/ - -void INTERNAL qt_threadqueue_free(qt_threadqueue_t *q) { /*{{{*/ - assert(q); - while (qt_threadqueue_dequeue(q)); -#ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE - QTHREAD_COND_DESTROY(q->trigger); -#endif - FREE_THREADQUEUE(q); -} /*}}}*/ - -#ifdef QTHREAD_USE_SPAWNCACHE -qthread_t INTERNAL * -qt_threadqueue_private_dequeue(qt_threadqueue_private_t *c) { /*{{{*/ - return NULL; -} /*}}}*/ - -int INTERNAL -qt_threadqueue_private_enqueue(qt_threadqueue_private_t *restrict pq, - qt_threadqueue_t *restrict q, - qthread_t *restrict t) { /*{{{*/ - return 0; -} /*}}}*/ - -int INTERNAL qt_threadqueue_private_enqueue_yielded( - qt_threadqueue_private_t *restrict q, qthread_t *restrict t) { /*{{{*/ - return 0; -} /*}}}*/ - -void INTERNAL qt_threadqueue_enqueue_cache(qt_threadqueue_t *q, - qt_threadqueue_private_t *cache) {} - -void INTERNAL qt_threadqueue_private_filter( - qt_threadqueue_private_t *restrict c, qt_threadqueue_filter_f f) {} -#endif /* ifdef QTHREAD_USE_SPAWNCACHE */ - -void INTERNAL qt_threadqueue_enqueue(qt_threadqueue_t *restrict q, - qthread_t *restrict t) { /*{{{*/ - qt_threadqueue_node_t *old, *new; - qt_threadqueue_node_t *node; - - assert(q); - assert(t); - - qthread_debug(THREADQUEUE_CALLS, "q(%p), t(%p->%u)\n", q, t, t->thread_id); - - node = ALLOC_TQNODE(); - assert(node != NULL); - node->thread = t; - node->next = NULL; - - old = q->stack; /* should be an atomic read */ - do { - node->next = old; - new = qthread_cas_ptr(&(q->stack), old, node); - if (new != old) { - old = new; - } else { - break; - } - } while (1); - (void)qthread_incr(&(q->advisory_queuelen), 1); - - /* awake waiter */ -#ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE - if (q->frustration) { - QTHREAD_COND_LOCK(q->trigger); - if (q->frustration) { - q->frustration = 0; - QTHREAD_COND_SIGNAL(q->trigger); - } - QTHREAD_COND_UNLOCK(q->trigger); - } -#endif -} /*}}}*/ - -void INTERNAL qt_threadqueue_enqueue_yielded(qt_threadqueue_t *restrict q, - qthread_t *restrict t) { /*{{{*/ - assert(q); - assert(t); - -#ifdef QTHREAD_LIFO_MULTI_DEQUEUER - qthread_t *top = qt_threadqueue_dequeue(q); - qt_threadqueue_enqueue(q, t); - if (top) { qt_threadqueue_enqueue(q, top); } -#else - /* THIS is not safe for multiple dequeuers */ - qt_threadqueue_node_t *cursor = q->stack; - if (cursor) { - qt_threadqueue_node_t *node; - while (cursor->next) { cursor = cursor->next; } - assert(cursor->next == NULL); - /* alloc the node */ - node = ALLOC_TQNODE(); - assert(node != NULL); - node->thread = t; - node->next = NULL; - /* append the node */ - cursor->next = node; - (void)qthread_incr(&(q->advisory_queuelen), 1); - } else { - qt_threadqueue_enqueue(q, t); - } -#endif /* ifdef QTHREAD_LIFO_MULTI_DEQUEUER */ -} /*}}}*/ - -ssize_t INTERNAL qt_threadqueue_advisory_queuelen(qt_threadqueue_t *q) { /*{{{*/ - assert(q); - return q->advisory_queuelen; -} /*}}}*/ - -qthread_t INTERNAL * -qt_scheduler_get_thread(qt_threadqueue_t *q, - qt_threadqueue_private_t *Q_UNUSED(qc), - uint_fast8_t Q_UNUSED(active)) { /*{{{*/ -#ifdef QTHREAD_USE_EUREKAS - qt_eureka_disable(); -#endif /* QTHREAD_USE_EUREKAS */ - qthread_t *retval = qt_threadqueue_dequeue(q); - - qthread_debug(THREADQUEUE_CALLS, "q(%p)\n", q); - if (retval == NULL) { 
-#ifdef QTHREAD_USE_EUREKAS - qt_eureka_check(0); -#endif /* QTHREAD_USE_EUREKAS */ - while (q->stack == NULL) { -#ifndef QTHREAD_CONDWAIT_BLOCKING_QUEUE - SPINLOCK_BODY(); -#else - COMPILER_FENCE; - if (qthread_incr(&q->frustration, 1) > 1000) { - QTHREAD_COND_LOCK(q->trigger); - if (q->frustration > 1000) { QTHREAD_COND_WAIT(q->trigger); } - QTHREAD_COND_UNLOCK(q->trigger); - } -#endif /* ifdef USE_HARD_POLLING */ - } -#ifdef QTHREAD_USE_EUREKAS - qt_eureka_disable(); -#endif /* QTHREAD_USE_EUREKAS */ - retval = qt_threadqueue_dequeue(q); - } - assert(retval); - qthread_debug(THREADQUEUE_BEHAVIOR, - "found thread %u (%p); q(%p)\n", - retval->thread_id, - retval, - q); - return retval; -} /*}}}*/ - -/* walk queue removing all tasks matching this description */ -void INTERNAL qt_threadqueue_filter(qt_threadqueue_t *q, - qt_threadqueue_filter_f f) { /*{{{*/ - qt_threadqueue_node_t *curs, **ptr; - - assert(q != NULL); - - curs = q->stack; - ptr = &q->stack; - while (curs) { - qthread_t *t = curs->thread; - switch (f(t)) { - case IGNORE_AND_CONTINUE: // ignore, move on - ptr = &curs->next; - curs = curs->next; - break; - case IGNORE_AND_STOP: // ignore, stop looking - return; - - case REMOVE_AND_CONTINUE: // remove, move on - { - qt_threadqueue_node_t *freeme = curs; - -#ifdef QTHREAD_USE_EUREKAS - qthread_internal_assassinate(t); -#endif /* QTHREAD_USE_EUREKAS */ - *ptr = curs->next; - curs = curs->next; - FREE_TQNODE(freeme); - break; - } - case REMOVE_AND_STOP: // remove, stop looking; -#ifdef QTHREAD_USE_EUREKAS - qthread_internal_assassinate(t); -#endif /* QTHREAD_USE_EUREKAS */ - *ptr = curs->next; - FREE_TQNODE(curs); - return; - } - } -} /*}}}*/ - -/* some place-holder functions */ -void INTERNAL qthread_steal_stat(void) {} - -void INTERNAL qthread_steal_enable(void) {} - -void INTERNAL qthread_steal_disable(void) {} - -void INTERNAL qthread_cas_steal_stat(void) {} - -qthread_shepherd_id_t INTERNAL -qt_threadqueue_choose_dest(qthread_shepherd_t *curr_shep) { - qthread_shepherd_id_t dest_shep_id = 0; - - if (curr_shep) { - dest_shep_id = curr_shep->sched_shepherd++; - curr_shep->sched_shepherd *= (qlib->nshepherds > (dest_shep_id + 1)); - } else { - dest_shep_id = (qthread_shepherd_id_t)qthread_internal_incr_mod( - &qlib->sched_shepherd, qlib->nshepherds, &qlib->sched_shepherd_lock); - } - - return dest_shep_id; -} - -qthread_t INTERNAL *qt_threadqueue_dequeue_specific(qt_threadqueue_t *q, - void *value) { - return NULL; -} - -size_t INTERNAL qt_threadqueue_policy(const enum threadqueue_policy policy) { - switch (policy) { - case SINGLE_WORKER: return THREADQUEUE_POLICY_TRUE; - default: return THREADQUEUE_POLICY_UNSUPPORTED; - } -} - -/* vim:set expandtab: */ diff --git a/src/threadqueues/mtsfifo_threadqueues.c b/src/threadqueues/mtsfifo_threadqueues.c deleted file mode 100644 index 7ed8f93f..00000000 --- a/src/threadqueues/mtsfifo_threadqueues.c +++ /dev/null @@ -1,484 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/* System Headers */ -#include - -/* Internal Headers */ -#include "qt_asserts.h" -#include "qt_atomics.h" -#include "qt_debug.h" -#include "qt_macros.h" -#include "qt_prefetch.h" -#include "qt_qthread_struct.h" -#include "qt_shepherd_innards.h" -#include "qt_threadqueues.h" -#include "qt_visibility.h" -#include "qthread/qthread.h" -#include "qthread_innards.h" /* for qlib */ -#if defined(UNPOOLED_QUEUES) || defined(UNPOOLED) -#include "qt_alloc.h" -#endif -#ifdef QTHREAD_USE_EUREKAS -#include "qt_eurekas.h" -#endif /* QTHREAD_USE_EUREKAS */ -#include 
"qt_subsystems.h" - -/* Data Structures */ -struct _qt_threadqueue_node { - struct _qt_threadqueue_node *next; - qthread_t *value; -} /* qt_threadqueue_node_t */; - -struct _qt_threadqueue { - qt_threadqueue_node_t *head; - qt_threadqueue_node_t *tail; -#ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE - aligned_t fruitless; - QTHREAD_COND_DECL(trigger); -#endif /* CONDWAIT */ - /* the following is for estimating a queue's "busy" level, and is not - * guaranteed accurate (that would be a race condition) */ - saligned_t advisory_queuelen; -} /* qt_threadqueue_t */; - -/* Memory Management */ -#if defined(UNPOOLED_QUEUES) || defined(UNPOOLED) -#define ALLOC_THREADQUEUE() (qt_threadqueue_t *)MALLOC(sizeof(qt_threadqueue_t)) -#define FREE_THREADQUEUE(t) FREE(t, sizeof(qt_threadqueue_t)) - -static QINLINE void ALLOC_TQNODE(qt_threadqueue_node_t **ret) { /*{{{ */ - *ret = (qt_threadqueue_node_t *)qt_internal_aligned_alloc( - sizeof(qt_threadqueue_node_t), 16); - if (*ret != NULL) { memset(*ret, 0, sizeof(qt_threadqueue_node_t)); } -} /*}}} */ - -static void FREE_TQNODE(void *p) { - FREE_SCRIBBLE(p, sizeof(qt_threadqueue_node_t)); - qt_internal_aligned_free(p, 16); -} - -void INTERNAL qt_threadqueue_subsystem_init(void) {} -#else /* if defined(UNPOOLED_QUEUES) || defined(UNPOOLED) */ -qt_threadqueue_pools_t generic_threadqueue_pools; -#define ALLOC_THREADQUEUE() \ - (qt_threadqueue_t *)qt_mpool_alloc(generic_threadqueue_pools.queues) -#define FREE_THREADQUEUE(t) qt_mpool_free(generic_threadqueue_pools.queues, t) - -static QINLINE void ALLOC_TQNODE(qt_threadqueue_node_t **ret) { /*{{{ */ - *ret = - (qt_threadqueue_node_t *)qt_mpool_alloc(generic_threadqueue_pools.nodes); - if (*ret != NULL) { memset(*ret, 0, sizeof(qt_threadqueue_node_t)); } -} /*}}} */ - -static void FREE_TQNODE(void *p) { - qt_mpool_free(generic_threadqueue_pools.nodes, p); -} - -static void qt_threadqueue_subsystem_shutdown(void) { - qt_mpool_destroy(generic_threadqueue_pools.nodes); - qt_mpool_destroy(generic_threadqueue_pools.queues); -} - -void INTERNAL qt_threadqueue_subsystem_init(void) { - generic_threadqueue_pools.nodes = - qt_mpool_create_aligned(sizeof(qt_threadqueue_node_t), 16); - generic_threadqueue_pools.queues = qt_mpool_create(sizeof(qt_threadqueue_t)); - qthread_internal_cleanup(qt_threadqueue_subsystem_shutdown); -} - -#endif /* if defined(UNPOOLED_QUEUES) || defined(UNPOOLED) */ - -ssize_t INTERNAL qt_threadqueue_advisory_queuelen(qt_threadqueue_t *q) { /*{{{*/ - return qthread_internal_atomic_read_s(&q->advisory_queuelen, - &q->advisory_queuelen_m); -} /*}}}*/ - -/*****************************************/ -/* functions to manage the thread queues */ -/*****************************************/ - -// This lock-free algorithm borrowed from -// http://www.research.ibm.com/people/m/michael/podc-1996.pdf -// ... 
and modified to use hazard ptrs according to -// http://www.research.ibm.com/people/m/michael/ieeetpds-2004.pdf - -qt_threadqueue_t INTERNAL *qt_threadqueue_new(void) { /*{{{ */ - qt_threadqueue_t *q = ALLOC_THREADQUEUE(); - - if (q != NULL) { -#ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE - q->fruitless = 0; - QTHREAD_COND_INIT(q->trigger); -#endif /* ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE */ - ALLOC_TQNODE(((qt_threadqueue_node_t **)&(q->head))); - assert(q->head != NULL); - if (q->head == NULL) { // if we're not using asserts, fail nicely -#ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE - QTHREAD_COND_DESTROY(q->trigger); -#endif - FREE_THREADQUEUE(q); - q = NULL; - } - q->tail = q->head; - q->tail->next = NULL; - } - return q; -} /*}}} */ - -static qthread_t *qt_threadqueue_dequeue(qt_threadqueue_t *q) { /*{{{ */ - qthread_t *p = NULL; - - qt_threadqueue_node_t *head; - qt_threadqueue_node_t *tail; - qt_threadqueue_node_t *next_ptr; - - assert(q != NULL); - while (1) { - head = q->head; - - hazardous_ptr(0, head); - if (head != q->head) { - continue; // are head, tail, and next consistent? - } - - tail = q->tail; - next_ptr = head->next; - - hazardous_ptr(1, next_ptr); - - if (next_ptr == NULL) { - return NULL; // queue is empty - } - if (head == tail) { // tail is falling behind! - (void)qt_cas((void **)&(q->tail), - (void *)tail, - next_ptr); // advance tail ptr - continue; - } - // read value before CAS, otherwise another dequeue might free the next node - p = next_ptr->value; - if (qt_cas((void **)&(q->head), (void *)head, next_ptr) == head) { - break; // success! - } - } - hazardous_release_node(FREE_TQNODE, head); - if (p != NULL) { - Q_PREFETCH(p); - (void)qthread_internal_incr_s( - &q->advisory_queuelen, &q->advisory_queuelen_m, -1); - } - return p; -} /*}}} */ - -void INTERNAL qt_threadqueue_free(qt_threadqueue_t *q) { /*{{{ */ - while (q->head != q->tail) { qt_threadqueue_dequeue(q); } - assert(q->head == q->tail); -#ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE - QTHREAD_COND_DESTROY(q->trigger); -#endif - FREE_TQNODE((qt_threadqueue_node_t *)q->head); - FREE_THREADQUEUE(q); -} /*}}} */ - -#ifdef QTHREAD_USE_SPAWNCACHE -qthread_t INTERNAL * -qt_threadqueue_private_dequeue(qt_threadqueue_private_t *c) { - return NULL; -} - -int INTERNAL -qt_threadqueue_private_enqueue(qt_threadqueue_private_t *restrict pq, - qt_threadqueue_t *restrict q, - qthread_t *restrict t) { - return 0; -} - -int INTERNAL qt_threadqueue_private_enqueue_yielded( - qt_threadqueue_private_t *restrict q, qthread_t *restrict t) { - return 0; -} - -void INTERNAL qt_threadqueue_enqueue_cache(qt_threadqueue_t *q, - qt_threadqueue_private_t *cache) {} - -void INTERNAL qt_threadqueue_private_filter( - qt_threadqueue_private_t *restrict c, qt_threadqueue_filter_f f) {} -#endif /* ifdef QTHREAD_USE_SPAWNCACHE */ - -void INTERNAL qt_threadqueue_enqueue(qt_threadqueue_t *restrict q, - qthread_t *restrict t) { /*{{{ */ - qt_threadqueue_node_t *tail; - qt_threadqueue_node_t *next; - qt_threadqueue_node_t *node; - - assert(t != NULL); - assert(q != NULL); - qthread_debug(THREADQUEUE_CALLS, - "q(%p), t(%p:%i): began head:%p tail:%p\n", - q, - t, - t->thread_id, - q->head, - q->tail); - - ALLOC_TQNODE(&node); - assert(node != NULL); - - node->value = t; - node->next = NULL; - - while (1) { - qthread_debug(THREADQUEUE_DETAILS, - "q(%p), t(%p:%i): reading q->tail\n", - q, - t, - t->thread_id); - tail = q->tail; - - hazardous_ptr(0, tail); - if (tail != q->tail) { - continue; // are tail and next consistent? 
- } - - next = tail->next; - if (next != NULL) { // tail not pointing to last node - (void)qt_cas((void **)&(q->tail), - (void *)tail, - next); // ABA hazard (mitigated by QCOMPOSE) - continue; - } - // tail must be pointing to the last node - if (qt_cas((void **)&(tail->next), (void *)next, node) == next) { - break; // success! - } - } - (void)qt_cas((void **)&(q->tail), (void *)tail, node); - qthread_debug(THREADQUEUE_DETAILS, - "q(%p), t(%p:%i): appended head:%p nextptr:%p tail:%p\n", - q, - t, - t->thread_id, - q->head, - q->head ? q->head->next : NULL, - q->tail); - - (void)qthread_incr(&q->advisory_queuelen, 1); -#ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE - if (q->fruitless) { - QTHREAD_COND_LOCK(q->trigger); - if (q->fruitless) { - q->fruitless = 0; - QTHREAD_BCAST(q->trigger); - } - QTHREAD_COND_UNLOCK(q->trigger); - } -#endif - hazardous_ptr(0, - NULL); // release the ptr (avoid hazardptr resource exhaustion) -} /*}}} */ - -void qt_threadqueue_enqueue_yielded(qt_threadqueue_t *restrict q, - qthread_t *restrict t) { /*{{{*/ - qt_threadqueue_enqueue(q, t); -} /*}}}*/ - -qthread_t INTERNAL * -qt_scheduler_get_thread(qt_threadqueue_t *q, - qt_threadqueue_private_t *Q_UNUSED(qc), - uint_fast8_t Q_UNUSED(active)) { /*{{{ */ - qthread_t *p = NULL; - - qt_threadqueue_node_t *head; - qt_threadqueue_node_t *tail; - qt_threadqueue_node_t *next_ptr; - - assert(q != NULL); - qthread_debug(THREADQUEUE_CALLS, "q(%p): began\n", q); -#ifdef QTHREAD_USE_EUREKAS - qt_eureka_disable(); -#endif /* QTHREAD_USE_EUREKAS */ - qthread_debug(THREADQUEUE_DETAILS, - "q(%p): head=%p next_ptr=%p tail=%p\n", - q, - q->head, - q->head ? q->head->next : NULL, - q->tail); - while (1) { - head = q->head; - - hazardous_ptr(0, head); - if (head != q->head) { - continue; // are head, tail, and next consistent? - } - - tail = q->tail; - next_ptr = head->next; - - hazardous_ptr(1, next_ptr); - - if (next_ptr == NULL) { // queue is empty -#ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE - if (qthread_internal_incr(&q->fruitless, &q->fruitless_m, 1) > 1000) { -#ifdef QTHREAD_USE_EUREKAS - qt_eureka_check(0); -#endif /* QTHREAD_USE_EUREKAS */ - QTHREAD_COND_LOCK(q->trigger); - while (q->fruitless > 1000) { QTHREAD_COND_WAIT(q->trigger); } - QTHREAD_COND_UNLOCK(q->trigger); -#ifdef QTHREAD_USE_EUREKAS - qt_eureka_disable(); -#endif /* QTHREAD_USE_EUREKAS */ - } else { -#ifdef QTHREAD_USE_EUREKAS - qt_eureka_check(0); -#endif /* QTHREAD_USE_EUREKAS */ -#ifdef HAVE_PTHREAD_YIELD - pthread_yield(); -#elif HAVE_SHED_YIELD - sched_yield(); -#endif -#ifdef QTHREAD_USE_EUREKAS - qt_eureka_disable(); -#endif /* QTHREAD_USE_EUREKAS */ - } -#else /* ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE */ -#ifdef QTHREAD_USE_EUREKAS - qt_eureka_check(1); -#endif /* QTHREAD_USE_EUREKAS */ - SPINLOCK_BODY(); -#endif /* ifdef QTHREAD_CONDWAIT_BLOCKING_QUEUE */ - continue; - } - qthread_debug(THREADQUEUE_DETAILS, "q(%p): next_ptr = %p\n", q, next_ptr); - if (head == tail) { // tail is falling behind - (void)qt_cas((void **)&(q->tail), - (void *)tail, - next_ptr); // advance tail ptr - } - // read value before CAS, otherwise another dequeue might free the next node - p = next_ptr->value; - if (qt_cas((void **)&(q->head), (void *)head, next_ptr) == head) { - break; // success! - } - } - qthread_debug(THREADQUEUE_DETAILS, - "q(%p): found a thread! 
p=%p:%i\n", - q, - p, - p->thread_id); - hazardous_release_node(FREE_TQNODE, head); - if (p != NULL) { - (void)qthread_internal_incr_s( - &q->advisory_queuelen, &q->advisory_queuelen_m, -1); - } - return p; -} /*}}} */ - -/* walk queue removing all tasks matching this description */ -void INTERNAL qt_threadqueue_filter(qt_threadqueue_t *q, - qt_threadqueue_filter_f f) { /*{{{*/ - qt_threadqueue_node_t *curs, **ptr; - - qthread_debug(THREADQUEUE_CALLS, - "q(%p), f(%p): began head:%p next:%p tail:%p\n", - q, - f, - q->head, - q->head ? q->head->next : NULL, - q->tail); - - assert(q != NULL); - do { - curs = q->head; - if (curs == NULL) { return; } - hazardous_ptr(0, curs); - COMPILER_FENCE; - } while (curs != q->head); - ptr = &curs->next; - curs = curs->next; - hazardous_ptr(1, curs); - while (curs) { - qthread_t *t = curs->value; - switch (f(t)) { - case IGNORE_AND_CONTINUE: // ignore, move on - hazardous_ptr(0, curs); - ptr = &curs->next; - curs = curs->next; - hazardous_ptr(1, curs); - continue; - case IGNORE_AND_STOP: // ignore, stop looking - return; - - case REMOVE_AND_CONTINUE: // remove, move on - { - qt_threadqueue_node_t *freeme = curs; - -#ifdef QTHREAD_USE_EUREKAS - qthread_internal_assassinate(t); -#endif /* QTHREAD_USE_EUREKAS */ - if (curs->next == NULL) { - /* this is clever: since 'next' is the first field, its - * address is the address of the entire structure */ - q->tail = (qt_threadqueue_node_t *)ptr; - } - *ptr = curs->next; - curs = curs->next; - hazardous_ptr(1, curs); - hazardous_release_node(FREE_TQNODE, freeme); - } - continue; - case REMOVE_AND_STOP: // remove, stop looking -#ifdef QTHREAD_USE_EUREKAS - qthread_internal_assassinate(t); -#endif /* QTHREAD_USE_EUREKAS */ - if (curs->next == NULL) { - /* this is clever: since 'next' is the first field, its - * address is the address of the entire structure */ - q->tail = (qt_threadqueue_node_t *)ptr; - } - *ptr = curs->next; - hazardous_release_node(FREE_TQNODE, curs); - return; - } - } -} /*}}}*/ - -/* some place-holder functions */ -void INTERNAL qthread_steal_stat(void) {} - -void INTERNAL qthread_steal_enable(void) {} - -void INTERNAL qthread_steal_disable(void) {} - -void INTERNAL qthread_cas_steal_stat(void) {} - -qthread_shepherd_id_t INTERNAL -qt_threadqueue_choose_dest(qthread_shepherd_t *curr_shep) { - qthread_shepherd_id_t dest_shep_id = 0; - - if (curr_shep) { - dest_shep_id = curr_shep->sched_shepherd++; - curr_shep->sched_shepherd *= (qlib->nshepherds > (dest_shep_id + 1)); - } else { - dest_shep_id = (qthread_shepherd_id_t)qthread_internal_incr_mod( - &qlib->sched_shepherd, qlib->nshepherds, &qlib->sched_shepherd_lock); - } - - return dest_shep_id; -} - -qthread_t INTERNAL *qt_threadqueue_dequeue_specific(qt_threadqueue_t *q, - void *value) { - return NULL; -} - -size_t INTERNAL qt_threadqueue_policy(const enum threadqueue_policy policy) { - switch (policy) { - case SINGLE_WORKER: return THREADQUEUE_POLICY_TRUE; - default: return THREADQUEUE_POLICY_UNSUPPORTED; - } -} - -/* vim:set expandtab: */ diff --git a/src/threadqueues/mutexfifo_threadqueues.c b/src/threadqueues/mutexfifo_threadqueues.c deleted file mode 100644 index 950e51bc..00000000 --- a/src/threadqueues/mutexfifo_threadqueues.c +++ /dev/null @@ -1,317 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/* System Headers */ -#include -#include -#include -#include - -/* API Headers */ -#include "qthread/qthread.h" - -/* Internal Headers */ -#include "qt_asserts.h" -#include "qt_debug.h" -#include "qt_macros.h" -#include 
"qt_prefetch.h" -#include "qt_qthread_struct.h" -#include "qt_shepherd_innards.h" -#include "qt_threadqueues.h" -#include "qt_visibility.h" -#include "qthread_innards.h" /* for qlib */ -#ifdef QTHREAD_USE_EUREKAS -#include "qt_eurekas.h" -#endif /* QTHREAD_USE_EUREKAS */ -#include "qt_subsystems.h" - -/* Data Structures */ -struct _qt_threadqueue_node { - struct _qt_threadqueue_node *next; - qthread_t *value; -} /* qt_threadqueue_node_t */; - -struct _qt_threadqueue { - qt_threadqueue_node_t *head; - qt_threadqueue_node_t *tail; - QTHREAD_FASTLOCK_TYPE head_lock; - QTHREAD_FASTLOCK_TYPE tail_lock; - QTHREAD_FASTLOCK_TYPE advisory_queuelen_m; - /* the following is for estimating a queue's "busy" level, and is not - * guaranteed accurate (that would be a race condition) */ - saligned_t advisory_queuelen; -} /* qt_threadqueue_t */; - -/* Memory Management */ -#if defined(UNPOOLED_QUEUES) || defined(UNPOOLED) -#define ALLOC_THREADQUEUE() (qt_threadqueue_t *)MALLOC(sizeof(qt_threadqueue_t)) -#define FREE_THREADQUEUE(t) FREE(t, sizeof(qt_threadqueue_t)) -#define ALLOC_TQNODE() \ - (qt_threadqueue_node_t *)MALLOC(sizeof(qt_threadqueue_node_t)) -#define FREE_TQNODE(t) FREE(t, sizeof(qt_threadqueue_node_t)) - -void INTERNAL qt_threadqueue_subsystem_init(void) {} -#else /* if defined(UNPOOLED_QUEUES) || defined(UNPOOLED) */ -qt_threadqueue_pools_t generic_threadqueue_pools; -#define ALLOC_THREADQUEUE() \ - (qt_threadqueue_t *)qt_mpool_alloc(generic_threadqueue_pools.queues) -#define FREE_THREADQUEUE(t) qt_mpool_free(generic_threadqueue_pools.queues, t) -#define ALLOC_TQNODE() \ - (qt_threadqueue_node_t *)qt_mpool_alloc(generic_threadqueue_pools.nodes) -#define FREE_TQNODE(t) qt_mpool_free(generic_threadqueue_pools.nodes, t) - -static void qt_threadqueue_subsystem_shutdown(void) { /*{{{*/ - qt_mpool_destroy(generic_threadqueue_pools.nodes); - qt_mpool_destroy(generic_threadqueue_pools.queues); -} /*}}}*/ - -void INTERNAL qt_threadqueue_subsystem_init(void) { /*{{{*/ - generic_threadqueue_pools.nodes = - qt_mpool_create(sizeof(qt_threadqueue_node_t)); - generic_threadqueue_pools.queues = qt_mpool_create(sizeof(qt_threadqueue_t)); - qthread_internal_cleanup(qt_threadqueue_subsystem_shutdown); -} /*}}}*/ -#endif /* if defined(UNPOOLED_QUEUES) || defined(UNPOOLED) */ - -ssize_t INTERNAL qt_threadqueue_advisory_queuelen(qt_threadqueue_t *q) { /*{{{*/ - return qthread_internal_atomic_read_s(&q->advisory_queuelen, - &q->advisory_queuelen_m); -} /*}}}*/ - -#define QTHREAD_INITLOCK(l) \ - do { \ - if (pthread_mutex_init(l, NULL) != 0) { return QTHREAD_PTHREAD_ERROR; } \ - } while (0) -#define QTHREAD_LOCK(l) qassert(pthread_mutex_lock(l), 0) -#define QTHREAD_UNLOCK(l) qassert(pthread_mutex_unlock(l), 0) -// #define QTHREAD_DESTROYLOCK(l) do { int __ret__ = pthread_mutex_destroy(l); -// if (__ret__ != 0) fprintf(stderr, "pthread_mutex_destroy(%p) returned %i -// (%s)\n", l, __ret__, strerror(__ret__)); assert(__ret__ == 0); } while (0) -#define QTHREAD_DESTROYLOCK(l) qassert(pthread_mutex_destroy(l), 0) -#define QTHREAD_DESTROYCOND(l) qassert(pthread_cond_destroy(l), 0) - -/*****************************************/ -/* functions to manage the thread queues */ -/*****************************************/ - -qt_threadqueue_t INTERNAL *qt_threadqueue_new(void) { /*{{{ */ - qt_threadqueue_t *q = ALLOC_THREADQUEUE(); - - if (q != NULL) { - QTHREAD_FASTLOCK_INIT(q->head_lock); - QTHREAD_FASTLOCK_INIT(q->tail_lock); - QTHREAD_FASTLOCK_INIT(q->advisory_queuelen_m); - q->advisory_queuelen = 0; - q->head = 
ALLOC_TQNODE(); - assert(q->head != NULL); - if (q->head == NULL) { - QTHREAD_FASTLOCK_DESTROY(q->advisory_queuelen_m); - QTHREAD_FASTLOCK_DESTROY(q->head_lock); - QTHREAD_FASTLOCK_DESTROY(q->tail_lock); - FREE_THREADQUEUE(q); - q = NULL; - } else { - q->tail = q->head; - q->head->next = NULL; - q->head->value = NULL; - } - } - return q; -} /*}}} */ - -static qthread_t *qt_threadqueue_dequeue(qt_threadqueue_t *q) { /*{{{ */ - qthread_t *p = NULL; - - qt_threadqueue_node_t *node, *new_head; - - assert(q != NULL); - QTHREAD_FASTLOCK_LOCK(&q->head_lock); - { - node = q->head; - new_head = node->next; - if (new_head != NULL) { - p = new_head->value; - q->head = new_head; - } - } - QTHREAD_FASTLOCK_UNLOCK(&q->head_lock); - if (p != NULL) { - Q_PREFETCH(p); - (void)qthread_internal_incr_s( - &q->advisory_queuelen, &q->advisory_queuelen_m, -1); - } - return p; -} /*}}} */ - -void INTERNAL qt_threadqueue_free(qt_threadqueue_t *q) { /*{{{ */ - while (q->head != q->tail) { qt_threadqueue_dequeue(q); } - QTHREAD_FASTLOCK_DESTROY(q->head_lock); - QTHREAD_FASTLOCK_DESTROY(q->tail_lock); - QTHREAD_FASTLOCK_DESTROY(q->advisory_queuelen_m); - FREE_TQNODE((qt_threadqueue_node_t *)(q->head)); - FREE_THREADQUEUE(q); -} /*}}} */ - -#ifdef QTHREAD_USE_SPAWNCACHE -qthread_t INTERNAL * -qt_threadqueue_private_dequeue(qt_threadqueue_private_t *c) { /*{{{*/ - return NULL; -} /*}}}*/ - -int INTERNAL -qt_threadqueue_private_enqueue(qt_threadqueue_private_t *restrict pq, - qt_threadqueue_t *restrict q, - qthread_t *restrict t) { /*{{{*/ - return 0; -} /*}}}*/ - -int INTERNAL qt_threadqueue_private_enqueue_yielded( - qt_threadqueue_private_t *restrict q, qthread_t *restrict t) { /*{{{*/ - return 0; -} /*}}}*/ - -void INTERNAL qt_threadqueue_enqueue_cache(qt_threadqueue_t *q, - qt_threadqueue_private_t *cache) {} - -void INTERNAL qt_threadqueue_private_filter( - qt_threadqueue_private_t *restrict c, qt_threadqueue_filter_f f) {} -#endif /* ifdef QTHREAD_USE_SPAWNCACHE */ - -void INTERNAL qt_threadqueue_enqueue(qt_threadqueue_t *restrict q, - qthread_t *restrict t) { /*{{{ */ - qt_threadqueue_node_t *node; - - node = ALLOC_TQNODE(); - assert(node != NULL); - node->value = t; - node->next = NULL; - QTHREAD_FASTLOCK_LOCK(&q->tail_lock); - { - q->tail->next = node; - q->tail = node; - } - QTHREAD_FASTLOCK_UNLOCK(&q->tail_lock); - (void)qthread_internal_incr_s( - &q->advisory_queuelen, &q->advisory_queuelen_m, 1); -} /*}}} */ - -void qt_threadqueue_enqueue_yielded(qt_threadqueue_t *restrict q, - qthread_t *restrict t) { /*{{{*/ - qt_threadqueue_enqueue(q, t); -} /*}}}*/ - -/* this function is amusing, but the point is to avoid unnecessary bus traffic - * by allowing idle shepherds to sit for a while while still allowing for - * low-overhead for busy shepherds. 
This is a hybrid approach: normally, it - * functions as a spinlock, but if it spins too much, it waits for a signal */ -qthread_t INTERNAL * -qt_scheduler_get_thread(qt_threadqueue_t *q, - qt_threadqueue_private_t *Q_UNUSED(qc), - uint_fast8_t Q_UNUSED(active)) { /*{{{ */ - qthread_t *p = NULL; - -#ifdef QTHREAD_USE_EUREKAS - qt_eureka_disable(); -#endif /* QTHREAD_USE_EUREKAS */ - while ((p = qt_threadqueue_dequeue(q)) == NULL) { -#ifdef QTHREAD_USE_EUREKAS - qt_eureka_check(1); -#endif /* QTHREAD_USE_EUREKAS */ - SPINLOCK_BODY(); - } - return p; -} /*}}} */ - -/* walk queue removing all tasks matching this description */ -void INTERNAL qt_threadqueue_filter(qt_threadqueue_t *q, - qt_threadqueue_filter_f f) { /*{{{*/ - QTHREAD_FASTLOCK_LOCK(&q->head_lock); - { - qt_threadqueue_node_t *curs = q->head->next; - qt_threadqueue_node_t **ptr = &q->head->next; - - while (curs) { - qthread_t *t = curs->value; - switch (f(t)) { - case IGNORE_AND_CONTINUE: // ignore, move on - ptr = &curs->next; - curs = curs->next; - break; - case IGNORE_AND_STOP: // ignore, stop looking - curs = NULL; - continue; - case REMOVE_AND_CONTINUE: // remove, move on - { - qt_threadqueue_node_t *tmp = curs; -#ifdef QTHREAD_USE_EUREKAS - qthread_internal_assassinate(t); -#endif /* QTHREAD_USE_EUREKAS */ - if (curs->next == NULL) { - /* this is clever: since 'next' is the first field, its - * address is the address of the entire structure */ - q->tail = (qt_threadqueue_node_t *)ptr; - } - *ptr = curs->next; - curs = curs->next; - FREE_TQNODE(tmp); - break; - } - case REMOVE_AND_STOP: // remove, stop looking - { -#ifdef QTHREAD_USE_EUREKAS - qthread_internal_assassinate(t); -#endif /* QTHREAD_USE_EUREKAS */ - if (curs->next == NULL) { - /* this is clever: since 'next' is the first field, its - * address is the address of the entire structure */ - q->tail = (qt_threadqueue_node_t *)ptr; - } - *ptr = curs->next; - FREE_TQNODE(curs); - curs = NULL; - continue; - } - } - } - } - QTHREAD_FASTLOCK_UNLOCK(&q->head_lock); -} /*}}}*/ - -/* some place-holder functions */ -void INTERNAL qthread_steal_stat(void) {} - -void INTERNAL qthread_steal_enable(void) {} - -void INTERNAL qthread_steal_disable(void) {} - -void INTERNAL qthread_cas_steal_stat(void) {} - -qthread_shepherd_id_t INTERNAL -qt_threadqueue_choose_dest(qthread_shepherd_t *curr_shep) { - qthread_shepherd_id_t dest_shep_id = 0; - - if (curr_shep) { - dest_shep_id = curr_shep->sched_shepherd++; - curr_shep->sched_shepherd *= (qlib->nshepherds > (dest_shep_id + 1)); - } else { - dest_shep_id = (qthread_shepherd_id_t)qthread_internal_incr_mod( - &qlib->sched_shepherd, qlib->nshepherds, &qlib->sched_shepherd_lock); - } - - return dest_shep_id; -} - -qthread_t INTERNAL *qt_threadqueue_dequeue_specific(qt_threadqueue_t *q, - void *value) { - return NULL; -} - -size_t INTERNAL qt_threadqueue_policy(const enum threadqueue_policy policy) { - switch (policy) { - case SINGLE_WORKER: return THREADQUEUE_POLICY_TRUE; - default: return THREADQUEUE_POLICY_UNSUPPORTED; - } -} - -/* vim:set expandtab: */ diff --git a/src/threadqueues/nottingham_threadqueues.c b/src/threadqueues/nottingham_threadqueues.c deleted file mode 100644 index 23dfc62d..00000000 --- a/src/threadqueues/nottingham_threadqueues.c +++ /dev/null @@ -1,963 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/* System Headers */ -#include -#include -#include -#include -#include - -/* Internal Headers */ -#include "56reader-rwlock.h" -#include "qt_asserts.h" -#include "qt_envariables.h" -#include "qt_macros.h" 
-#include "qt_prefetch.h" -#include "qt_qthread_struct.h" -#include "qt_shepherd_innards.h" -#include "qt_threadqueues.h" -#include "qt_visibility.h" -#include "qthread/qthread.h" -#include "qthread_innards.h" /* for qlib */ - -#ifndef NOINLINE -#define NOINLINE __attribute__((noinline)) -#endif - -/* Data Structures */ -struct uint128 { - uint64_t lo; - uint64_t hi; -}; - -typedef struct uint128 uint128_t; - -struct _qt_threadqueue_entry { - qthread_t *value; - uint32_t index; - uint32_t counter; -}; - -typedef struct _qt_threadqueue_entry qt_threadqueue_entry_t; - -typedef __m128i m128i; - -typedef union { - m128i sse; - qt_threadqueue_entry_t entry; -} qt_threadqueue_union_t; - -struct _qt_threadqueue { - m128i top; - m128i blanks[3]; - m128i *base; - uint32_t size; - uint32_t bottom; - rwlock_t *rwlock; - QTHREAD_FASTLOCK_TYPE spinlock; - - /* used for the work stealing queue implementation */ - m128i flush[1]; - uint32_t empty; - uint32_t stealing; - uint32_t steal_disable; -} /* qt_threadqueue_t */; - -// Forward declarations - -void INTERNAL qt_threadqueue_enqueue_multiple(qt_threadqueue_t *q, - int stealcount, - qthread_t **stealbuffer, - qthread_shepherd_t *shep); - -INTERNAL int qt_threadqueue_dequeue_steal(qt_threadqueue_t *q, - qthread_t **nostealbuffer, - qthread_t **stealbuffer); - -void INTERNAL qt_threadqueue_resize_and_enqueue(qt_threadqueue_t *q, - qthread_t *t); - -int static QINLINE qt_threadqueue_stealable(qthread_t *t); - -qthread_t static QINLINE *qt_threadqueue_dequeue_helper(qt_threadqueue_t *q); - -void INTERNAL qt_threadqueue_enqueue_unstealable(qt_threadqueue_t *q, - qthread_t **nostealbuffer, - int amtNotStolen); - -void INTERNAL qt_threadqueue_subsystem_init(void) {} - -#ifdef CAS_STEAL_PROFILE -static void cas_profile_update(int id, int retries) { - uint64_strip_t *cas_steal_profile = qlib->cas_steal_profile; - - if (cas_steal_profile == NULL) { return; } - if (retries >= CAS_STEAL_PROFILE_LENGTH) { - cas_steal_profile[id].fields[CAS_STEAL_PROFILE_LENGTH - 1]++; - } else { - cas_steal_profile[id].fields[retries]++; - } -} - -#else /* ifdef CAS_STEAL_PROFILE */ -#define cas_profile_update(x, y) \ - do { \ - } while (0) -#endif /* ifdef CAS_STEAL_PROFILE */ - -ssize_t INTERNAL qt_threadqueue_advisory_queuelen(qt_threadqueue_t *q) { /*{{{*/ - return 0; -} /*}}}*/ - -/*****************************************/ -/* functions to manage the thread queues */ -/*****************************************/ - -static QINLINE long qthread_steal_chunksize(void); -static QINLINE qthread_t *qthread_steal(qt_threadqueue_t *thiefq); - -qt_threadqueue_t INTERNAL *qt_threadqueue_new(void) { /*{{{*/ - qt_threadqueue_t *q; - - posix_memalign((void **)&q, 64, sizeof(qt_threadqueue_t)); - - if (q != NULL) { - qt_threadqueue_union_t top; - top.entry.value = NULL; - top.entry.index = 0; - top.entry.counter = 0; - - q->top = top.sse; - q->size = 1024; - q->bottom = 0; - q->empty = 1; - q->stealing = 0; - QTHREAD_FASTLOCK_INIT(q->spinlock); - posix_memalign((void **)&(q->base), 64, q->size * sizeof(m128i)); - posix_memalign((void **)&(q->rwlock), 64, sizeof(rwlock_t)); - rwlock_init(q->rwlock); - memset(q->base, 0, q->size * sizeof(m128i)); - } - return q; -} /*}}}*/ - -void INTERNAL qt_threadqueue_free(qt_threadqueue_t *q) { /*{{{*/ - // mspiegel: is it necessary to drain the queue? 
- /* while (q->head != q->tail) { - * qt_scheduler_get_thread(q, 1); - * } */ - qt_free((void *)q->base); - qt_free((void *)q); -} /*}}}*/ - -static QINLINE int -qt_threadqueue_cas128(uint128_t *src, uint128_t *cmp, uint128_t *with) { /*{{{*/ - char result; - - // (AT&T syntax) - __asm__ __volatile__("lock; cmpxchg16b (%6);" - "setz %7; " - : "=a"(cmp->lo), "=d"(cmp->hi) - : "0"(cmp->lo), - "1"(cmp->hi), - "b"(with->lo), - "c"(with->hi), - "r"(src), - "m"(result) - : "cc", "memory"); - /* (Intel syntax) - * __asm__ __volatile__ - * ( - * "lock cmpxchg16b oword ptr %1\n\t" - * "setz %0" - * : "=q" ( result ) - * , "+m" ( *src ) - * , "+d" ( cmp->hi ) - * , "+a" ( cmp->lo ) - * : "c" ( with->hi ) - * , "b" ( with->lo ) - * : "cc" - * ); */ - return result; -} /*}}}*/ - -static QINLINE void -qt_threadqueue_finish(qt_threadqueue_t *q, - qt_threadqueue_entry_t top_entry) { /*{{{*/ - qt_threadqueue_union_t snapshot, oldnode; - - uint32_t index = top_entry.index; - - snapshot.sse = q->base[index]; - - oldnode.entry.value = snapshot.entry.value; - oldnode.entry.counter = top_entry.counter - 1; - oldnode.entry.index = top_entry.index = 0; - - if (snapshot.entry.counter > oldnode.entry.counter) { return; } - if (snapshot.entry.counter == top_entry.counter) { return; } - - qt_threadqueue_cas128((uint128_t *)q->base + index, - (uint128_t *)&oldnode, - (uint128_t *)&top_entry); -} /*}}}*/ - -#ifdef QTHREAD_USE_SPAWNCACHE -qthread_t INTERNAL * -qt_threadqueue_private_dequeue(qt_threadqueue_private_t *c) { - return NULL; -} - -int INTERNAL -qt_threadqueue_private_enqueue(qt_threadqueue_private_t *restrict pq, - qt_threadqueue_t *restrict q, - qthread_t *restrict t) { - return 0; -} - -int INTERNAL qt_threadqueue_private_enqueue_yielded( - qt_threadqueue_private_t *restrict q, qthread_t *restrict t) { - return 0; -} - -#endif /* ifdef QTHREAD_USE_SPAWNCACHE */ - -/* enqueue at tail */ -void INTERNAL qt_threadqueue_enqueue(qt_threadqueue_t *restrict q, - qthread_t *restrict t) { /*{{{*/ - qt_threadqueue_union_t oldtop, snapshot, lastchance; - qt_threadqueue_entry_t newtop; - uint32_t nextindex; - -#ifdef CAS_STEAL_PROFILE - int cycles = 0; -#endif - - int id = qthread_worker_unique(NULL); - - rwlock_rdlock(q->rwlock, id); - - oldtop.sse = q->top; - - while (1) { -#ifdef CAS_STEAL_PROFILE - cycles++; -#endif - - qt_threadqueue_finish(q, oldtop.entry); - - nextindex = (oldtop.entry.index + 1) % q->size; - - if (nextindex == q->bottom) { - // Pthread reader-writer locks will deadlock - // on lock promotion attempts. 
- rwlock_rdunlock(q->rwlock, id); - qt_threadqueue_resize_and_enqueue(q, t); - cas_profile_update(id, cycles - 1); - return; - } - - snapshot.sse = q->base[nextindex]; - newtop.value = t; - newtop.index = nextindex; - newtop.counter = snapshot.entry.counter + 1; - - lastchance.sse = q->top; - - if (lastchance.entry.counter != oldtop.entry.counter) { - oldtop.entry = lastchance.entry; - continue; - } - - if (qt_threadqueue_cas128( - (uint128_t *)&(q->top), (uint128_t *)&oldtop, (uint128_t *)&newtop)) { - break; - } - } - - q->empty = 0; - - rwlock_rdunlock(q->rwlock, id); - - cas_profile_update(id, cycles - 1); -} /*}}}*/ - -/* enqueue multiple (from steal) */ -void INTERNAL -qt_threadqueue_enqueue_multiple(qt_threadqueue_t *q, - int stealcount, - qthread_t **stealbuffer, - qthread_shepherd_t *shep) { /*{{{*/ - /* save element 0 for the thief */ - for (int i = 1; i < stealcount; i++) { - qthread_t *t = stealbuffer[i]; - t->target_shepherd = shep->shepherd_id; - qt_threadqueue_enqueue(q, t); - } -} /*}}}*/ - -/* This function is called when the queue is full. - * Either a thread is enqueuing at the tail, - * or a thread is enqueuing at the head. - * In both cases, we need to grow the array. - * PRECONDITION: the writer lock must be held. */ -static QINLINE void qt_threadqueue_resize(qt_threadqueue_t *q) { - // TODO: error checking has not been performed. - // If oldsize == UINT32_MAX, then indicate an error. - // If memory allocation returns NULL, then indicate an error. - - uint32_t oldsize = q->size, bottom = q->bottom; - uint32_t newsize = (oldsize > (UINT32_MAX / 2)) ? UINT32_MAX : oldsize * 2; - m128i *newloc; - qt_threadqueue_union_t top; - - qassert(posix_memalign((void **)&(newloc), 64, newsize * sizeof(m128i)), 0); - - assert(newsize > oldsize); - assert(newloc != NULL); - - uint32_t len1 = oldsize - bottom; - uint32_t len2 = bottom; - - m128i *dest1 = newloc; - m128i *dest2 = newloc + len1; - - m128i *src1 = q->base + bottom; - m128i *src2 = q->base; - - memcpy(dest1, src1, len1 * sizeof(m128i)); - memcpy(dest2, src2, len2 * sizeof(m128i)); - memset(newloc + oldsize, 0, (newsize - oldsize) * sizeof(m128i)); - - top.sse = q->top; - top.entry.counter = top.entry.counter + 1; - top.entry.index = oldsize - 1; - q->top = top.sse; - - q->base = newloc; - q->size = newsize; - q->bottom = 0; -} - -void INTERNAL qt_threadqueue_resize_and_enqueue(qt_threadqueue_t *q, - qthread_t *t) { /*{{{*/ - int id = qthread_worker_unique(NULL); - - rwlock_wrlock(q->rwlock, id); - - qt_threadqueue_union_t top, newtop, snapshot; - uint32_t nextindex, oldsize, bottom; - - top.sse = q->top; - - qt_threadqueue_finish(q, top.entry); - - oldsize = q->size; - bottom = q->bottom; - nextindex = (top.entry.index + 1) % oldsize; - - if (nextindex == bottom) { - qt_threadqueue_resize(q); - nextindex = oldsize; - } - - snapshot.sse = q->base[nextindex]; - - newtop.entry.value = t; - newtop.entry.index = nextindex; - newtop.entry.counter = snapshot.entry.counter + 1; - - snapshot.entry.value = t; - snapshot.entry.counter = snapshot.entry.counter + 1; - - q->top = newtop.sse; - q->base[nextindex] = snapshot.sse; - - q->empty = 0; - - rwlock_wrunlock(q->rwlock); -} /*}}}*/ - -/* yielded threads enqueue at head */ -void INTERNAL qt_threadqueue_enqueue_yielded(qt_threadqueue_t *restrict q, - qthread_t *restrict t) { /*{{{*/ - int id = qthread_worker_unique(NULL); - - rwlock_wrlock(q->rwlock, id); - - qt_threadqueue_union_t top; - - top.sse = q->top; - - qt_threadqueue_finish(q, top.entry); - - /* Three cases to consider: - 
* (a) The queue is empty. Move the - * new thread into q->top and return. - * (b) The queue is full. Resize the - * queue and then continue to part (c). - * (c) otherwise. - */ - if (top.entry.index == q->bottom) { - qt_threadqueue_union_t snapshot, newtop; - - uint32_t nextindex = (top.entry.index + 1) % q->size; - snapshot.sse = q->base[nextindex]; - uint32_t nextcounter = snapshot.entry.counter + 1; - - newtop.entry.index = nextindex; - newtop.entry.value = t; - newtop.entry.counter = nextcounter; - - snapshot.entry.value = t; - snapshot.entry.counter = nextcounter; - - q->top = newtop.sse; - q->base[nextindex] = snapshot.sse; - rwlock_wrunlock(q->rwlock); - return; - } else if ((top.entry.index + 1) % q->size == q->bottom) { - qt_threadqueue_resize(q); - } - - uint32_t bot = q->bottom, size = q->size; - uint32_t newbot = (bot - 1) % size; - - qt_threadqueue_union_t bottom, newbottom; - - bottom.sse = q->base[bot]; - newbottom.sse = q->base[newbot]; - bottom.entry.counter += 1; - newbottom.entry.counter += 1; - bottom.entry.value = t; - newbottom.entry.value = NULL; - - q->base[bot] = bottom.sse; - q->base[newbot] = newbottom.sse; - q->bottom = newbot; - - q->empty = 0; - - rwlock_wrunlock(q->rwlock); -} /*}}}*/ - -qthread_t static QINLINE *qt_threadqueue_dequeue_helper(qt_threadqueue_t *q) { - qthread_t *t = NULL; - - q->stealing = 1; - - QTHREAD_FASTLOCK_LOCK(&q->spinlock); - if (!(q->steal_disable) && (q->stealing)) { t = qthread_steal(q); } - QTHREAD_FASTLOCK_UNLOCK(&q->spinlock); - - return (t); -} - -/* dequeue at tail, unlike original qthreads implementation */ -qthread_t INTERNAL * -qt_scheduler_get_thread(qt_threadqueue_t *q, - qt_threadqueue_private_t *Q_UNUSED(qc), - uint_fast8_t active) { /*{{{*/ - qthread_t *t = NULL; - rwlock_t *rwlock = q->rwlock; - qt_threadqueue_union_t oldtop, lastchance; - -#ifdef CAS_STEAL_PROFILE - int cycles = 0; -#endif - - int id = qthread_worker_unique(NULL); - - assert(q != NULL); - - rwlock_rdlock(rwlock, id); - - oldtop.sse = q->top; - - while (1) { -#ifdef CAS_STEAL_PROFILE - cycles++; -#endif - - if (oldtop.entry.index == q->bottom) { - rwlock_rdunlock(rwlock, id); - if (active) { - t = qt_threadqueue_dequeue_helper(q); - if (t != NULL) { - cas_profile_update(id, cycles - 1); - return (t); - } - } - rwlock_rdlock(rwlock, id); - oldtop.sse = q->top; - } else { - t = oldtop.entry.value; - - if ((t->flags & QTHREAD_REAL_MCCOY)) { // only needs to be on worker 0 for - // termination - switch (qthread_worker(NULL)) { - case NO_WORKER: // only happens during termination -- keep trying - rwlock_rdunlock(rwlock, id); // release lock and get new value - rwlock_rdlock(rwlock, id); - oldtop.sse = q->top; - continue; - case 0: break; - default: - /* McCoy thread can only run on worker 0 */ - rwlock_rdunlock(rwlock, id); - if (active) { - t = qt_threadqueue_dequeue_helper(q); - if (t != NULL) { - cas_profile_update(id, cycles - 1); - return (t); - } - } - rwlock_rdlock(rwlock, id); - oldtop.sse = q->top; - continue; - } - } - - qt_threadqueue_finish(q, oldtop.entry); - - uint32_t previndex = (oldtop.entry.index - 1) % q->size; - - qt_threadqueue_entry_t newtop; - qt_threadqueue_union_t belowtop; - belowtop.sse = q->base[previndex]; - newtop.index = previndex; - newtop.value = belowtop.entry.value; - newtop.counter = belowtop.entry.counter + 1; - - lastchance.sse = q->top; - - if (lastchance.entry.counter != oldtop.entry.counter) { - oldtop.entry = lastchance.entry; - continue; - } - - if (qt_threadqueue_cas128((uint128_t *)&(q->top), - (uint128_t 
*)&oldtop, - (uint128_t *)&newtop)) { - rwlock_rdunlock(rwlock, id); - assert(t != NULL); - cas_profile_update(id, cycles - 1); - return (t); - } - } - } -} /*}}}*/ - -int static QINLINE qt_threadqueue_stealable(qthread_t *t) { - return (t->thread_state != QTHREAD_STATE_YIELDED && - t->thread_state != QTHREAD_STATE_TERM_SHEP && - !(t->flags & QTHREAD_UNSTEALABLE)); -} - -void INTERNAL qt_threadqueue_enqueue_unstealable(qt_threadqueue_t *q, - qthread_t **nostealbuffer, - int amtNotStolen) { - if (amtNotStolen == 0) { return; } - - uint32_t bottom = q->bottom; - - qt_threadqueue_union_t top; - top.sse = q->top; - - if (top.entry.index == bottom) { - top.entry.value = nostealbuffer[amtNotStolen - 1]; - q->top = top.sse; - } - - qt_threadqueue_union_t snapshot; - for (int i = amtNotStolen - 1; i >= 0; i--) { - snapshot.sse = q->base[bottom]; - snapshot.entry.value = nostealbuffer[i]; - q->base[bottom] = snapshot.sse; - bottom = (bottom - 1) % q->size; - } - q->bottom = bottom; -} - -/* dequeue stolen threads at head, skip yielded threads */ -INTERNAL int qt_threadqueue_dequeue_steal(qt_threadqueue_t *q, - qthread_t **nostealbuffer, - qthread_t **stealbuffer) { /*{{{ */ - assert(q != NULL); - - int amtStolen = 0, amtNotStolen = 0; - - int id = qthread_worker_unique(NULL); - - if (q->empty) { return (0); } - - rwlock_wrlock(q->rwlock, id); - - qt_threadqueue_union_t top; - - top.sse = q->top; - qt_threadqueue_finish(q, top.entry); - - uint32_t bottom = q->bottom; - uint32_t current = (bottom + 1) % q->size; - - qt_threadqueue_union_t snapshot; - - while (amtStolen < qthread_steal_chunksize()) { - snapshot.sse = q->base[current]; - - /* Three cases to consider: - * (a) The queue is empty. - * (b) The queue contains a single element. - * (c) Otherwise. - */ - if (bottom == top.entry.index) { - q->empty = 1; - break; - } else if (current == top.entry.index) { - qthread_t *candidate = top.entry.value; - if (qt_threadqueue_stealable(candidate)) { - stealbuffer[amtStolen++] = candidate; - snapshot.entry.value = NULL; - snapshot.entry.counter = snapshot.entry.counter + 1; - top.entry.value = NULL; - top.entry.counter = snapshot.entry.counter; - q->base[current] = snapshot.sse; - q->top = top.sse; - bottom = current; - } - q->empty = 1; - break; - } else { - qthread_t *candidate = snapshot.entry.value; - if (qt_threadqueue_stealable(candidate)) { - stealbuffer[amtStolen++] = candidate; - } else if (amtNotStolen == STEAL_BUFFER_LENGTH) { - abort(); // should never happen - } else { - nostealbuffer[amtNotStolen++] = candidate; - } - snapshot.entry.value = NULL; - snapshot.entry.counter = snapshot.entry.counter + 1; - q->base[current] = snapshot.sse; - bottom = current; - current = (current + 1) % q->size; - } - } - q->bottom = bottom; - - qt_threadqueue_enqueue_unstealable(q, nostealbuffer, amtNotStolen); - - rwlock_wrunlock(q->rwlock); - -#ifdef STEAL_PROFILE // should give mechanism to make steal profiling optional - qthread_incr(&q->steal_amount_stolen, amtStolen); -#endif - - return (amtStolen); -} /*}}} */ - -/* Returns the number of tasks to steal per steal operation (chunk size) */ -static QINLINE long qthread_steal_chunksize(void) { /*{{{*/ - static long chunksize = 0; - - if (chunksize == 0) { - chunksize = - qt_internal_get_env_num("STEAL_CHUNKSIZE", qlib->nworkerspershep, 1); - } - - return chunksize; -} /*}}}*/ - -/* Steal work from another shepherd's queue - * Returns the amount of work stolen - * PRECONDITION: the readlock must be aquired. 
- */ -static QINLINE qthread_t *qthread_steal(qt_threadqueue_t *thiefq) { /*{{{*/ - int i; - - extern TLS_DECL(qthread_shepherd_t *, shepherd_structs); - qthread_shepherd_t *victim_shepherd; - qthread_worker_t *worker = (qthread_worker_t *)TLS_GET(shepherd_structs); - qthread_shepherd_t *thief_shepherd = (qthread_shepherd_t *)worker->shepherd; - qthread_t **nostealbuffer = worker->nostealbuffer; - qthread_t **stealbuffer = worker->stealbuffer; - -#ifdef STEAL_PROFILE // should give mechanism to make steal profiling optional - qthread_incr(&thief_shepherd->steal_called, 1); -#endif -#ifdef QTHREAD_OMP_AFFINITY - if (thief_shepherd->stealing_mode == QTHREAD_STEAL_ON_ALL_IDLE) { - for (i = 0; i < qlib->nworkerspershep; i++) - if (thief_shepherd->workers[i].current != NULL) { - thiefq->stealing = 0; - return (NULL); - } - thief_shepherd->stealing_mode = QTHREAD_STEAL_ON_ANY_IDLE; - } -#endif -#ifdef STEAL_PROFILE // should give mechanism to make steal profiling optional - qthread_incr(&thief_shepherd->steal_attempted, 1); -#endif - int shepherd_offset = qthread_worker(NULL) % qlib->nshepherds; - for (i = 1; i < qlib->nshepherds; i++) { - shepherd_offset = (shepherd_offset + 1) % qlib->nshepherds; - if (shepherd_offset == thief_shepherd->shepherd_id) { - shepherd_offset = (shepherd_offset + 1) % qlib->nshepherds; - } - victim_shepherd = &qlib->shepherds[shepherd_offset]; - if (victim_shepherd->ready->empty) { continue; } - int amtStolen = qt_threadqueue_dequeue_steal( - victim_shepherd->ready, nostealbuffer, stealbuffer); - if (amtStolen > 0) { -#ifdef STEAL_PROFILE // should give mechanism to make steal profiling optional - qthread_incr(&thief_shepherd->steal_successful, 1); -#endif - qt_threadqueue_enqueue_multiple( - thiefq, amtStolen, stealbuffer, thief_shepherd); - thiefq->stealing = 0; - return (stealbuffer[0]); - } -#ifdef STEAL_PROFILE // should give mechanism to make steal profiling optional - else { - qthread_incr(&thief_shepherd->steal_failed, 1); - } -#endif - } - thiefq->stealing = 0; - return (NULL); -} /*}}}*/ - -#ifdef CAS_STEAL_PROFILE -void INTERNAL qthread_cas_steal_stat(void) { - int i, j; - uint64_strip_t accum; - uint64_t total = 0; - double weighted_sum = 0.0; - - for (j = 0; j < CAS_STEAL_PROFILE_LENGTH; j++) { accum.fields[j] = 0; } - for (i = 0; i < qlib->nshepherds * qlib->nworkerspershep; i++) { - for (j = 0; j < CAS_STEAL_PROFILE_LENGTH; j++) { - accum.fields[j] += qlib->cas_steal_profile[i].fields[j]; - } - } - for (j = 0; j < CAS_STEAL_PROFILE_LENGTH; j++) { - total += accum.fields[j]; - weighted_sum += (accum.fields[j] * j); - } - - fprintf(stdout, "threadqueue distribution of CAS retries\n"); - for (j = 0; j < (CAS_STEAL_PROFILE_LENGTH - 1); j++) { - fprintf( - stdout, "%d - %4.2f%%\n", j, ((double)accum.fields[j]) / total * 100.0); - } - fprintf( - stdout, "%d+ - %4.2f%%\n", j, ((double)accum.fields[j]) / total * 100.0); - fprintf(stdout, "approximate mean is %4.2f \n", weighted_sum / total); - fprintf(stdout, "\n"); -} - -#endif /* ifdef CAS_STEAL_PROFILE */ - -#ifdef STEAL_PROFILE // should give mechanism to make steal profiling optional -void INTERNAL qthread_steal_stat(void) { - int i; - - assert(qlib); - for (i = 0; i < qlib->nshepherds; i++) { - fprintf(stdout, - "shepherd %d - steals called %ld attempted %ld failed %ld " - "successful %ld work stolen %ld\n", - qlib->shepherds[i].shepherd_id, - qlib->shepherds[i].steal_called, - qlib->shepherds[i].steal_attempted, - qlib->shepherds[i].steal_failed, - qlib->shepherds[i].steal_successful, - 
qlib->shepherds[i].steal_amount_stolen); - } -} - -#endif /* ifdef STEAL_PROFILE */ - -/* walk queue looking for a specific value -- if found remove it (and start - * it running) -- if not return NULL - */ -qthread_t INTERNAL *qt_threadqueue_dequeue_specific(qt_threadqueue_t *q, - void *value) { /*{{{*/ - int id = qthread_worker_unique(NULL); - - assert(q != NULL); - - rwlock_wrlock(q->rwlock, id); - - qt_threadqueue_union_t top; - - top.sse = q->top; - qt_threadqueue_finish(q, top.entry); - - if (q->bottom == top.entry.index) { - rwlock_wrunlock(q->rwlock); - return (NULL); - } - - if (top.entry.value->ret == value) { - qt_threadqueue_union_t snapshot; - - qthread_t *retval = top.entry.value; - uint32_t previndex = (top.entry.index - 1) % q->size; - snapshot.sse = q->base[previndex]; - - top.entry.index = previndex; - top.entry.value = snapshot.entry.value; - top.entry.counter = snapshot.entry.counter; - - q->top = top.sse; - - rwlock_wrunlock(q->rwlock); - return (retval); - } else { - uint32_t current = (q->bottom + 1) % q->size; - uint32_t size = q->size; - uint32_t bottom = q->bottom; - while (current != top.entry.index) { - qt_threadqueue_union_t snapshot; - - snapshot.sse = q->base[current]; - qthread_t *t = snapshot.entry.value; - if (t->ret == value) { - /* Two cases: - * (i) The current index is below the top - * index in the array. It is easier to - * slide the elements above current, and - * decrement the top index. - * (ii) The current index is above the top - * index in the array. It is easier to - * slide the elements below current, and - * increment the bottom index. - */ - if (current < top.entry.index) { - memmove((void *)(q->base + current), - (void const *)(q->base + current + 1), - (top.entry.index - current - 1) * - sizeof(qt_threadqueue_entry_t)); - top.entry.index = (top.entry.index - 1) % q->size; - q->top = top.sse; - } else { - memmove((void *)(q->base + bottom + 1), - (void const *)(q->base + bottom), - (current - bottom) * sizeof(qt_threadqueue_entry_t)); - q->bottom = (bottom + 1) % q->size; - } - rwlock_wrunlock(q->rwlock); - return (t); - } - current = (current + 1) % size; - } - } - - rwlock_wrunlock(q->rwlock); - return (NULL); -} /*}}}*/ - -void INTERNAL qthread_steal_enable() { /*{{{*/ - qt_threadqueue_t *q; - size_t i; - size_t numSheps = qthread_num_shepherds(); - - for (i = 0; i < numSheps; i++) { - q = qlib->threadqueues[i]; - q->steal_disable = 0; - } -} /*}}}*/ - -void INTERNAL qthread_steal_disable() { /*{{{*/ - qt_threadqueue_t *q; - size_t i; - size_t numSheps = qthread_num_shepherds(); - - for (i = 0; i < numSheps; i++) { - q = qlib->threadqueues[i]; - q->steal_disable = 1; - } -} /*}}}*/ - -#if 0 // begin test code, because this function - // can't go in the test suite as it calls internal functions - -#include - -int qt_threadqueue_test() -{ - printf("Initializing test\n"); - qthread_initialize(); - qt_threadqueue_t *threadqueue = qt_threadqueue_new(&(qlib->shepherds[0])); - - assert(threadqueue->empty == 1); - if(threadqueue->empty != 1) { - fprintf(stderr, "Threadqueue was initialized with empty bit set to FALSE\n"); - return -1; - } - - qthread_t *task = qthread_thread_new(NULL, NULL, 0, NULL, 0); - - printf("Enqueueing task.\n"); - qt_threadqueue_enqueue(threadqueue, task); - - assert(threadqueue->empty != 1); - - if(threadqueue->empty == 1) { - fprintf(stderr, "Threadqueue empty bit set after enqueue operation\n"); - return -1; - } - - printf("Dequeueing task\n"); - qthread_t *result = qt_scheduler_get_thread(threadqueue, 1); - - 
assert(result == task); - - if(result != task) { - fprintf(stderr, "Task enqueued to stack is not identical to task dequeued from stack\n"); - return -1; - } - - uint32_t size = threadqueue->size; - printf("Queue size is %d\n", size); - printf("Enqueueing the same task %d times\n", size); - - for(int i = 0; i < size; i++) { - qt_threadqueue_enqueue(threadqueue, task); - } - - uint32_t newsize = threadqueue->size; - printf("Queue size is %d\n", newsize); - - assert(newsize == 2 * size); - if (newsize != 2 * size) { - fprintf(stderr, "New size is not equal to twice the old size\n"); - return -1; - } - - return 0; -} - -// end test code -#endif /* if 0 */ - -qthread_shepherd_id_t INTERNAL -qt_threadqueue_choose_dest(qthread_shepherd_t *curr_shep) { - if (curr_shep) { - return curr_shep->shepherd_id; - } else { - return (qthread_shepherd_id_t)0; - } -} - -size_t INTERNAL qt_threadqueue_policy(const enum threadqueue_policy policy) { - switch (policy) { - default: return THREADQUEUE_POLICY_UNSUPPORTED; - } -} - -/* vim:set expandtab: */ From 52ff57a76870d65b7f1dee72eaa6ec16f9f7905d Mon Sep 17 00:00:00 2001 From: Ian Henriksen Date: Wed, 18 Sep 2024 14:42:58 -0600 Subject: [PATCH 02/11] Get rid of unsupported/untested topology detection options. --- config/qthread_check_libnuma.m4 | 58 --- config/qthread_check_linux.m4 | 36 -- config/qthread_check_machtopo.m4 | 29 -- config/qthread_check_plpa.m4 | 37 -- configure.ac | 24 +- include/qt_affinity.h | 4 - src/affinity/hwloc_v2.c | 763 ---------------------------- src/affinity/lgrp.c | 281 ---------- src/affinity/libnuma.c | 198 -------- src/affinity/libnumaV2.c | 233 --------- src/affinity/mach.c | 108 ---- src/affinity/plpa.c | 97 ---- src/affinity/sys.c | 112 ---- src/ds/qarray.c | 3 - src/ds/qdqueue.c | 4 - test/benchmarks/pmea09/time_qpool.c | 11 - 16 files changed, 2 insertions(+), 1996 deletions(-) delete mode 100644 config/qthread_check_libnuma.m4 delete mode 100644 config/qthread_check_linux.m4 delete mode 100644 config/qthread_check_machtopo.m4 delete mode 100644 config/qthread_check_plpa.m4 delete mode 100644 src/affinity/hwloc_v2.c delete mode 100644 src/affinity/lgrp.c delete mode 100644 src/affinity/libnuma.c delete mode 100644 src/affinity/libnumaV2.c delete mode 100644 src/affinity/mach.c delete mode 100644 src/affinity/plpa.c delete mode 100644 src/affinity/sys.c diff --git a/config/qthread_check_libnuma.m4 b/config/qthread_check_libnuma.m4 deleted file mode 100644 index a1755401..00000000 --- a/config/qthread_check_libnuma.m4 +++ /dev/null @@ -1,58 +0,0 @@ -# -*- Autoconf -*- -# -# Copyright (c) 2008 Sandia Corporation -# - -# QTHREAD_CHECK_LIBNUMA([action-if-found], [action-if-not-found]) -# ------------------------------------------------------------------------------ -AC_DEFUN([QTHREAD_CHECK_LIBNUMA], [ -AC_CHECK_HEADERS([numa.h], - [libnuma_happy=yes - break], - [libnuma_happy=no]) -QT_OLDLIBS="$LIBS" -AS_IF([test "x$libnuma_happy" = "xyes"], - [AC_SEARCH_LIBS([numa_available], - [numa], - [libnuma_happy=yes], - [libnuma_happy=no])]) -AS_IF([test "x$libnuma_happy" = "xyes"], - [AC_MSG_CHECKING(if NUMA is available) - LIBS="$LIBS -lnuma" - AC_TRY_RUN([ -#include -int main() { return ( numa_available() != -1 ) ? 
0 : 1; } - ], - [libnuma_happy=yes], - [libnuma_happy=no - LIBS="$QT_OLDLIBS"], - [libnuma_happy=no - LIBS="$QT_OLDLIBS"]) - AC_MSG_RESULT($libnuma_happy) - ]) - -AS_IF([test "x$libnuma_happy" = "xyes"], - [ - dnl okay, so at this point, we need to determine what KIND of - dnl libnuma interface we're dealing with - AC_CHECK_FUNC([numa_allocate_nodemask], - [AC_DEFINE([QTHREAD_LIBNUMA_V2],[1],[if libnuma provides numa_allocate_nodemask])]) - AC_CHECK_FUNCS([numa_num_configured_cpus numa_num_thread_cpus numa_bitmask_nbytes numa_distance]) - AS_IF([test "x$ac_cv_func_numa_distance" = "xyes"], - [AC_TRY_RUN([ -#include -int main() { return (numa_distance(0,0) >= 0); } - ], - [numa_distance_happy=yes], - [numa_distance_happy=no], - [numa_distance_happy=yes])]) - AS_IF([test "x$numa_distance_happy" = "xyes"], - [AC_DEFINE([QTHREAD_NUMA_DISTANCE_WORKING],[1],[if libnuma's numa_distance() function works])]) - ]) - -AS_IF([test "x$libnuma_happy" = "xyes"], - [AC_DEFINE([QTHREAD_HAVE_LIBNUMA],[1],[if libnuma is available]) - AS_IF([test "x$ac_cv_func_numa_allocate_nodemask" == "xyes"], - [$2],[$1])], - [$3]) -]) diff --git a/config/qthread_check_linux.m4 b/config/qthread_check_linux.m4 deleted file mode 100644 index 1379c774..00000000 --- a/config/qthread_check_linux.m4 +++ /dev/null @@ -1,36 +0,0 @@ -# -*- Autoconf -*- -# -# Copyright (c) 2008 Sandia Corporation -# - -# QTHREAD_CHECK_LINUX([action-if-found], [action-if-not-found]) -# ------------------------------------------------------------------------------ -AC_DEFUN([QTHREAD_CHECK_LINUX], [ -AC_CACHE_CHECK([for _SC_NPROCESSORS_CONF], - [qthread_cv_sc_nprocessors_conf], - [AC_LINK_IFELSE([AC_LANG_SOURCE([[ -#include - -int main() { - return sysconf(_SC_NPROCESSORS_CONF); -}]])], - [qthread_cv_sc_nprocessors_conf=yes], - [qthread_cv_sc_nprocessors_conf=no])]) -AS_IF([test "x$qthread_cv_sc_nprocessors_conf" = xyes], - [AC_DEFINE([HAVE_SC_NPROCESSORS_CONF], [1], [define if you have _SC_NPROCESSORS_CONF])], - [AC_CACHE_CHECK([for HW_NCPU], - [qthread_cv_hw_ncpu], - [AC_LINK_IFELSE([AC_LANG_SOURCE([[ -#include - -int main() { - int name[2] = { CTL_HW, HW_NCPU }; - unsigned int oldv; - unsigned int oldvlen = sizeof(oldv); - return sysctl(name, &oldv, &oldvlen, NULL, 0); -}]])], - [qthread_cv_hw_ncpu=yes], - [qthread_cv_hw_ncpu=no])]) - AS_IF([test "x$qthread_cv_hw_ncpu" = xyes], - [AC_DEFINE([HAVE_HW_NCPU], [1], [define if you have HW_NCPU and CTL_HW])])]) -]) diff --git a/config/qthread_check_machtopo.m4 b/config/qthread_check_machtopo.m4 deleted file mode 100644 index 5bbf6841..00000000 --- a/config/qthread_check_machtopo.m4 +++ /dev/null @@ -1,29 +0,0 @@ -# -*- Autoconf -*- -# -# Copyright (c) 2008 Sandia Corporation -# - -# QTHREAD_CHECK_MACHTOPO([action-if-found], [action-if-not-found]) -# ------------------------------------------------------------------------------ -AC_DEFUN([QTHREAD_CHECK_MACHTOPO], [ - qt_allgoodsofar=yes - AC_CHECK_HEADERS([mach/mach_init.h mach/thread_policy.h],[], - [qt_allgoodsofar=no - break]) - AS_IF([test "x$qt_allgoodsofar" = xyes], - [AC_SEARCH_LIBS([thread_policy_set],[],[], - [qt_allgoodsofar=no])]) - AS_IF([test "x$qt_allgoodsofar" = xyes], - [AC_SEARCH_LIBS([thread_policy_get],[],[], - [qt_allgoodsofar=no])]) - AS_IF([test "x$qt_allgoodsofar" = xyes], - [AC_CHECK_DECL([THREAD_AFFINITY_POLICY_COUNT],[], - [qt_allgoodsofar=no], - [[#include -#include ]])]) - - AS_IF([test "x$qt_allgoodsofar" = xyes], - [AC_DEFINE([QTHREAD_HAVE_MACHTOPO],[1],[if the machine has a MacOS-style Mach topology interface]) - $1], - [$2]) 
-]) diff --git a/config/qthread_check_plpa.m4 b/config/qthread_check_plpa.m4 deleted file mode 100644 index 61d209d1..00000000 --- a/config/qthread_check_plpa.m4 +++ /dev/null @@ -1,37 +0,0 @@ -# -*- Autoconf -*- -# -# Copyright (c) 2008 Sandia Corporation -# - -# QTHREAD_CHECK_PLPA([action-if-found], [action-if-not-found]) -# ------------------------------------------------------------------------------ -AC_DEFUN([QTHREAD_CHECK_PLPA], [ -AC_CHECK_LIB([plpa],[plpa_api_probe], - [plpa_found=yes - LIBS="$LIBS -lplpa" - AC_MSG_CHECKING(whether plpa works) - AC_RUN_IFELSE([AC_LANG_SOURCE([[ -#include -#include -#include - -int main(int argc, char* argv[]) { - plpa_api_type_t p; - if (0 == plpa_api_probe(&p) && PLPA_PROBE_OK == p) { - return 0; - } else { - assert(0); - return -1; - } -} - ]])], - [plpa_happy=yes], - [plpa_happy=no], - [plpa_happy=no]) - AC_MSG_RESULT($plpa_happy) - ]) - AS_IF([test "x$plpa_happy" = "xyes"], - [AC_DEFINE([QTHREAD_USE_PLPA],[1],[define to 1 if PLPA is available and works]) - $1], - [$2]) -]) diff --git a/configure.ac b/configure.ac index bfe0e6f7..014ba743 100644 --- a/configure.ac +++ b/configure.ac @@ -582,7 +582,6 @@ esac # Figure out whether the compiler has builtin atomic operations AS_IF([test "x$enable_hardware_atomics" != xno], [QTHREAD_CHECK_ATOMICS($sizeof_aligned_t)]) -QTHREAD_VAMACROS QTHREAD_BUILTIN_SYNCHRONIZE @@ -866,30 +865,11 @@ AS_IF([test "x$qthread_topo" != xno], [AS_IF([test "x$qthread_topo" = "xnone_specified"], [qthread_topo=no]) # First, check for hwloc, since it gives me the most portable/flexible/reliable/detailed information. - AS_IF([test "x$qthread_topo" = xno -o "x$qthread_topo" = xbinders -o "x$qthread_topo" = xhwloc -o "x$qthread_topo" = xhwloc_v2], - [QTHREAD_CHECK_HWLOC([AS_IF([test "x$qthread_topo" != xhwloc -a "x$qthread_topo" != xhwloc_v2 -a "x$qthread_topo" != xbinders], + AS_IF([test "x$qthread_topo" = xno -o "x$qthread_topo" = xbinders -o "x$qthread_topo" = xhwloc], + [QTHREAD_CHECK_HWLOC([AS_IF([test "x$qthread_topo" != xhwloc -a "x$qthread_topo" != xbinders], [qthread_topo=hwloc])], [AS_IF([test "x$qthread_topo" != xno], [AC_MSG_ERROR([Specified topology library ($qthread_topo) does not work.])])])]) - AS_IF([test "x$qthread_topo" = xno -o "x$qthread_topo" = xlibnuma -o "x$qthread_topo" = xlibnumaV2], - [QTHREAD_CHECK_LIBNUMA([qthread_topo=libnuma], - [qthread_topo=libnumaV2], - [AS_IF([test "x$qthread_topo" != xno], - [AC_MSG_ERROR([Specified topology library ($qthread_topo) does not work.])])])]) - # Third, check any others. 
- AS_IF([test "x$qthread_topo" = xno -o "x$qthread_topo" = xmach], - [QTHREAD_CHECK_MACHTOPO([qthread_topo=mach], - [AS_IF([test "x$qthread_topo" != xno], - [AC_MSG_ERROR([Specified topology library ($qthread_topo) does not work.])])])]) - # PLPA is deprecated in favor of hwloc - AS_IF([test "x$qthread_topo" = xno -o "x$qthread_topo" = xplpa], - [QTHREAD_CHECK_PLPA([qthread_topo=plpa], - [AS_IF([test "x$qthread_topo" != xno], - [AC_MSG_ERROR([Specified topology library ($qthread_topo) does not work.])])])]) - AS_IF([test "x$qthread_topo" = xno -o "x$qthread_topo" = xsys], - [QTHREAD_CHECK_LINUX([qthread_topo=sys], - [AS_IF([test "x$qthread_topo" = xsys], - [AC_MSG_ERROR([Specified topology library ($qthread_topo) does not work.])])])]) ]) AS_IF([test "x$enable_lf_febs" == "xyes"], diff --git a/include/qt_affinity.h b/include/qt_affinity.h index 8e4ecf17..bc989e60 100644 --- a/include/qt_affinity.h +++ b/include/qt_affinity.h @@ -12,10 +12,6 @@ typedef struct qthread_shepherd_s qthread_shepherd_t; #endif -#if defined(QTHREAD_HAVE_LIBNUMA) -#define QTHREAD_HAVE_MEM_AFFINITY -#endif - #if defined(QTHREAD_HAVE_HWLOC) && (HWLOC_API_VERSION > 0x00010000) #define QTHREAD_HAVE_MEM_AFFINITY #endif diff --git a/src/affinity/hwloc_v2.c b/src/affinity/hwloc_v2.c deleted file mode 100644 index a15a1b7c..00000000 --- a/src/affinity/hwloc_v2.c +++ /dev/null @@ -1,763 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include - -#include "qt_affinity.h" -#include "qt_asserts.h" /* for qassert() */ -#include "qt_debug.h" -#include "qt_envariables.h" -#include "qt_output_macros.h" -#include "qt_subsystems.h" -#include "shufflesheps.h" - -static hwloc_topology_t sys_topo; -static uint32_t initialized = 0; - -static hwloc_cpuset_t mccoy_thread_bindings; - -static unsigned int qt_topology_output_level = 0; - -typedef struct { - int uid; - qthread_shepherd_id_t shep_id; - qthread_worker_id_t worker_id; - hwloc_obj_t bind_obj; -} topo_worker_t; - -typedef struct { - /* Shepherd mapping info */ - int num_sheps; - int shep_level; - hwloc_obj_t shep_obj; - - /* Worker mapping info */ - int num_workers; - int worker_level; - hwloc_obj_t worker_obj; - - /* linear mapping */ - topo_worker_t *worker_map; - - int num_wps; -} qt_topology_t; - -static qt_topology_t qt_topo; - -static void initialize_qt_topo(void) { - qt_topo.num_sheps = -1; - qt_topo.shep_level = -1; - qt_topo.shep_obj = NULL; - qt_topo.num_workers = -1; - qt_topo.worker_level = -1; - qt_topo.worker_obj = NULL; - qt_topo.num_wps = -1; - qt_topo.worker_map = NULL; -} - -#define HIERARCHY_NAME_LEN 128 -static hwloc_obj_type_t *topo_types; -static char (*topo_type_names)[HIERARCHY_NAME_LEN]; -static int num_types; - -static void print_system_view(hwloc_topology_t sys_topo) { - char str[128]; - hwloc_obj_t obj = NULL; - hwloc_const_cpuset_t allowed_cpuset = - hwloc_topology_get_allowed_cpuset(sys_topo); - - obj = hwloc_get_obj_inside_cpuset_by_depth(sys_topo, allowed_cpuset, 0, 0); - while (NULL != obj) { - int const depth = obj->depth; - int const arity = obj->arity; - int const num_objs = - hwloc_get_nbobjs_inside_cpuset_by_depth(sys_topo, allowed_cpuset, depth); - - hwloc_obj_snprintf(str, sizeof(str), sys_topo, obj, "#", 0); - - printf("TOPO: depth: %d\n", depth); - printf("TOPO:\tarity: %d\n", arity); - printf("TOPO:\tnum_objs: %d\n", num_objs); - printf("TOPO:\ttype: %s\n", str); - - obj = obj->first_child; - } - - int const num_cores = hwloc_get_nbobjs_inside_cpuset_by_type( - sys_topo, allowed_cpuset, HWLOC_OBJ_CACHE_UNIFIED); - - 
printf("TOPO: number of available COREs: %d\n", num_cores); -} - -static void print_logical_view(void) { - char shep_level[128]; - char worker_level[128]; - char str[128]; - - printf("QT_TOPO: shep_level: %d\n", qt_topo.shep_level); - if (NULL != qt_topo.shep_obj) { - hwloc_obj_type_snprintf( - shep_level, sizeof(shep_level), qt_topo.shep_obj, 0); - printf("QT_TOPO: shep type: %s\n", shep_level); - } - - printf("QT_TOPO: worker_level: %d\n", qt_topo.worker_level); - if (NULL != qt_topo.worker_obj) { - hwloc_obj_type_snprintf( - worker_level, sizeof(worker_level), qt_topo.worker_obj, 0); - printf("QT_TOPO: worker type: %s\n", worker_level); - } - - printf("QT_TOPO: #(sheps): %d\n", qt_topo.num_sheps); - printf("QT_TOPO: #(workers): %d\n", qt_topo.num_workers); - printf("QT_TOPO: #(wps): %d\n", qt_topo.num_wps); - - if (!strcmp(worker_level, "Core")) { - hwloc_obj_snprintf(str, - sizeof(str), - sys_topo, - qt_topo.worker_map[0].bind_obj->first_child, - "#", - 0); - printf("worker_map: {%d(%d,%d, Core containing %s)", - qt_topo.worker_map[0].uid, - qt_topo.worker_map[0].shep_id, - qt_topo.worker_map[0].worker_id, - str); - } else { - hwloc_obj_snprintf( - str, sizeof(str), sys_topo, qt_topo.worker_map[0].bind_obj, "#", 0); - printf("worker_map: {%d(%d,%d,%s)", - qt_topo.worker_map[0].uid, - qt_topo.worker_map[0].shep_id, - qt_topo.worker_map[0].worker_id, - str); - } - - for (int i = 1; i < qt_topo.num_workers; i++) { - if (!strcmp(worker_level, "Core")) { - hwloc_obj_snprintf(str, - sizeof(str), - sys_topo, - qt_topo.worker_map[i].bind_obj->first_child, - "#", - 0); - printf(", %d(%d,%d, Core containing %s)", - qt_topo.worker_map[i].uid, - qt_topo.worker_map[i].shep_id, - qt_topo.worker_map[i].worker_id, - str); - } else { - hwloc_obj_snprintf( - str, sizeof(str), sys_topo, qt_topo.worker_map[i].bind_obj, "#", 0); - printf(", %d(%d,%d,%s)", - qt_topo.worker_map[i].uid, - qt_topo.worker_map[i].shep_id, - qt_topo.worker_map[i].worker_id, - str); - } - } - printf("}\n"); -} - -static void fini_type_options(void) { - qthread_debug(AFFINITY_CALLS, "destroying type options\n"); - - qt_free(topo_types); - qt_free(topo_type_names); -} - -static void init_type_options(void) { - qthread_debug(AFFINITY_CALLS, "creating type options\n"); - - hwloc_const_cpuset_t allowed_cpuset = - hwloc_topology_get_allowed_cpuset(sys_topo); - hwloc_obj_t obj = - hwloc_get_obj_inside_cpuset_by_depth(sys_topo, allowed_cpuset, 0, 0); - - /* Walk down tree */ - while (NULL != obj) { - num_types += 1; - - obj = obj->first_child; - } - - topo_types = qt_malloc(num_types * sizeof(hwloc_obj_type_t)); - topo_type_names = qt_malloc(num_types * HIERARCHY_NAME_LEN * sizeof(char)); - assert(NULL != topo_types); - assert(NULL != topo_type_names); - - /* Walk up tree */ - obj = hwloc_get_obj_inside_cpuset_by_depth( - sys_topo, allowed_cpuset, num_types - 1, 0); - int cache_level = 1; - int type_id = num_types - 1; - while (NULL != obj) { - topo_types[type_id] = obj->type; - - if (0 == hwloc_compare_types(HWLOC_OBJ_CACHE_UNIFIED, obj->type)) { - snprintf(topo_type_names[type_id], 8, "L%dcache", cache_level); - cache_level += 1; - } else { - strncpy(topo_type_names[type_id], - hwloc_obj_type_string(obj->type), - HIERARCHY_NAME_LEN); - } - - type_id -= 1; - obj = obj->parent; - } -} - -static void print_type_options(void) { - printf("type_options: {%s", topo_type_names[0]); - for (int i = 1; i < num_types; i++) { printf(", %s", topo_type_names[i]); } - printf("}\n"); -} - -static hwloc_obj_type_t wkr_type = HWLOC_OBJ_CORE; -static int 
wkr_index = -1; -static int wkr_depth = -1; - -#if HWLOC_API_VERSION < 0x00010100 -#define hwloc_bitmap_weight hwloc_cpuset_weight -#define hwloc_bitmap_asprintf hwloc_cpuset_asprintf -#define hwloc_bitmap_foreach_begin hwloc_cpuset_foreach_begin -#define hwloc_bitmap_foreach_end hwloc_cpuset_foreach_end -#define hwloc_bitmap_alloc hwloc_cpuset_alloc -#define hwloc_bitmap_free hwloc_cpuset_free -#endif /* if HWLOC_API_VERSION < 0x00010100 */ - -static void qt_affinity_internal_hwloc_teardown(void) { /*{{{*/ - DEBUG_ONLY(hwloc_topology_check(sys_topo)); - - fini_type_options(); - - /* Reestablish mccoy thread bindings */ - hwloc_set_cpubind(sys_topo, mccoy_thread_bindings, HWLOC_CPUBIND_THREAD); - hwloc_bitmap_free(mccoy_thread_bindings); - - qt_free(qt_topo.worker_map); - hwloc_topology_destroy(sys_topo); - initialized = 0; -} /*}}}*/ - -/* - * User hints: - * - QT_TOPO_OUTPUT_LEVEL: The amount of topology information to print. Level - * '2' will print a synopsis of the topology reported - * by hwloc. Currently only level '2' is used. - * - QT_SHEPHERD_BOUNDARY: The level in the hierarchy to associate with - * shepherds. - * - QT_WORKER_UNIT: The level in the hierarchy to use for binding - * workers. - * The worker will be bound to the set of COREs under - * this level. - */ -void INTERNAL qt_affinity_init(qthread_shepherd_id_t *nbshepherds, - qthread_worker_id_t *nbworkers, - size_t *hw_par) { /*{{{ */ - qthread_debug(AFFINITY_CALLS, - "nbshepherds=%u, nbworkers=%u, hw_par=%u\n", - *nbshepherds, - *nbworkers, - *hw_par); - - /* Collect user hints */ - qthread_shepherd_id_t num_sheps_hint = *nbshepherds; - qthread_worker_id_t num_wps_hint = *nbworkers; - size_t num_workers_hint = *hw_par; - - /* Set up logical sys_topo */ - initialize_qt_topo(); - - /* Initialize hwloc setup */ - if (qthread_cas(&initialized, 0, 1) == 0) { - qassert(hwloc_topology_init(&sys_topo), 0); - qassert(hwloc_topology_load(sys_topo), 0); - MACHINE_FENCE; - initialized = 2; - } else { - while (initialized == 1) SPINLOCK_BODY(); - } - DEBUG_ONLY(hwloc_topology_check(sys_topo)); - qthread_internal_cleanup(qt_affinity_internal_hwloc_teardown); - - /* Record mccoy thread bindings so we can reestablish during finalization */ - mccoy_thread_bindings = hwloc_bitmap_alloc(); - hwloc_get_cpubind(sys_topo, mccoy_thread_bindings, HWLOC_CPUBIND_THREAD); - - hwloc_const_cpuset_t allowed_cpuset = - hwloc_topology_get_allowed_cpuset(sys_topo); - - /* Collect CORE info */ - hwloc_obj_t core_obj = hwloc_get_obj_inside_cpuset_by_type( - sys_topo, allowed_cpuset, HWLOC_OBJ_CORE, 0); - if (NULL == core_obj) { - print_error("failed to locate CORE#0\n"); - exit(EXIT_FAILURE); - } - int const num_cores = hwloc_get_nbobjs_inside_cpuset_by_depth( - sys_topo, allowed_cpuset, core_obj->depth); - if (0 >= core_obj) { - print_error("failed to count COREs\n"); - exit(EXIT_FAILURE); - } - - /* Process boundary and unit environment variables */ - int shep_type_id = -1; - int worker_type_id = -1; - - /* Build up sys_topo options */ - init_type_options(); - - /* Collect environment variables */ - { - qt_topology_output_level = - qt_internal_get_env_num("TOPO_OUTPUT_LEVEL", 0, 0); - } - { - char const *qsh = qt_internal_get_env_str("SHEPHERD_BOUNDARY", "node"); - if (qsh) { - for (int ti = 0; ti < num_types; ++ti) { - if (!strncasecmp( - topo_type_names[ti], qsh, strlen(topo_type_names[ti]))) { - shep_type_id = ti; - } - } - if (shep_type_id == -1) { - fprintf(stderr, "unparsable shepherd boundary (%s)\n", qsh); - exit(EXIT_FAILURE); - } - } - - qsh 
= qt_internal_get_env_str("WORKER_UNIT", "pu"); - if (qsh) { - for (int ti = 0; ti < num_types; ++ti) { - if (!strncasecmp( - topo_type_names[ti], qsh, strlen(topo_type_names[ti]))) { - worker_type_id = ti; - } - } - if (worker_type_id == -1) { - fprintf(stderr, "unparsable worker unit (%s)\n", qsh); - exit(EXIT_FAILURE); - } - } - } - - /* Print system view */ - if (2 == qt_topology_output_level) { - print_system_view(sys_topo); - print_type_options(); - } - - /**************************************************************************/ - - qthread_debug(AFFINITY_DETAILS, "HINTS: max-sheps: %d\n", num_sheps_hint); - qthread_debug(AFFINITY_DETAILS, "HINTS: max-wps: %d\n", num_wps_hint); - qthread_debug(AFFINITY_DETAILS, "HINTS: max-workers: %d\n", num_workers_hint); - qthread_debug(AFFINITY_DETAILS, "HINTS: shep-boundary: %d\n", shep_type_id); - qthread_debug(AFFINITY_DETAILS, "HINTS: worker-unit: %d\n", worker_type_id); - - /* Process boundary and units requests */ - if (-1 != worker_type_id) { - /* User specified worker unit */ - - hwloc_obj_t worker_obj = hwloc_get_obj_inside_cpuset_by_type( - sys_topo, allowed_cpuset, topo_types[worker_type_id], 0); - if (NULL == worker_obj) { - print_error("failed to locate worker unit object\n"); - exit(EXIT_FAILURE); - } - - /* Calculate number of these objects */ - int num_objs = hwloc_get_nbobjs_inside_cpuset_by_type( - sys_topo, allowed_cpuset, topo_types[worker_type_id]); - - qthread_debug(AFFINITY_DETAILS, - "found %d %s unit obj(s)\n", - num_objs, - topo_type_names[worker_type_id]); - - /* Update logical sys_topo info */ - qt_topo.worker_obj = worker_obj; - qt_topo.worker_level = worker_obj->depth; - - /* Update hints */ - } - if (-1 != shep_type_id) { - /* User specified shepherd boundary */ - - hwloc_obj_t shep_obj = hwloc_get_obj_inside_cpuset_by_type( - sys_topo, allowed_cpuset, topo_types[shep_type_id], 0); - if (NULL == shep_obj) { - print_error("failed to locate shepherd boundary object\n"); - exit(EXIT_FAILURE); - } - - /* Calculate number of these objects */ - int const num_shep_objs = hwloc_get_nbobjs_inside_cpuset_by_type( - sys_topo, allowed_cpuset, topo_types[shep_type_id]); - - qthread_debug(AFFINITY_DETAILS, - "found %d %s shep obj(s)\n", - num_shep_objs, - topo_type_names[shep_type_id]); - - /* Calculate number of CORE within boundary: this is max num-workers */ - int const num_shep_cores = num_cores / num_shep_objs; - - /* Update logical sys_topo info */ - qt_topo.shep_obj = shep_obj; - qt_topo.shep_level = shep_obj->depth; - - /* Update hints */ - if (0 == num_sheps_hint || num_shep_objs < num_sheps_hint) { - qthread_debug(AFFINITY_DETAILS, - "%s shep obj => max-sheps=%d\n", - topo_type_names[shep_type_id], - num_shep_objs); - num_sheps_hint = num_shep_objs; - } - if (0 == num_wps_hint || num_shep_cores < num_wps_hint) { - qthread_debug(AFFINITY_DETAILS, - "%s shep obj => max-wps=%d\n", - topo_type_names[shep_type_id], - num_shep_cores); - num_wps_hint = num_shep_cores; - } - } - - /* Establish boundary and unit */ - if (NULL == qt_topo.worker_obj) { - qthread_debug(AFFINITY_DETAILS, - "No worker unit hint, choosing Core unit obj\n"); - - /* Assume unit is CORE; handle boundary selection below. */ - qt_topo.worker_obj = core_obj; - qt_topo.worker_level = core_obj->depth; - } - - if (NULL == qt_topo.shep_obj) { - /* Have only unit, must find boundary */ - - /* Policy: - * - If have num-sheps and num-wps hints, then select first shared level - * that accomodates both values. 
- * - If have only num-wps hint, then select first shared level that - * accomodates that values. - * - If have only num-sheps hint, then select first shared level that - * accomodates that values. - * - Otherwise select first shared level at or above the unit level. */ - - /* Policy: select the first shared level at or above the unit with - * at least num-wps shared COREs (if requested) and at most - * num-sheps boundary objects (if requested); num-wps should - * take precedence over num-sheps */ - hwloc_obj_t shep_obj = core_obj; - - /* Walk up tree counting shared COREs */ - int shared = 1; - while (NULL != shep_obj->parent) { - shared *= (shep_obj->arity == 0) ? 1 : shep_obj->arity; - - if (0 != num_sheps_hint && 0 != num_wps_hint) { - /* Have both num-sheps and num-wps hints */ - int num_shep_objs = hwloc_get_nbobjs_inside_cpuset_by_depth( - sys_topo, allowed_cpuset, shep_obj->depth); - if (num_shep_objs == num_sheps_hint && shared >= num_wps_hint) { - /* Choose this level */ - break; - } - } else if (0 != num_sheps_hint) { - /* Have only num-sheps hint */ - int num_shep_objs = hwloc_get_nbobjs_inside_cpuset_by_depth( - sys_topo, allowed_cpuset, shep_obj->depth); - if (num_shep_objs == num_sheps_hint) { - /* Choose this level */ - break; - } - } else if (0 != num_wps_hint) { - /* Have only num-wps hint */ - if (shared >= num_wps_hint) { - /* Choose this level */ - break; - } - } else { - /* No hints */ - if (shep_obj->depth <= qt_topo.worker_obj->depth && 1 < shared) { - /* Choose this level */ - break; - } - } - - shep_obj = shep_obj->parent; - } - - /* Update logical sys_topo info */ - qt_topo.shep_obj = shep_obj; - qt_topo.shep_level = shep_obj->depth; - - qthread_debug(AFFINITY_DETAILS, - "chose %s shep obj\n", - topo_type_names[qt_topo.shep_level]); - - /* Update hints */ - } - - /* Sanity-check that boundary and units are reasonable */ - assert(NULL != qt_topo.shep_obj && NULL != qt_topo.worker_obj); - if (qt_topo.shep_level > qt_topo.worker_level) { - print_warning("shepherd boundary smaller than worker unit\n"); - } - - /**************************************************************************/ - - /* Establish num-sheps and num-wps */ - if (0 == num_sheps_hint && 0 == num_wps_hint) { - /* No sys_topo hints */ - qt_topo.num_sheps = hwloc_get_nbobjs_inside_cpuset_by_depth( - sys_topo, allowed_cpuset, qt_topo.shep_obj->depth); - qt_topo.num_wps = num_cores / qt_topo.num_sheps; - } else if (0 == num_wps_hint) { - /* Only sheps hint given */ - int num_sheps_objs = hwloc_get_nbobjs_inside_cpuset_by_depth( - sys_topo, allowed_cpuset, qt_topo.shep_obj->depth); - - if (num_sheps_hint <= num_sheps_objs) { - qt_topo.num_sheps = num_sheps_hint; - } else { - print_warning("disregarding num-sheps hint (%d) in favor of maximum " - "number of objects at shepherd boundary level (%d)\n", - num_sheps_hint, - num_sheps_objs); - qt_topo.num_sheps = num_sheps_objs; - } - qt_topo.num_wps = num_cores / qt_topo.num_sheps; - } else if (0 == num_sheps_hint) { - /* Only WPS hint given */ - qt_topo.num_sheps = hwloc_get_nbobjs_inside_cpuset_by_depth( - sys_topo, allowed_cpuset, qt_topo.shep_obj->depth); - qt_topo.num_wps = num_wps_hint; - } else { - /* Both hints */ - qt_topo.num_sheps = num_sheps_hint; - qt_topo.num_wps = num_wps_hint; - } - - /* Establish num-workers */ - if (0 != num_workers_hint) { - /* Adjust num-wps count to account for requested num-workers */ - if ((num_workers_hint / num_sheps_hint) > num_wps_hint) { - print_error("invalid HWPAR, too many workers\n"); - exit(EXIT_FAILURE); - } 
- if (0 != num_workers_hint % num_sheps_hint) { - print_error("invalid HWPAR, odd workers\n"); - exit(EXIT_FAILURE); - } - - qt_topo.num_workers = num_workers_hint; - qt_topo.num_wps = num_workers_hint / qt_topo.num_sheps; - } else { - qt_topo.num_workers = qt_topo.num_sheps * qt_topo.num_wps; - } - - /* Construct worker map */ - qt_topo.worker_map = qt_malloc(qt_topo.num_workers * sizeof(topo_worker_t)); - assert(qt_topo.worker_map); - - for (int i = 0; i < qt_topo.num_sheps; i++) { - for (int j = 0; j < qt_topo.num_wps; j++) { - int uid = (i * qt_topo.num_wps) + j; - qt_topo.worker_map[uid].uid = uid; - qt_topo.worker_map[uid].shep_id = i; - qt_topo.worker_map[uid].worker_id = j; - - /* Set binding location */ - hwloc_obj_t shep_obj = hwloc_get_obj_inside_cpuset_by_depth( - sys_topo, allowed_cpuset, qt_topo.shep_level, i); - hwloc_obj_t logical_core_obj = hwloc_get_obj_inside_cpuset_by_type( - sys_topo, shep_obj->cpuset, HWLOC_OBJ_CORE, j); - qt_topo.worker_map[uid].bind_obj = hwloc_get_ancestor_obj_by_depth( - sys_topo, qt_topo.worker_obj->depth, logical_core_obj); - } - } - - /* Print logical sys_topo */ - if (2 == qt_topology_output_level) { print_logical_view(); } - - /* Set "outputs" */ - *nbshepherds = qt_topo.num_sheps; - *nbworkers = qt_topo.num_wps; - - wkr_type = qt_topo.worker_obj->type; - wkr_depth = qt_topo.worker_level; - - return; -} /*}}} */ - -void INTERNAL qt_affinity_set(qthread_worker_t *me, - unsigned int nworkerspershep) { /*{{{ */ - ASSERT_ONLY(hwloc_topology_check(sys_topo)); - - hwloc_obj_t target_obj = qt_topo.worker_map[me->unique_id - 1].bind_obj; - if (hwloc_set_cpubind(sys_topo, target_obj->cpuset, HWLOC_CPUBIND_THREAD)) { - char *str; - int i = errno; -#ifdef __APPLE__ - if (i == ENOSYS) { return; } -#endif - hwloc_bitmap_asprintf(&str, target_obj->cpuset); - fprintf(stderr, - "Couldn't bind to cpuset %s because %s (%i)\n", - str, - strerror(i), - i); - FREE(str, strlen(str)); - } -} /*}}} */ - -int INTERNAL qt_affinity_gendists(qthread_shepherd_t *sheps, - qthread_shepherd_id_t nshepherds) { /*{{{ */ - qthread_debug(AFFINITY_CALLS, - "generating distances for %i sheps (%p)\n", - (int)qt_topo.num_sheps, - sheps); - - for (size_t i = 0; i < qt_topo.num_sheps; i++) { - sheps[i].node = i % qt_topo.num_sheps; - sheps[i].sorted_sheplist = - qt_calloc(qt_topo.num_sheps - 1, sizeof(qthread_shepherd_id_t)); - sheps[i].shep_dists = qt_calloc(qt_topo.num_sheps, sizeof(unsigned int)); - } - -#ifdef QTHREAD_HAVE_HWLOC_DISTS - hwloc_const_cpuset_t allowed_cpuset = - hwloc_topology_get_allowed_cpuset(sys_topo); - /* XXX: should this really find the obj closest to the shep level that - * has a distance matrix? 
*/ - const struct hwloc_distances_s *matrix = - hwloc_get_whole_distance_matrix_by_type(sys_topo, HWLOC_OBJ_NODE); - - if (matrix) { - qthread_debug(AFFINITY_DETAILS, - "matrix is %p, type at this depth: %s, relative_depth: " - "%u(%s), nbobj: %u\n", - matrix, - hwloc_obj_type_string(qt_topo.shep_obj->type), - matrix->relative_depth, - hwloc_obj_type_string( - hwloc_get_depth_type(sys_topo, matrix->relative_depth)), - matrix->nbobjs); - assert(matrix->latency); - } else { - qthread_debug(AFFINITY_DETAILS, - "matrix is %p, type at this depth: %s\n", - matrix, - hwloc_obj_type_string(HWLOC_OBJ_NODE)); - } - size_t node_to_NUMAnode[qt_topo.num_sheps]; - for (size_t i = 0; i < qt_topo.num_sheps; ++i) { - hwloc_obj_t node_obj = hwloc_get_obj_inside_cpuset_by_depth( - sys_topo, allowed_cpuset, qt_topo.shep_level, i); - while (node_obj->type > HWLOC_OBJ_NODE) { - node_obj = node_obj->parent; - assert(node_obj); - } - node_to_NUMAnode[i] = node_obj->logical_index; - qthread_debug( - AFFINITY_DETAILS, "obj %i maps to node %i\n", i, node_to_NUMAnode[i]); - } -#endif /* ifdef QTHREAD_HAVE_HWLOC_DISTS */ - - for (size_t i = 0; i < qt_topo.num_sheps; ++i) { - for (size_t j = 0, k = 0; j < qt_topo.num_sheps; ++j) { - if (j != i) { -#ifdef QTHREAD_HAVE_HWLOC_DISTS - if (matrix) { - sheps[i].shep_dists[j] = - matrix->latency[node_to_NUMAnode[sheps[i].node] + - matrix->nbobjs * node_to_NUMAnode[sheps[j].node]] * - 10; - qthread_debug(AFFINITY_DETAILS, - "distance from %i(%i) to %i(%i) is %i\n", - (int)i, - (int)sheps[i].node, - (int)j, - (int)sheps[j].node, - (int)(sheps[i].shep_dists[j])); - } else { - // handle what is fundamentally a bug in old versions of hwloc - sheps[i].shep_dists[j] = 10; - qthread_debug(AFFINITY_DETAILS, - "pretending distance from %i to %i is %i\n", - (int)i, - (int)j, - (int)(sheps[i].shep_dists[j])); - } -#else /* ifdef QTHREAD_HAVE_HWLOC_DISTS */ - sheps[i].shep_dists[j] = 10; - qthread_debug(AFFINITY_DETAILS, - "pretending distance from %i to %i is %i\n", - (int)i, - (int)j, - (int)(sheps[i].shep_dists[j])); -#endif /* ifdef QTHREAD_HAVE_HWLOC_DISTS */ - sheps[i].sorted_sheplist[k++] = j; - } - } - if (qt_topo.num_sheps > 1) { - sort_sheps( - sheps[i].shep_dists, sheps[i].sorted_sheplist, qt_topo.num_sheps); - } - } - /* there does not seem to be a way to extract distances... 
*/ - return QTHREAD_SUCCESS; -} /*}}} */ - -#ifdef QTHREAD_HAVE_MEM_AFFINITY -void INTERNAL qt_affinity_mem_tonode(void *addr, - size_t bytes, - int node) { /*{{{ */ - hwloc_nodeset_t nodeset = hwloc_bitmap_alloc(); - - DEBUG_ONLY(hwloc_topology_check(sys_topo)); - hwloc_bitmap_set(nodeset, node); - hwloc_set_area_membind(sys_topo, - addr, - bytes, - nodeset, - HWLOC_MEMBIND_BIND, - HWLOC_MEMBIND_NOCPUBIND); - hwloc_bitmap_free(nodeset); -} /*}}} */ - -void INTERNAL *qt_affinity_alloc(size_t bytes) { /*{{{ */ - DEBUG_ONLY(hwloc_topology_check(sys_topo)); - return hwloc_alloc(sys_topo, bytes); -} /*}}} */ - -void INTERNAL *qt_affinity_alloc_onnode(size_t bytes, int node) { /*{{{ */ - void *ret; - hwloc_nodeset_t nodeset; - - DEBUG_ONLY(hwloc_topology_check(sys_topo)); - nodeset = hwloc_bitmap_alloc(); - hwloc_bitmap_set(nodeset, node); - ret = hwloc_alloc_membind_nodeset( - sys_topo, bytes, nodeset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_NOCPUBIND); - hwloc_bitmap_free(nodeset); - return ret; -} /*}}} */ - -void INTERNAL qt_affinity_free(void *ptr, size_t bytes) { /*{{{ */ - DEBUG_ONLY(hwloc_topology_check(sys_topo)); - hwloc_free(sys_topo, ptr, bytes); -} /*}}} */ - -#endif /* ifdef QTHREAD_HAVE_MEM_AFFINITY */ - -/* vim:set expandtab: */ diff --git a/src/affinity/lgrp.c b/src/affinity/lgrp.c deleted file mode 100644 index f6a7f804..00000000 --- a/src/affinity/lgrp.c +++ /dev/null @@ -1,281 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#ifdef HAVE_PROCESSOR_BIND -#include -#include -#include -#ifdef HAVE_SYS_LGRP_USER_H -#include -#endif -#endif -#include /* for perror() */ -#include /* for malloc() */ - -#include "qt_asserts.h" -#include "qt_subsystems.h" -#include "qt_visibility.h" -// #include "qthread_innards.h" -#include "qt_affinity.h" -#include "qt_debug.h" - -#include "shepcomp.h" -#include "shufflesheps.h" - -static lgrp_cookie_t lgrp_cookie; -static lgrp_id_t mccoy_thread_home; -static lgrp_affinity_t mccoy_thread_home_affinity; - -qthread_shepherd_id_t guess_num_shepherds(void); -qthread_worker_id_t -guess_num_workers_per_shep(qthread_shepherd_id_t nshepherds); - -static int lgrp_maxcpus(lgrp_id_t const lgrp, int cpu_max) { /*{{{ */ - int nchildren, - ncpus = lgrp_cpus(lgrp_cookie, lgrp, NULL, 0, LGRP_CONTENT_DIRECT); - - if (ncpus == -1) { - return cpu_max; - } else if ((ncpus > 0) && (ncpus > cpu_max)) { - cpu_max = ncpus; - } - nchildren = lgrp_children(lgrp_cookie, lgrp, NULL, 0); - if (nchildren == -1) { - return cpu_max; - } else if (nchildren > 0) { - int i; - int nchildren_save = nchildren; - lgrp_id_t *children = MALLOC(nchildren * sizeof(lgrp_id_t)); - - nchildren = lgrp_children(lgrp_cookie, lgrp, children, nchildren); - if (nchildren == -1) { - qthread_debug(ALWAYS_OUTPUT, "hardware giving inconsistent answers!\n"); - abort(); - return cpu_max; - } - for (i = 0; i < nchildren; i++) { - cpu_max = lgrp_maxcpus(children[i], cpu_max); - } - FREE(children, nchildren * sizeof(lgrp_id_t)); - } - return cpu_max; -} /*}}} */ - -static int lgrp_walk(lgrp_id_t const lgrp, - processorid_t **cpus, - lgrp_id_t *lgrp_ids, - int cpu_grps) { /*{{{ */ - int nchildren, - ncpus = lgrp_cpus(lgrp_cookie, lgrp, NULL, 0, LGRP_CONTENT_DIRECT); - - if (ncpus == -1) { - return cpu_grps; - } else if (ncpus > 0) { - processorid_t *cpuids = MALLOC((ncpus + 1) * sizeof(processorid_t)); - - ncpus = lgrp_cpus(lgrp_cookie, lgrp, cpuids, ncpus, LGRP_CONTENT_DIRECT); - if (ncpus == -1) { - qthread_debug(ALWAYS_OUTPUT, "hardware giving inconsistent answers!\n"); - abort(); - return cpu_grps; 
- } - cpuids[ncpus] = -1; - if (cpus) { cpus[cpu_grps] = cpuids; } - if (lgrp_ids) { lgrp_ids[cpu_grps] = lgrp; } - cpu_grps++; - } - nchildren = lgrp_children(lgrp_cookie, lgrp, NULL, 0); - if (nchildren == -1) { - return cpu_grps; - } else if (nchildren > 0) { - int i; - lgrp_id_t *children = MALLOC(nchildren * sizeof(lgrp_id_t)); - - nchildren = lgrp_children(lgrp_cookie, lgrp, children, nchildren); - if (nchildren == -1) { - qthread_debug(ALWAYS_OUTPUT, "hardware giving inconsistent answers!\n"); - abort(); - return cpu_grps; - } - for (i = 0; i < nchildren; i++) { - cpu_grps = lgrp_walk(children[i], cpus, lgrp_ids, cpu_grps); - } - FREE(children, nchildren * sizeof(lgrp_id_t)); - } - return cpu_grps; -} /*}}} */ - -static void qt_affinity_internal_lgrp_teardown(void) { - lgrp_affinity_set( - P_LWPID, P_MYID, mccoy_thread_home, mccoy_thread_home_affinity); -} - -void INTERNAL qt_affinity_init(qthread_shepherd_id_t *nbshepherds, - qthread_worker_id_t *nbworkers, - size_t *hw_par) { /*{{{ */ - lgrp_cookie = lgrp_init(LGRP_VIEW_OS); - mccoy_thread_home = lgrp_home(P_LWPID, P_MYID); - mccoy_thread_home_affinity = - lgrp_affinity_get(P_LWPID, P_MYID, mccoy_thread_home); - qthread_internal_cleanup(qt_affinity_internal_lgrp_teardown); - if (*nbshepherds == 0) { *nbshepherds = guess_num_shepherds(); } - if (*nbworkers == 0) { - *nbworkers = guess_num_workers_per_shep(*nbshepherds); - } -} /*}}} */ - -qthread_shepherd_id_t INTERNAL guess_num_shepherds(void) { /*{{{ */ - qthread_shepherd_id_t guess = 1; - - guess = lgrp_walk(lgrp_root(lgrp_cookie), NULL, NULL, 0); - if (guess <= 0) { guess = 1; } - qthread_debug(AFFINITY_DETAILS, "guessing %i shepherds\n", (int)guess); - return guess; -} /*}}} */ - -void INTERNAL qt_affinity_set(qthread_worker_t *me, - unsigned int Q_UNUSED(nw)) { /*{{{ */ - /* if this seems wrong, first answer: why should workers have more than socket - * affinity? */ - qthread_debug(AFFINITY_DETAILS, - "set shep %i worker %i to lgrp %i\n", - (int)me->shepherd->shepherd_id, - (int)me->worker_id, - (int)me->shepherd->lgrp); - if (lgrp_affinity_set(P_LWPID, P_MYID, me->shepherd->lgrp, LGRP_AFF_STRONG) != - 0) { - perror("lgrp_affinity_set"); - } -} /*}}} */ - -qthread_worker_id_t INTERNAL -guess_num_workers_per_shep(qthread_shepherd_id_t nshepherds) { /*{{{ */ - unsigned int guess = 1; - int tot_nodes = lgrp_walk(lgrp_root(lgrp_cookie), NULL, NULL, 0); - - guess = lgrp_maxcpus(lgrp_root(lgrp_cookie), 0); - - qthread_debug(AFFINITY_DETAILS, - "guessing num workers for %i sheps (nodes:%i max:%i)\n", - (int)nshepherds, - tot_nodes, - (int)guess); - - if (nshepherds > tot_nodes) { guess /= (nshepherds / tot_nodes); } - if (guess == 0) { guess = 1; } - - qthread_debug(AFFINITY_DETAILS, "guessing %i workers per shep\n", (int)guess); - return guess; -} /*}}} */ - -int INTERNAL qt_affinity_gendists(qthread_shepherd_t *sheps, - qthread_shepherd_id_t nshepherds) { /*{{{ */ - unsigned int lgrp_offset; - int lgrp_count_grps; - processorid_t **cpus = NULL; - lgrp_id_t *lgrp_ids = NULL; - - switch (lgrp_cookie) { - case EINVAL: - case ENOMEM: - qthread_debug(AFFINITY_DETAILS, "lgrp_cookie is invalid!\n"); - return QTHREAD_THIRD_PARTY_ERROR; - } - { - size_t max_lgrps = lgrp_nlgrps(lgrp_cookie); - - if (max_lgrps <= 0) { - qthread_debug( - AFFINITY_DETAILS, "max_lgrps is <= zero! 
(%i)\n", max_lgrps); - return QTHREAD_THIRD_PARTY_ERROR; - } - cpus = qt_calloc(max_lgrps, sizeof(processorid_t *)); - assert(cpus); - lgrp_ids = qt_calloc(max_lgrps, sizeof(lgrp_id_t)); - assert(lgrp_ids); - } - lgrp_count_grps = lgrp_walk(lgrp_root(lgrp_cookie), cpus, lgrp_ids, 0); - if (lgrp_count_grps <= 0) { - qthread_debug( - AFFINITY_DETAILS, "lgrp_count_grps is <= zero ! (%i)\n", lgrp_count_grps); - return QTHREAD_THIRD_PARTY_ERROR; - } - for (qthread_shepherd_id_t i = 0; i < nshepherds; i++) { - /* first, pick a lgrp/node */ - int cpu; - unsigned int first_loff; - - first_loff = lgrp_offset = i % lgrp_count_grps; - sheps[i].node = -1; - sheps[i].lgrp = -1; - /* now pick an available CPU */ - while (1) { - cpu = 0; - /* find an unused one */ - while (cpus[lgrp_offset][cpu] != (processorid_t)(-1)) cpu++; - if (cpu == 0) { - /* if no unused ones... try the next lgrp */ - lgrp_offset++; - lgrp_offset *= (lgrp_offset < lgrp_count_grps); - if (lgrp_offset == first_loff) { break; } - } else { - /* found one! */ - cpu--; - sheps[i].node = cpus[lgrp_offset][cpu]; - sheps[i].lgrp = lgrp_ids[lgrp_offset]; - cpus[lgrp_offset][cpu] = -1; - break; - } - } - } - for (qthread_shepherd_id_t i = 0; i < nshepherds; i++) { - unsigned int const node_i = sheps[i].lgrp; - size_t j; - sheps[i].shep_dists = qt_calloc(nshepherds, sizeof(unsigned int)); - assert(sheps[i].shep_dists); - for (j = 0; j < nshepherds; j++) { - unsigned int const node_j = sheps[j].lgrp; - - if ((node_i != QTHREAD_NO_NODE) && (node_j != QTHREAD_NO_NODE)) { - int ret = - lgrp_latency_cookie(lgrp_cookie, node_i, node_j, LGRP_LAT_CPU_TO_MEM); - - if (ret < 0) { - assert(ret >= 0); - return QTHREAD_THIRD_PARTY_ERROR; - } else { - sheps[i].shep_dists[j] = (unsigned int)ret; - } - } else { - /* XXX too arbitrary */ - if (i == j) { - sheps[i].shep_dists[j] = 12; - } else { - sheps[i].shep_dists[j] = 18; - } - } - } - } - for (qthread_shepherd_id_t i = 0; i < nshepherds; i++) { - size_t j, k = 0; - - sheps[i].sorted_sheplist = - qt_calloc(nshepherds - 1, sizeof(qthread_shepherd_id_t)); - assert(sheps[i].sorted_sheplist); - for (j = 0; j < nshepherds; j++) { - if (j != i) { sheps[i].sorted_sheplist[k++] = j; } - } - if (nshepherds > 1) { - sort_sheps(sheps[i].shep_dists, sheps[i].sorted_sheplist, nshepherds); - } - } - if (cpus) { - for (int i = 0; i < lgrp_count_grps; i++) { qt_free(cpus[i]); } - qt_free(cpus); - } - if (lgrp_ids) { qt_free(lgrp_ids); } - return QTHREAD_SUCCESS; -} /*}}} */ - -/* vim:set expandtab: */ diff --git a/src/affinity/libnuma.c b/src/affinity/libnuma.c deleted file mode 100644 index acfe46c5..00000000 --- a/src/affinity/libnuma.c +++ /dev/null @@ -1,198 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include - -#include "qt_affinity.h" -#include "qt_asserts.h" -#include "qt_debug.h" -#include "qt_subsystems.h" - -#include "shepcomp.h" -#include "shufflesheps.h" - -static nodemask_t *mccoy_bitmask = NULL; - -static qthread_shepherd_id_t guess_num_shepherds(void); -qthread_worker_id_t -guess_num_workers_per_shep(qthread_shepherd_id_t nshepherds); - -static void qt_affinity_internal_numa_teardown(void) { - numa_run_on_node_mask(mccoy_bitmask); - FREE(mccoy_bitmask, sizeof(nodemask_t)); -} - -void INTERNAL qt_affinity_init(qthread_shepherd_id_t *nbshepherds, - qthread_worker_id_t *nbworkers, - size_t *hw_par) { /*{{{ */ - mccoy_bitmask = MALLOC(sizeof(nodemask_t)); - *mccoy_bitmask = numa_get_run_node_mask(); - qthread_internal_cleanup(qt_affinity_internal_numa_teardown); - if (*nbshepherds == 0) { 
*nbshepherds = guess_num_shepherds(); } - if (*nbworkers == 0) { - *nbworkers = guess_num_workers_per_shep(*nbshepherds); - } -} /*}}} */ - -void INTERNAL qt_affinity_mem_tonode(void *addr, - size_t bytes, - int node) { /*{{{ */ - numa_tonode_memory(addr, bytes, node); -} /*}}} */ - -void INTERNAL *qt_affinity_alloc(size_t bytes) { /*{{{ */ - return numa_alloc(bytes); -} /*}}} */ - -void INTERNAL *qt_affinity_alloc_onnode(size_t bytes, int node) { /*{{{ */ - return numa_alloc_onnode(bytes, node); -} /*}}} */ - -void INTERNAL qt_affinity_free(void *ptr, size_t bytes) { /*{{{ */ - numa_free(ptr, bytes); -} /*}}} */ - -#define BMASK_WORDS 16 - -static qthread_shepherd_id_t guess_num_shepherds(void) { /*{{{ */ - qthread_shepherd_id_t nshepherds = 1; - - if (numa_available() != 1) { - /* this is (probably) correct if/when we have multithreaded shepherds, - * ... BUT ONLY IF ALL NODES HAVE CPUS!!!!!! */ - nshepherds = numa_max_node() + 1; - qthread_debug( - AFFINITY_DETAILS, "numa_max_node() returned %i\n", nshepherds); - } - if (nshepherds <= 0) { nshepherds = 1; } - return nshepherds; -} /*}}} */ - -void INTERNAL qt_affinity_set(qthread_worker_t *me, - unsigned int Q_UNUSED(nw)) { /*{{{ */ - assert(me); - - qthread_shepherd_t *const myshep = me->shepherd; - - /* It would be nice if we could do something more specific than - * "numa_run_on_node", but because sched_etaffinity() is so dangerous, we - * really can't, in good conscience. */ - qthread_debug(AFFINITY_FUNCTIONS, - "calling numa_run_on_node(%i) for worker %i\n", - myshep->node, - me->packed_worker_id); - int ret = numa_run_on_node(myshep->node); - if (ret != 0) { - numa_error("setting thread affinity"); - abort(); - } - numa_set_localalloc(); -} /*}}} */ - -qthread_worker_id_t INTERNAL -guess_num_workers_per_shep(qthread_shepherd_id_t nshepherds) { /*{{{ */ - size_t cpu_count = 1; - unsigned int guess = 1; - - qthread_debug( - AFFINITY_CALLS, "guessing workers for %i shepherds\n", (int)nshepherds); -#ifdef HAVE_NUMA_NUM_THREAD_CPUS - /* note: not numa_num_configured_cpus(), just in case an - * artificial limit has been imposed. 
*/ - cpu_count = numa_num_thread_cpus(); - qthread_debug( - AFFINITY_DETAILS, "numa_num_thread_cpus returned %i\n", nshepherds); -#elif defined(HAVE_NUMA_BITMASK_NBYTES) - cpu_count = 0; - for (size_t b = 0; b < numa_bitmask_nbytes(numa_all_cpus_ptr) * 8; b++) { - cpu_count += numa_bitmask_isbitset(numa_all_cpus_ptr, b); - } - qthread_debug(AFFINITY_DETAILS, - "after checking through the all_cpus_ptr, I counted %i cpus\n", - (int)cpu_count); -#else /* ifdef HAVE_NUMA_NUM_THREAD_CPUS */ - cpu_count = numa_max_node() + 1; - qthread_debug(AFFINITY_DETAILS, "numa_max_node() returned %i\n", nshepherds); -#endif /* ifdef HAVE_NUMA_NUM_THREAD_CPUS */ - guess = cpu_count / nshepherds; - if (guess == 0) { guess = 1; } - qthread_debug( - AFFINITY_DETAILS, "guessing %i workers per shepherd\n", (int)guess); - return guess; -} /*}}} */ - -int INTERNAL qt_affinity_gendists(qthread_shepherd_t *sheps, - qthread_shepherd_id_t nshepherds) { /*{{{ */ - size_t const num_extant_nodes = numa_max_node() + 1; - nodemask_t bmask; - - qthread_debug(AFFINITY_FUNCTIONS, - "sheps(%p), nshepherds(%u), num_extant_nodes:%u\n", - sheps, - nshepherds, - (unsigned)num_extant_nodes); - if (numa_available() == -1) { return QTHREAD_THIRD_PARTY_ERROR; } - nodemask_zero(&bmask); - /* assign nodes */ - qthread_debug(AFFINITY_DETAILS, "assign nodes...\n"); - for (size_t i = 0; i < nshepherds; ++i) { - sheps[i].node = i % num_extant_nodes; - qthread_debug( - AFFINITY_DETAILS, "set bit %u in bmask\n", i % num_extant_nodes); - nodemask_set(&bmask, i % num_extant_nodes); - } - qthread_debug(AFFINITY_DETAILS, "numa_set_interleave_mask\n"); - numa_set_interleave_mask(&bmask); - qthread_debug(AFFINITY_DETAILS, "querying distances...\n"); - /* truly ancient versions of libnuma (in the changelog, this is - * considered "pre-history") do not have numa_distance() */ - for (qthread_shepherd_id_t i = 0; i < nshepherds; i++) { - qthread_debug(AFFINITY_DETAILS, "i = %u < %u...\n", i, nshepherds); - unsigned int const node_i = sheps[i].node; - size_t j, k; - sheps[i].shep_dists = qt_calloc(nshepherds, sizeof(unsigned int)); - sheps[i].sorted_sheplist = - qt_calloc(nshepherds - 1, sizeof(qthread_shepherd_id_t)); - qthread_debug(AFFINITY_DETAILS, - "allocs %p %p\n", - sheps[i].shep_dists, - sheps[i].sorted_sheplist); - assert(sheps[i].shep_dists); - assert(sheps[i].sorted_sheplist); - for (j = 0; j < nshepherds; j++) { - unsigned int const node_j = sheps[j].node; - -#if QTHREAD_NUMA_DISTANCE_WORKING - if ((node_i != QTHREAD_NO_NODE) && (node_j != QTHREAD_NO_NODE) && - (node_i != node_j)) { - sheps[i].shep_dists[j] = numa_distance(node_i, node_j); - } else { -#endif - /* XXX too arbitrary */ - if (i == j) { - sheps[i].shep_dists[j] = 0; - } else { - sheps[i].shep_dists[j] = 20; - } -#if QTHREAD_NUMA_DISTANCE_WORKING - } -#endif - qthread_debug(AFFINITY_DETAILS, - "shep %u to shep %u distance: %u\n", - i, - j, - sheps[i].shep_dists[j]); - } - k = 0; - for (j = 0; j < nshepherds; j++) { - if (j != i) { sheps[i].sorted_sheplist[k++] = j; } - } - if (nshepherds > 1) { - sort_sheps(sheps[i].shep_dists, sheps[i].sorted_sheplist, nshepherds); - } - } - return QTHREAD_SUCCESS; -} /*}}} */ - -/* vim:set expandtab */ diff --git a/src/affinity/libnumaV2.c b/src/affinity/libnumaV2.c deleted file mode 100644 index 94a611ff..00000000 --- a/src/affinity/libnumaV2.c +++ /dev/null @@ -1,233 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include -#include - -#include "qt_affinity.h" -#include "qt_asserts.h" -#include "qt_debug.h" -#include 
"qt_subsystems.h" - -#include "shepcomp.h" -#include "shufflesheps.h" - -static struct bitmask *mccoy_bitmask = NULL; - -qthread_shepherd_id_t guess_num_shepherds(void); -qthread_worker_id_t -guess_num_workers_per_shep(qthread_shepherd_id_t nshepherds); - -static void qt_affinity_internal_numaV2_teardown(void) { - numa_run_on_node_mask(mccoy_bitmask); -} - -void INTERNAL qt_affinity_init(qthread_shepherd_id_t *nbshepherds, - qthread_worker_id_t *nbworkers, - size_t *hw_par) { /*{{{ */ - qthread_debug(AFFINITY_FUNCTIONS, "start\n"); - qassert(numa_available(), 0); - mccoy_bitmask = numa_get_run_node_mask(); - qthread_internal_cleanup(qt_affinity_internal_numaV2_teardown); - if (*nbshepherds == 0) { *nbshepherds = guess_num_shepherds(); } - if (*nbworkers == 0) { - *nbworkers = guess_num_workers_per_shep(*nbshepherds); - } -} /*}}} */ - -void INTERNAL qt_affinity_mem_tonode(void *addr, - size_t bytes, - int node) { /*{{{ */ - numa_tonode_memory(addr, bytes, node); -} /*}}} */ - -void INTERNAL *qt_affinity_alloc(size_t bytes) { /*{{{ */ - return numa_alloc(bytes); -} /*}}} */ - -void INTERNAL *qt_affinity_alloc_onnode(size_t bytes, int node) { /*{{{ */ - return numa_alloc_onnode(bytes, node); -} /*}}} */ - -void INTERNAL qt_affinity_free(void *ptr, size_t bytes) { /*{{{ */ - numa_free(ptr, bytes); -} /*}}} */ - -qthread_shepherd_id_t INTERNAL guess_num_shepherds(void) { /*{{{ */ - qthread_shepherd_id_t nshepherds = 1; - - if (numa_available() != 1) { - qthread_debug(AFFINITY_FUNCTIONS, "numa_available != 1\n"); - /* this is (probably) correct if/when we have multithreaded shepherds, - * ... BUT ONLY IF ALL NODES HAVE CPUS!!!!!! */ - nshepherds = numa_max_node() + 1; - qthread_debug( - AFFINITY_DETAILS, "numa_max_node() returned %i\n", nshepherds); - } - if (nshepherds <= 0) { nshepherds = 1; } - qthread_debug(AFFINITY_FUNCTIONS, "guessing %i shepherds\n", (int)nshepherds); - return nshepherds; -} /*}}} */ - -void INTERNAL qt_affinity_set(qthread_worker_t *me, - unsigned int Q_UNUSED(nw)) { /*{{{ */ - assert(me); - - qthread_shepherd_t *const myshep = me->shepherd; - - /* It would be nice if we could do something more specific than - * "numa_run_on_node", but because sched_setaffinity() is so dangerous, we - * really can't, in good conscience. */ - qthread_debug(AFFINITY_DETAILS, - "calling numa_run_on_node(%i) for worker %i\n", - myshep->node, - me->packed_worker_id); - int ret = numa_run_on_node(myshep->node); - if (ret != 0) { - qthread_debug(ALWAYS_OUTPUT, - "numa_run_on_node() returned an error: %s\n", - strerror(errno)); - abort(); - } - numa_set_localalloc(); -} /*}}} */ - -qthread_worker_id_t INTERNAL -guess_num_workers_per_shep(qthread_shepherd_id_t nshepherds) { /*{{{ */ - size_t cpu_count = 1; - unsigned int guess = 1; - - qthread_debug( - AFFINITY_DETAILS, "guessing workers for %i shepherds\n", (int)nshepherds); -#ifdef HAVE_NUMA_NUM_THREAD_CPUS - /* note: not numa_num_configured_cpus(), just in case an - * artificial limit has been imposed. 
*/ - cpu_count = numa_num_thread_cpus(); - qthread_debug( - AFFINITY_DETAILS, "numa_num_thread_cpus returned %i\n", (int)cpu_count); -#elif defined(HAVE_NUMA_BITMASK_NBYTES) - cpu_count = 0; - for (size_t b = 0; b < numa_bitmask_nbytes(numa_all_cpus_ptr) * 8; b++) { - cpu_count += numa_bitmask_isbitset(numa_all_cpus_ptr, b); - } - qthread_debug(AFFINITY_DETAILS, - "after checking through the all_cpus_ptr, I counted %i cpus\n", - (int)cpu_count); -#else /* ifdef HAVE_NUMA_NUM_THREAD_CPUS */ - cpu_count = numa_max_node() + 1; - qthread_debug( - AFFINITY_DETAILS, "numa_max_node() returned %i\n", (int)cpu_count); -#endif /* ifdef HAVE_NUMA_NUM_THREAD_CPUS */ - guess = cpu_count / nshepherds; - if (guess == 0) { guess = 1; } - qthread_debug( - AFFINITY_FUNCTIONS, "guessing %i workers per shepherd\n", (int)guess); - return guess; -} /*}}} */ - -static void assign_nodes(qthread_shepherd_t *sheps, size_t nsheps) { /*{{{ */ - size_t const num_extant_nodes = numa_max_node() + 1; - struct bitmask *nmask = numa_get_run_node_mask(); - struct bitmask *cmask = numa_allocate_cpumask(); - size_t *cpus_left_per_node = - qt_calloc(num_extant_nodes, - sizeof(size_t)); // handle heterogeneous core counts - int over_subscribing = 0; - - assert(cmask); - assert(nmask); - assert(cpus_left_per_node); - numa_bitmask_clearall(cmask); - /* get the # cpus for each node */ - for (size_t i = 0; i < numa_bitmask_nbytes(nmask) * 8; ++i) { - if (numa_bitmask_isbitset(nmask, i)) { - numa_node_to_cpus(i, cmask); - for (size_t j = 0; j < numa_bitmask_nbytes(cmask) * 8; j++) { - cpus_left_per_node[i] += numa_bitmask_isbitset(cmask, j) ? 1 : 0; - } - qthread_debug(AFFINITY_DETAILS, - "there are %i CPUs on node %i\n", - (int)cpus_left_per_node[i], - (int)i); - } - } - /* assign nodes by iterating over cpus_left_per_node array (which is of - * size num_extant_nodes rather than of size nodes_i_can_use) */ - int node = 0; - for (size_t i = 0; i < nsheps; ++i) { - switch (over_subscribing) { - case 0: { - int count = 0; - while (count < num_extant_nodes && cpus_left_per_node[node] == 0) { - node++; - node *= (node < num_extant_nodes); - count++; - } - if (count < num_extant_nodes) { - cpus_left_per_node[node]--; - break; - } - } - over_subscribing = 1; - } - qthread_debug( - AFFINITY_DETAILS, "setting shep %i to numa node %i\n", (int)i, (int)node); - sheps[i].node = node; - node++; - node *= (node < num_extant_nodes); - } - numa_bitmask_free(nmask); - numa_bitmask_free(cmask); - FREE(cpus_left_per_node, num_extant_nodes * sizeof(size_t)); -} /*}}} */ - -int INTERNAL qt_affinity_gendists(qthread_shepherd_t *sheps, - qthread_shepherd_id_t nshepherds) { /*{{{ */ - qthread_debug(AFFINITY_FUNCTIONS, "sheps:%p, nsheps:%u\n", sheps, nshepherds); - if (numa_available() == -1) { return QTHREAD_THIRD_PARTY_ERROR; } - assign_nodes(sheps, nshepherds); -#ifdef HAVE_NUMA_DISTANCE - qthread_debug(AFFINITY_DETAILS, "querying distances...\n"); - /* truly ancient versions of libnuma (in the changelog, this is - * considered "pre-history") do not have numa_distance() */ - for (unsigned int i = 0; i < nshepherds; i++) { - unsigned int const node_i = sheps[i].node; - size_t j, k; - sheps[i].shep_dists = qt_calloc(nshepherds, sizeof(unsigned int)); - sheps[i].sorted_sheplist = - qt_calloc(nshepherds - 1, sizeof(qthread_shepherd_id_t)); - assert(sheps[i].shep_dists); - assert(sheps[i].sorted_sheplist); - for (j = 0; j < nshepherds; j++) { - unsigned int const node_j = sheps[j].node; - - if ((node_i != QTHREAD_NO_NODE) && (node_j != QTHREAD_NO_NODE)) { - 
sheps[i].shep_dists[j] = numa_distance(node_i, node_j); - } else { - /* XXX too arbitrary */ - if (i == j) { - sheps[i].shep_dists[j] = 0; - } else { - sheps[i].shep_dists[j] = 20; - } - } - qthread_debug(AFFINITY_DETAILS, - "shep %u to shep %u distance: %u\n", - i, - j, - sheps[i].shep_dists[j]); - } - k = 0; - for (j = 0; j < nshepherds; j++) { - if (j != i) { sheps[i].sorted_sheplist[k++] = j; } - } - if (nshepherds > 1) { - sort_sheps(sheps[i].shep_dists, sheps[i].sorted_sheplist, nshepherds); - } - } -#endif /* ifdef HAVE_NUMA_DISTANCE */ - return QTHREAD_SUCCESS; -} /*}}} */ - -/* vim:set expandtab: */ diff --git a/src/affinity/mach.c b/src/affinity/mach.c deleted file mode 100644 index fac6d82e..00000000 --- a/src/affinity/mach.c +++ /dev/null @@ -1,108 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include -#ifdef HAVE_SYSCTL -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#ifdef HAVE_SYS_SYSCTL_H -#include -#endif -#endif -#ifdef HAVE_MACH_THREAD_POLICY_H -#include -#include -kern_return_t thread_policy_set(thread_t thread, - thread_policy_flavor_t flavor, - thread_policy_t policy_info, - mach_msg_type_number_t count); -kern_return_t thread_policy_get(thread_t thread, - thread_policy_flavor_t flavor, - thread_policy_t policy_info, - mach_msg_type_number_t *count, - boolean_t *get_default); -#endif /* ifdef HAVE_MACH_THREAD_POLICY_H */ - -#include "qt_affinity.h" -#include "qt_asserts.h" -#include "shufflesheps.h" - -qthread_shepherd_id_t INTERNAL guess_num_shepherds(void); -qthread_worker_id_t INTERNAL -guess_num_workers_per_shep(qthread_shepherd_id_t nshepherds); - -void INTERNAL qt_affinity_init(qthread_shepherd_id_t *nbshepherds, - qthread_worker_id_t *nbworkers, - size_t *hw_par) { /*{{{ */ - if (*nbshepherds == 0) { *nbshepherds = guess_num_shepherds(); } - if (*nbworkers == 0) { - *nbworkers = guess_num_workers_per_shep(*nbshepherds); - } -} /*}}} */ - -qthread_shepherd_id_t INTERNAL guess_num_shepherds(void) { /*{{{ */ - qthread_shepherd_id_t nshepherds = 1; - -#if defined(HAVE_SYSCTL) && defined(CTL_HW) && defined(HW_NCPU) - int name[2] = {CTL_HW, HW_NCPU}; - uint32_t oldv; - size_t oldvlen = sizeof(oldv); - if (sysctl(name, 2, &oldv, &oldvlen, NULL, 0) < 0) { - /* sysctl is the official query mechanism on Macs, so if it failed, - * we want to know */ - perror("sysctl"); - } else { - assert(oldvlen == sizeof(oldv)); - nshepherds = (qthread_shepherd_id_t)oldv; - } -#endif /* if defined(HAVE_SYSCTL) && defined(CTL_HW) && defined(HW_NCPU) */ - if (nshepherds <= 0) { nshepherds = 1; } - return nshepherds; -} /*}}} */ - -void INTERNAL qt_affinity_set(qthread_worker_t *me, - unsigned int Q_UNUSED(nw)) { /*{{{ */ - mach_msg_type_number_t Count = THREAD_AFFINITY_POLICY_COUNT; - thread_affinity_policy_data_t mask[THREAD_AFFINITY_POLICY_COUNT]; - - memset(mask, - 0, - sizeof(thread_affinity_policy_data_t) * THREAD_AFFINITY_POLICY_COUNT); - mask[0].affinity_tag = me->packed_worker_id + 1; - Count = 1; - if (thread_policy_set(mach_thread_self(), - THREAD_AFFINITY_POLICY, - (thread_policy_t)&mask, - Count) != KERN_SUCCESS) { - fprintf(stderr, "ERROR! 
Cannot SET affinity for some reason\n"); - } -} /*}}} */ - -qthread_worker_id_t INTERNAL -guess_num_workers_per_shep(qthread_shepherd_id_t nshepherds) { /*{{{ */ - return 1; -} /*}}} */ - -int INTERNAL qt_affinity_gendists(qthread_shepherd_t *sheps, - qthread_shepherd_id_t nshepherds) { /*{{{ */ - /* there is no native way to detect distances, so unfortunately we must assume - * that they're all equidistant */ - for (size_t i = 0; i < nshepherds; ++i) { - sheps[i].sorted_sheplist = - qt_calloc(nshepherds - 1, sizeof(qthread_shepherd_id_t)); - sheps[i].shep_dists = qt_calloc(nshepherds, sizeof(unsigned int)); - for (size_t j = 0, k = 0; j < nshepherds; ++j) { - if (j != i) { - sheps[i].shep_dists[j] = 10; - sheps[i].sorted_sheplist[k++] = j; - } - } - // no need to sort; they're all equidistant - shuffle_sheps(sheps[i].sorted_sheplist, nshepherds - 1); - } - return QTHREAD_SUCCESS; -} /*}}} */ - -/* vim:set expandtab: */ diff --git a/src/affinity/plpa.c b/src/affinity/plpa.c deleted file mode 100644 index 3e5b19ba..00000000 --- a/src/affinity/plpa.c +++ /dev/null @@ -1,97 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#ifdef HAVE_SYSCTL -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#ifdef HAVE_SYS_SYSCTL_H -#include -#endif -#endif -#if defined(HAVE_SYSCONF) && defined(HAVE_UNISTD_H) -#include -#endif - -#include - -#include "qt_affinity.h" -#include "qt_asserts.h" -#include "shufflesheps.h" - -qthread_shepherd_id_t guess_num_shepherds(void); -qthread_worker_id_t -guess_num_workers_per_shep(qthread_shepherd_id_t nshepherds); - -void INTERNAL qt_affinity_init(qthread_shepherd_id_t *nbshepherds, - qthread_worker_id_t *nbworkers, - size_t *hw_par) { /*{{{ */ - if (*nbshepherds == 0) { - *nbshepherds = guess_num_shepherds(); - if (*nbshepherds <= 0) { *nbshepherds = 1; } - } - if (*nbworkers == 0) { - *nbworkers = guess_num_workers_per_shep(*nbshepherds); - if (*nbworkers <= 0) { *nbworkers = 1; } - } -} /*}}} */ - -qthread_shepherd_id_t INTERNAL guess_num_shepherds(void) { /*{{{ */ - qthread_shepherd_id_t nshepherds = 1; - -#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF) /* Linux */ - long ret = sysconf(_SC_NPROCESSORS_CONF); - nshepherds = (ret > 0) ? 
ret : 1; -#elif defined(HAVE_SYSCTL) && defined(CTL_HW) && defined(HW_NCPU) - int name[2] = {CTL_HW, HW_NCPU}; - uint32_t oldv; - size_t oldvlen = sizeof(oldv); - if (sysctl(name, 2, &oldv, &oldvlen, NULL, 0) >= 0) { - assert(oldvlen == sizeof(oldv)); - nshepherds = (int)oldv; - } -#endif /* if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF) */ - return nshepherds; -} /*}}} */ - -void INTERNAL qt_affinity_set(qthread_worker_t *me, - unsigned int Q_UNUSED(nw)) { /*{{{ */ - plpa_cpu_set_t *cpuset = (plpa_cpu_set_t *)MALLOC(sizeof(plpa_cpu_set_t)); - - PLPA_CPU_ZERO(cpuset); - PLPA_CPU_SET(me->packed_worker_id, cpuset); - if ((plpa_sched_setaffinity(0, sizeof(plpa_cpu_set_t), cpuset) < 0) && - (errno != EINVAL)) { - perror("plpa setaffinity"); - } - FREE(cpuset, sizeof(plpa_cpu_set_t)); -} /*}}} */ - -qthread_worker_id_t INTERNAL -guess_num_workers_per_shep(qthread_shepherd_id_t nshepherds) { /*{{{ */ - return 1; -} /*}}} */ - -int INTERNAL qt_affinity_gendists(qthread_shepherd_t *sheps, - qthread_shepherd_id_t nshepherds) { /*{{{*/ - for (size_t i = 0; i < nshepherds; ++i) { - sheps[i].node = i * qlib->nworkerspershep; - sheps[i].shep_dists = qt_calloc(nshepherds, sizeof(unsigned int)); - sheps[i].sorted_sheplist = - qt_calloc(nshepherds - 1, sizeof(qthread_shepherd_id_t)); - for (size_t j = 0, k = 0; j < nshepherds; ++j) { - if (j != i) { - sheps[i].shep_dists[j] = 10; - sheps[i].sorted_sheplist[k++] = j; - } - } - // no need to sort; they're all equidistant - shuffle_sheps(sheps[i].sorted_sheplist, nshepherds - 1); - } - /* there is no inherent way to detect distances, so unfortunately we must - * assume that they're all equidistant */ - return QTHREAD_SUCCESS; -} /*}}}*/ - -/* vim:set expandtab: */ diff --git a/src/affinity/sys.c b/src/affinity/sys.c deleted file mode 100644 index 2f5ab512..00000000 --- a/src/affinity/sys.c +++ /dev/null @@ -1,112 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#if defined(HAVE_SYSCONF) && defined(HAVE_SC_NPROCESSORS_CONF) /* Linux */ -#include -#elif defined(HAVE_SYSCTL) && defined(HAVE_HW_NCPU) -#include -#include -#endif - -#include "qt_affinity.h" -#include "qt_asserts.h" -#include "qt_debug.h" -#include "shufflesheps.h" - -qthread_shepherd_id_t guess_num_shepherds(void); -qthread_worker_id_t -guess_num_workers_per_shep(qthread_shepherd_id_t nshepherds); - -void INTERNAL qt_affinity_init(qthread_shepherd_id_t *nbshepherds, - qthread_worker_id_t *nbworkers, - size_t *hw_par) { /*{{{ */ - if (*nbshepherds == 0) { - *nbshepherds = guess_num_shepherds(); - if (*nbshepherds <= 0) { *nbshepherds = 1; } - } - if (*nbworkers == 0) { - *nbworkers = guess_num_workers_per_shep(*nbshepherds); - if (*nbworkers <= 0) { *nbworkers = 1; } - } -} /*}}} */ - -qthread_shepherd_id_t INTERNAL guess_num_shepherds(void) { /*{{{ */ -#if defined(HAVE_SYSCONF) && defined(HAVE_SC_NPROCESSORS_CONF) /* Linux */ - long ret = sysconf(_SC_NPROCESSORS_CONF); - qthread_debug( - AFFINITY_CALLS, "based on sysconf(), guessing %i shepherds\n", (int)ret); - return (ret > 0) ? 
ret : 1; - -#elif defined(HAVE_SYSCTL) && defined(HAVE_HW_NCPU) - int name[2] = {CTL_HW, HW_NCPU}; - uint32_t oldv; - size_t oldvlen = sizeof(oldv); - if (sysctl(name, 2, &oldv, &oldvlen, NULL, 0) >= 0) { - assert(oldvlen == sizeof(oldv)); - qthread_debug( - AFFINITY_CALLS, "based on sysctl(), guessing %i shepherds\n", (int)oldv); - return oldv; - } else { - qthread_debug(AFFINITY_CALLS, - "sysctl() returned an error, assuming 1 shepherd\n"); - return 1; - } -#endif /* if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF) */ - qthread_debug(AFFINITY_CALLS, - "no useful interfaces present; assuming a single shepherd\n"); - return 1; -} /*}}} */ - -void INTERNAL qt_affinity_set(qthread_worker_t *me, unsigned int Q_UNUSED(nw)) { -} - -qthread_worker_id_t INTERNAL -guess_num_workers_per_shep(qthread_shepherd_id_t nshepherds) { /*{{{ */ - size_t num_procs = 1; - size_t guess = 1; - - qthread_debug( - AFFINITY_CALLS, "guessing workers for %i shepherds\n", (int)nshepherds); -#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF) /* Linux */ - long ret = sysconf(_SC_NPROCESSORS_CONF); - qthread_debug(AFFINITY_DETAILS, "sysconf() says %i processors\n", (int)ret); - num_procs = (ret > 0) ? (size_t)ret : 1; -#elif defined(HAVE_SYSCTL) && defined(CTL_HW) && defined(HW_NCPU) - int name[2] = {CTL_HW, HW_NCPU}; - uint32_t oldv; - size_t oldvlen = sizeof(oldv); - if (sysctl(name, 2, &oldv, &oldvlen, NULL, 0) >= 0) { - assert(oldvlen == sizeof(oldv)); - qthread_debug(AFFINITY_DETAILS, "sysctl() says %i CPUs\n", (int)oldv); - num_procs = (size_t)oldv; - } -#endif /* if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF) */ - guess = num_procs / nshepherds; - if (guess == 0) { guess = 1; } - qthread_debug( - AFFINITY_DETAILS, "guessing %i workers per shepherd\n", (int)guess); - return (qthread_shepherd_id_t)guess; -} /*}}} */ - -int INTERNAL qt_affinity_gendists(qthread_shepherd_t *sheps, - qthread_shepherd_id_t nshepherds) { /*{{{ */ - qthread_debug(AFFINITY_CALLS, "start (%p, %i)\n", sheps, (int)nshepherds); - for (size_t i = 0; i < nshepherds; ++i) { - sheps[i].sorted_sheplist = - qt_calloc(nshepherds - 1, sizeof(qthread_shepherd_id_t)); - sheps[i].shep_dists = qt_calloc(nshepherds, sizeof(unsigned int)); - for (size_t j = 0, k = 0; j < nshepherds; ++j) { - if (j != i) { - assert(k < (nshepherds - 1)); - sheps[i].shep_dists[j] = 10; - sheps[i].sorted_sheplist[k++] = j; - } - } - // no need to sort; they're all equidistant - shuffle_sheps(sheps[i].sorted_sheplist, nshepherds - 1); - } - return QTHREAD_SUCCESS; -} /*}}} */ - -/* vim:set expandtab: */ diff --git a/src/ds/qarray.c b/src/ds/qarray.c index 3329e04f..109e4337 100644 --- a/src/ds/qarray.c +++ b/src/ds/qarray.c @@ -317,9 +317,6 @@ static qarray *qarray_create_internal(size_t const count, break; } if (ret->base_ptr == NULL) { -#ifdef QTHREAD_HAVE_LIBNUMA - numa_error("allocating qarray body"); -#endif } #else /* ifdef QTHREAD_HAVE_MEM_AFFINITY */ /* For speed, we want page-aligned memory, if we can get it */ diff --git a/src/ds/qdqueue.c b/src/ds/qdqueue.c index 884e34a2..c170c78e 100644 --- a/src/ds/qdqueue.c +++ b/src/ds/qdqueue.c @@ -8,10 +8,6 @@ #include #endif -#ifdef QTHREAD_HAVE_LIBNUMA -#include -#endif - #include #include #include diff --git a/test/benchmarks/pmea09/time_qpool.c b/test/benchmarks/pmea09/time_qpool.c index 29912e79..86b5c602 100644 --- a/test/benchmarks/pmea09/time_qpool.c +++ b/test/benchmarks/pmea09/time_qpool.c @@ -9,9 +9,6 @@ #include #include #include -#ifdef QTHREAD_HAVE_LIBNUMA -#include -#endif #include 
"argparsing.h" #define ELEMENT_COUNT 10000 @@ -140,11 +137,7 @@ int main(int argc, char *argv[]) { numa_size = iterations * 48 / numshep; iprintf("numa_size = %i\n", (int)numa_size); for (i = 0; i < numshep; i++) { -#ifdef QTHREAD_HAVE_LIBNUMA - numa_allocs[i] = numa_alloc_onnode(numa_size, i); -#else numa_allocs[i] = malloc(numa_size); -#endif pthread_mutex_init(ptr_lock + i, NULL); } memcpy(numa_pools, numa_allocs, sizeof(void *) * numshep); @@ -171,11 +164,7 @@ int main(int argc, char *argv[]) { iterations, qtimer_secs(timer)); for (i = 0; i < numshep; i++) { -#ifdef QTHREAD_HAVE_LIBNUMA - numa_free(numa_pools[i], numa_size); -#else free(numa_pools[i]); -#endif } free(numa_pools); free(numa_allocs); From 3448a5864abdf60ed369615008893a1648807fba Mon Sep 17 00:00:00 2001 From: Ian Henriksen Date: Thu, 19 Sep 2024 10:10:52 -0600 Subject: [PATCH 03/11] Require autoconf 2.71 since that's the only one that can configure properly for the apple M1 chips which we support. This should give us a warning and hopefully prevent running into #262 again. --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 014ba743..ed359ebf 100644 --- a/configure.ac +++ b/configure.ac @@ -6,7 +6,7 @@ ## --------------------- ## ## Autoconf Requirements ## ## --------------------- ## -AC_PREREQ(2.59) +AC_PREREQ(2.71) ## ---------------------------------- ## ## Autoconf / Automake Initialization ## From 08bf7b71b7b8455cc3c1ff638dbb27637d5f3862 Mon Sep 17 00:00:00 2001 From: Ian Henriksen Date: Thu, 19 Sep 2024 10:22:02 -0600 Subject: [PATCH 04/11] Remove check for colored tests options. It's on by default in all modern versions. --- configure.ac | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/configure.ac b/configure.ac index ed359ebf..a370d65b 100644 --- a/configure.ac +++ b/configure.ac @@ -18,19 +18,7 @@ AC_CONFIG_AUX_DIR([config]) AC_CONFIG_MACRO_DIR([config]) AC_CONFIG_SRCDIR([src/qthread.c]) -dnl Require at least AM 1.7. Prior versions require AM_CONFIG_HEADER -dnl and have no equivalent to AC_CONFIG_HEADERS (the plural version), -dnl which is a pain. AM 1.7 also added support for putting target -dnl information in AM_CONDITIONAL blocks and using the += modifier. -dnl ... removed "check-news" because of automatic version generation -# Automake's silent rules were implemented in the same version that -# color-tests was implemented, so we can use one to detect the other. -# This nasty, dirty, unreliable trick is strongly discouraged by its author: -# http://blog.flameeyes.eu/trackbacks?article_id=5155 -m4_ifdef([AM_SILENT_RULES], - [m4_define([qt_color_tests], [color-tests])], - [m4_define([qt_color_tests], [])]) -AM_INIT_AUTOMAKE([foreign subdir-objects dist-bzip2 no-define 1.7 ]qt_color_tests) +AM_INIT_AUTOMAKE([foreign subdir-objects dist-bzip2 no-define 1.7 ]) # If Automake supports silent rules, enable them (credit to Brian) m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) From 4072f9c327a4886d0841080df4be1ca7c5721fd8 Mon Sep 17 00:00:00 2001 From: Ian Henriksen Date: Thu, 19 Sep 2024 10:26:57 -0600 Subject: [PATCH 05/11] No longer check for whether automake supports silent rules. It always does these days. 
--- configure.ac | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index a370d65b..a3961172 100644 --- a/configure.ac +++ b/configure.ac @@ -19,8 +19,7 @@ AC_CONFIG_MACRO_DIR([config]) AC_CONFIG_SRCDIR([src/qthread.c]) AM_INIT_AUTOMAKE([foreign subdir-objects dist-bzip2 no-define 1.7 ]) -# If Automake supports silent rules, enable them (credit to Brian) -m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) +AM_SILENT_RULES([yes]) # clean some extra things... CLEANFILES="*~ .\#* .gdb_history" From 4758d15d0013c7608ef1137618569c5ae3844e17 Mon Sep 17 00:00:00 2001 From: Ian Henriksen Date: Thu, 19 Sep 2024 10:30:09 -0600 Subject: [PATCH 06/11] Require a more recent automake. --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index a3961172..8022b7f0 100644 --- a/configure.ac +++ b/configure.ac @@ -18,7 +18,7 @@ AC_CONFIG_AUX_DIR([config]) AC_CONFIG_MACRO_DIR([config]) AC_CONFIG_SRCDIR([src/qthread.c]) -AM_INIT_AUTOMAKE([foreign subdir-objects dist-bzip2 no-define 1.7 ]) +AM_INIT_AUTOMAKE([foreign subdir-objects dist-bzip2 no-define 1.16 ]) AM_SILENT_RULES([yes]) # clean some extra things... From bc8cdc5faff3198436097a75049fe82833e4d98a Mon Sep 17 00:00:00 2001 From: Ian Henriksen Date: Thu, 19 Sep 2024 10:37:22 -0600 Subject: [PATCH 07/11] Get rid of timelimit check in configure. We just do this in CI now. --- configure.ac | 2 -- test/basics/Makefile.am | 4 ---- test/features/Makefile.am | 4 ---- test/stress/Makefile.am | 4 ---- 4 files changed, 14 deletions(-) diff --git a/configure.ac b/configure.ac index 8022b7f0..a786f71d 100644 --- a/configure.ac +++ b/configure.ac @@ -26,7 +26,6 @@ CLEANFILES="*~ .\#* .gdb_history" AC_SUBST(CLEANFILES) AC_CANONICAL_HOST -AC_PATH_PROG([timelimit_path],[timelimit]) ## ----------------- ## ## Check the options ## @@ -887,7 +886,6 @@ AM_CONDITIONAL([COMPILE_COMPAT_ATOMIC], [test "x$compile_compat_atomic" = "xyes" AM_CONDITIONAL([COMPILE_SPAWNCACHE], [test "x$enable_spawn_cache" = "xyes"]) AM_CONDITIONAL([COMPILE_EUREKAS], [test "x$enable_eurekas" = "xyes"]) AM_CONDITIONAL([HAVE_GUARD_PAGES], [test "x$enable_guard_pages" = "xyes"]) -AM_CONDITIONAL([HAVE_PROG_TIMELIMIT], [test "x$timelimit_path" != "x"]) AM_CONDITIONAL([QTHREAD_PERFORMANCE], [test "$enable_performance_monitoring" = "yes"]) AM_CONDITIONAL([WANT_SINGLE_WORKER_SCHEDULER], [test "x$with_scheduler" = "xnemesis" -o "x$with_scheduler" = "xlifo" -o "x$with_scheduler" = "xmutexfifo" -o "x$with_scheduler" = "xmtsfifo" -o "x$with_scheduler" = "xmdlifo"]) AM_CONDITIONAL([COMPILE_OMP_BENCHMARKS], [test "x$have_openmp" = "xyes"]) diff --git a/test/basics/Makefile.am b/test/basics/Makefile.am index adc017da..8035ce92 100644 --- a/test/basics/Makefile.am +++ b/test/basics/Makefile.am @@ -51,10 +51,6 @@ check_PROGRAMS = $(TESTS) TESTS_ENVIRONMENT = -if HAVE_PROG_TIMELIMIT -TESTS_ENVIRONMENT += @timelimit_path@ -T 1 -endif - if WANT_SINGLE_WORKER_SCHEDULER TESTS_ENVIRONMENT += env QT_NUM_SHEPHERDS=2 QT_NUM_WORKERS_PER_SHEPHERD=1 endif diff --git a/test/features/Makefile.am b/test/features/Makefile.am index 70ad3182..eb5eedf4 100644 --- a/test/features/Makefile.am +++ b/test/features/Makefile.am @@ -46,10 +46,6 @@ check_PROGRAMS = $(TESTS) TESTS_ENVIRONMENT = -if HAVE_PROG_TIMELIMIT -TESTS_ENVIRONMENT += @timelimit_path@ -T 2 -endif - if WANT_SINGLE_WORKER_SCHEDULER TESTS_ENVIRONMENT += env QT_NUM_SHEPHERDS=2 QT_NUM_WORKERS_PER_SHEPHERD=1 endif diff --git a/test/stress/Makefile.am b/test/stress/Makefile.am 
index c68d7d49..712673da 100644 --- a/test/stress/Makefile.am +++ b/test/stress/Makefile.am @@ -25,10 +25,6 @@ check_PROGRAMS = $(TESTS) TESTS_ENVIRONMENT = -if HAVE_PROG_TIMELIMIT -TESTS_ENVIRONMENT += @timelimit_path@ -T 30 -endif - if WANT_SINGLE_WORKER_SCHEDULER TESTS_ENVIRONMENT += env QT_NUM_SHEPHERDS=2 QT_NUM_WORKERS_PER_SHEPHERD=1 endif From f150394774c9df024c7dba51941e495c47917017 Mon Sep 17 00:00:00 2001 From: Ian Henriksen Date: Thu, 19 Sep 2024 10:54:14 -0600 Subject: [PATCH 08/11] Get rid of QINLINE macro for toggling inline behavior. The inline keyword in C has nothing to do with actual inlining and is all about linkage. --- configure.ac | 11 ----------- include/56reader-rwlock.h | 10 +++++----- include/qt_addrstat.h | 2 +- include/qt_atomics.h | 16 ++++++++-------- include/qt_blocking_structs.h | 4 ++-- include/qt_debug.h | 4 ++-- include/qt_gcd.h | 4 ++-- include/qt_profiling.h | 2 +- include/qt_shepherd_innards.h | 4 ++-- include/qt_threadqueue_stack.h | 12 ++++++------ include/qthread/common.h.in | 3 --- include/qthread/qarray.h | 2 +- include/qthread/qthread.h | 18 +++++++++--------- include/qthread/syncvar.hpp | 8 ++++---- src/alloc/base.c | 2 +- src/alloc/chapel.c | 2 +- src/ds/qarray.c | 6 +++--- src/feb.c | 20 ++++++++++---------- src/mpool.c | 4 ++-- src/qloop.c | 16 ++++++++-------- src/qthread.c | 18 +++++++++--------- src/syncvar.c | 14 +++++++------- src/threadqueues/distrib_threadqueues.c | 6 +++--- src/threadqueues/sherwood_threadqueues.c | 8 ++++---- 24 files changed, 91 insertions(+), 105 deletions(-) diff --git a/configure.ac b/configure.ac index a786f71d..8abe4aab 100644 --- a/configure.ac +++ b/configure.ac @@ -39,12 +39,6 @@ AC_ARG_ENABLE([debugging], [AS_HELP_STRING([--enable-debugging], [turns off optimization and turns on debug flags])]) -AC_ARG_ENABLE([inlined-functions], - [AS_HELP_STRING([--disable-inlined-functions], - [prevent functions from being inlined; this - sometimes is useful for analysis of the code in - a debugger or some other tool])]) - AC_ARG_ENABLE([hardware-atomics], [AS_HELP_STRING([--disable-hardware-atomics], [use mutexes to do the internal atomics; @@ -835,11 +829,6 @@ AS_IF([test "x$enable_lazy_threadids" = "xno" -o "x$enable_debug" != "xno"], enable_lazy_threadids=no], [enable_lazy_threadids=yes]) -AS_IF([test "x$enable_inlined_functions" != "xno"], - [qinline_define=inline], - [qinline_define=""]) -AC_DEFINE_UNQUOTED([QINLINE], [$qinline_define], [Allow function inlining to be toggled]) - AS_IF([test "x$enable_header_syscall_interception" == xyes], [AC_DEFINE([USE_HEADER_SYSCALLS], [1], [Define to allow blocking syscalls to be mangled into qthread-specific variants])]) diff --git a/include/56reader-rwlock.h b/include/56reader-rwlock.h index 9a8238f0..58660972 100644 --- a/include/56reader-rwlock.h +++ b/include/56reader-rwlock.h @@ -30,14 +30,14 @@ struct tlrw_lock { typedef struct tlrw_lock rwlock_t; -static QINLINE void rwlock_init(rwlock_t *l) { +static inline void rwlock_init(rwlock_t *l) { unsigned int i; l->owner = 0; for (i = 0; i < sizeof l->readers; i++) l->readers[i] = 0; } -static QINLINE void rwlock_rdlock(rwlock_t *l, int id) { +static inline void rwlock_rdlock(rwlock_t *l, int id) { assert(id >= 0); for (;;) { l->readers[id] = 1; @@ -53,12 +53,12 @@ static QINLINE void rwlock_rdlock(rwlock_t *l, int id) { } } -static QINLINE void rwlock_rdunlock(rwlock_t *l, int id) { +static inline void rwlock_rdunlock(rwlock_t *l, int id) { assert(id >= 0); l->readers[id] = 0; } -static QINLINE void rwlock_wrlock(rwlock_t *l, 
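To make the linkage point concrete, here is a minimal sketch (illustrative only, not code from this tree): a helper defined `static inline` in a header gets internal linkage in every translation unit that includes it, so the linker never sees duplicate definitions, while the compiler remains free to inline each call or emit an out-of-line copy regardless of the keyword.

/* hypothetical header qt_example.h -- not part of this repository */
#ifndef QT_EXAMPLE_H
#define QT_EXAMPLE_H

#include <stddef.h>

/* Internal linkage: every .c file that includes this header gets its own
 * identical definition, so there is no multiple-definition link error.
 * Whether any given call is actually inlined is entirely up to the
 * optimizer, with or without the keyword. */
static inline size_t qt_example_round_up(size_t n, size_t align) {
  /* assumes align is a power of two */
  return (n + (align - 1)) & ~(align - 1);
}

#endif /* QT_EXAMPLE_H */

For comparison, a build configured with --disable-inlined-functions simply defined QINLINE to nothing, which demoted these helpers to plain `static` functions; it never guaranteed anything about inlining either, so dropping the toggle changes nothing about what the optimizer may do.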
int id) { +static inline void rwlock_wrlock(rwlock_t *l, int id) { assert(id >= 0); id = id + 1; @@ -73,7 +73,7 @@ static QINLINE void rwlock_wrlock(rwlock_t *l, int id) { } } -static QINLINE void rwlock_wrunlock(rwlock_t *l) { +static inline void rwlock_wrunlock(rwlock_t *l) { l->owner = 0; MACHINE_FENCE; } diff --git a/include/qt_addrstat.h b/include/qt_addrstat.h index 9163d6e6..857ed332 100644 --- a/include/qt_addrstat.h +++ b/include/qt_addrstat.h @@ -6,7 +6,7 @@ /* This allocates a new, initialized addrstat structure, which is used for * keeping track of the FEB status of an address. It expects a shepherd pointer * to use to find the right memory pool to use. */ -static QINLINE qthread_addrstat_t *qthread_addrstat_new(void) { /*{{{ */ +static inline qthread_addrstat_t *qthread_addrstat_new(void) { /*{{{ */ qthread_addrstat_t *ret = ALLOC_ADDRSTAT(); QTHREAD_FASTLOCK_INIT_PTR(&ret->lock); QTHREAD_FASTLOCK_LOCK(&ret->lock); diff --git a/include/qt_atomics.h b/include/qt_atomics.h index ba83161c..0a97aae8 100644 --- a/include/qt_atomics.h +++ b/include/qt_atomics.h @@ -325,7 +325,7 @@ extern pthread_mutexattr_t _fastlock_attr; #define QT_CAS_(var, oldv, newv, caslock) \ qt_cas((void **)&(var), (void *)(oldv), (void *)(newv), &(caslock)) -static QINLINE void *qt_cas(void **const ptr, +static inline void *qt_cas(void **const ptr, void *const oldv, void *const newv, QTHREAD_FASTLOCK_TYPE *lock) { @@ -338,7 +338,7 @@ static QINLINE void *qt_cas(void **const ptr, return ret; } -static QINLINE uintptr_t qt_cas_read_ui(uintptr_t *const ptr, +static inline uintptr_t qt_cas_read_ui(uintptr_t *const ptr, QTHREAD_FASTLOCK_TYPE *mutex) { uintptr_t ret; @@ -364,7 +364,7 @@ static QINLINE uintptr_t qt_cas_read_ui(uintptr_t *const ptr, #ifdef QTHREAD_ATOMIC_CAS_PTR #define qt_cas(P, O, N) (void *)__sync_val_compare_and_swap((P), (O), (N)) #else -static QINLINE void * +static inline void * qt_cas(void **const ptr, void *const oldv, void *const newv) { /*{{{*/ #if defined(HAVE_GCC_INLINE_ASSEMBLY) #if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) @@ -437,7 +437,7 @@ qt_cas(void **const ptr, void *const oldv, void *const newv) { /*{{{*/ qthread_internal_incr_mod_(op, m, lock) #define QTHREAD_OPTIONAL_LOCKARG , QTHREAD_FASTLOCK_TYPE *lock -static QINLINE aligned_t qthread_internal_incr(aligned_t *operand, +static inline aligned_t qthread_internal_incr(aligned_t *operand, QTHREAD_FASTLOCK_TYPE *lock, int val) { /*{{{ */ aligned_t retval; @@ -449,7 +449,7 @@ static QINLINE aligned_t qthread_internal_incr(aligned_t *operand, return retval; } /*}}} */ -static QINLINE saligned_t qthread_internal_incr_s(saligned_t *operand, +static inline saligned_t qthread_internal_incr_s(saligned_t *operand, QTHREAD_FASTLOCK_TYPE *lock, int val) { /*{{{ */ saligned_t retval; @@ -461,7 +461,7 @@ static QINLINE saligned_t qthread_internal_incr_s(saligned_t *operand, return retval; } /*}}} */ -static QINLINE saligned_t qthread_internal_atomic_read_s( +static inline saligned_t qthread_internal_atomic_read_s( saligned_t *operand, QTHREAD_FASTLOCK_TYPE *lock) { /*{{{ */ saligned_t retval; @@ -473,7 +473,7 @@ static QINLINE saligned_t qthread_internal_atomic_read_s( #endif /* ifndef QTHREAD_MUTEX_INCREMENT */ -static QINLINE aligned_t qthread_internal_incr_mod_( +static inline aligned_t qthread_internal_incr_mod_( aligned_t *operand, unsigned int const max QTHREAD_OPTIONAL_LOCKARG) { /*{{{ */ aligned_t retval; @@ -651,7 +651,7 @@ static QINLINE aligned_t qthread_internal_incr_mod_( return retval; } /*}}} */ -static QINLINE void 
*qt_internal_atomic_swap_ptr(void **addr, +static inline void *qt_internal_atomic_swap_ptr(void **addr, void *newval) { /*{{{*/ void *oldval = atomic_load_explicit((void *_Atomic *)addr, memory_order_relaxed); diff --git a/include/qt_blocking_structs.h b/include/qt_blocking_structs.h index 15de7181..6a0cf1f8 100644 --- a/include/qt_blocking_structs.h +++ b/include/qt_blocking_structs.h @@ -82,14 +82,14 @@ extern qt_mpool generic_addrstat_pool; #else extern qt_mpool generic_addrres_pool; -static QINLINE qthread_addrres_t *ALLOC_ADDRRES(void) { /*{{{ */ +static inline qthread_addrres_t *ALLOC_ADDRRES(void) { /*{{{ */ qthread_addrres_t *tmp = (qthread_addrres_t *)qt_mpool_alloc(generic_addrres_pool); return tmp; } /*}}} */ -static QINLINE void FREE_ADDRRES(qthread_addrres_t *t) { /*{{{ */ +static inline void FREE_ADDRRES(qthread_addrres_t *t) { /*{{{ */ qt_mpool_free(generic_addrres_pool, t); } /*}}} */ diff --git a/include/qt_debug.h b/include/qt_debug.h index f72faf93..e0ae4dac 100644 --- a/include/qt_debug.h +++ b/include/qt_debug.h @@ -12,7 +12,7 @@ #define ALLOC_SCRIBBLE(ptr, sz) memset((ptr), 0x55, (sz)) #define FREE_SCRIBBLE(ptr, sz) memset((ptr), 0x77, (sz)) -static QINLINE void *MALLOC(size_t sz) { +static inline void *MALLOC(size_t sz) { void *ret = qt_malloc(sz); ALLOC_SCRIBBLE(ret, sz); @@ -254,7 +254,7 @@ extern QTHREAD_FASTLOCK_TYPE output_lock; #define qthread_debug(level, format, ...) \ qthread_debug_(level, "%s(%u): " format, __FUNCTION__, __LINE__, __VA_ARGS__) -static QINLINE void qthread_debug_(int level, char const *format, ...) +static inline void qthread_debug_(int level, char const *format, ...) { /*{{{ */ va_list args; diff --git a/include/qt_gcd.h b/include/qt_gcd.h index 1b9e4e0b..be94ccc3 100644 --- a/include/qt_gcd.h +++ b/include/qt_gcd.h @@ -4,7 +4,7 @@ #include -static QINLINE size_t qt_gcd(size_t a, size_t b) { +static inline size_t qt_gcd(size_t a, size_t b) { #ifdef QTHREAD_SHIFT_GCD size_t k = 0; if (a == 0) return b; @@ -32,7 +32,7 @@ static QINLINE size_t qt_gcd(size_t a, size_t b) { #endif } -static QINLINE size_t qt_lcm(size_t a, size_t b) { /*{{{ */ +static inline size_t qt_lcm(size_t a, size_t b) { /*{{{ */ size_t tmp = qt_gcd(a, b); /* on 32 bit platforms, it's pretty easy for a * b to overflow so we force * 64 bit multiplication*/ diff --git a/include/qt_profiling.h b/include/qt_profiling.h index 67153653..55bfebfe 100644 --- a/include/qt_profiling.h +++ b/include/qt_profiling.h @@ -73,7 +73,7 @@ #define QTHREAD_FEB_UNIQUERECORD2(TYPE, ADDR, SHEP) \ qt_hash_put((SHEP)->unique##TYPE##addrs, (void *)(ADDR), (void *)(ADDR)) -static QINLINE void +static inline void qthread_unique_collect(qt_key_t const key, void *value, void *id) { /*{{{*/ qt_hash_put_locked((qt_hash)id, key, value); } /*}}}*/ diff --git a/include/qt_shepherd_innards.h b/include/qt_shepherd_innards.h index 2c9b4a1e..6b0eceef 100644 --- a/include/qt_shepherd_innards.h +++ b/include/qt_shepherd_innards.h @@ -132,7 +132,7 @@ struct qthread_shepherd_s { extern TLS_DECL(qthread_shepherd_t *, shepherd_structs); -static QINLINE qthread_shepherd_t *qthread_internal_getshep(void) { +static inline qthread_shepherd_t *qthread_internal_getshep(void) { qthread_worker_t *w = (qthread_worker_t *)TLS_GET(shepherd_structs); if (w == NULL) { return NULL; @@ -141,7 +141,7 @@ static QINLINE qthread_shepherd_t *qthread_internal_getshep(void) { } } -static QINLINE qthread_worker_t *qthread_internal_getworker(void) { +static inline qthread_worker_t *qthread_internal_getworker(void) { return (qthread_worker_t 
*)TLS_GET(shepherd_structs); } diff --git a/include/qt_threadqueue_stack.h b/include/qt_threadqueue_stack.h index e58105eb..65dc68c6 100644 --- a/include/qt_threadqueue_stack.h +++ b/include/qt_threadqueue_stack.h @@ -34,15 +34,15 @@ static void qt_stack_free(qt_stack_t *stack) { stack->base = stack->top = stack->capacity = 0; } -static QINLINE int qt_stack_is_empty(qt_stack_t *stack) { +static inline int qt_stack_is_empty(qt_stack_t *stack) { return (stack->empty); } -static QINLINE int qt_stack_is_full(qt_stack_t *stack) { +static inline int qt_stack_is_full(qt_stack_t *stack) { return (stack->base == ((stack->top + 1) % stack->capacity)); } -static QINLINE int qt_stack_size(qt_stack_t *stack) { +static inline int qt_stack_size(qt_stack_t *stack) { if (stack->top >= stack->base) { return (stack->top - stack->base); } else { @@ -70,21 +70,21 @@ static void qt_stack_resize(qt_stack_t *stack) { free(old_storage); } -static QINLINE void qt_stack_push(qt_stack_t *stack, qthread_t *t) { +static inline void qt_stack_push(qt_stack_t *stack, qthread_t *t) { if (qt_stack_is_full(stack)) { qt_stack_resize(stack); } stack->top = (stack->top + 1) % (stack->capacity); stack->storage[stack->top] = t; stack->empty = 0; } -static QINLINE void qt_stack_enq_base(qt_stack_t *stack, qthread_t *t) { +static inline void qt_stack_enq_base(qt_stack_t *stack, qthread_t *t) { if (qt_stack_is_full(stack)) { qt_stack_resize(stack); } stack->storage[stack->base] = t; stack->base = (stack->base - 1 + stack->capacity) % (stack->capacity); stack->empty = 0; } -static QINLINE qthread_t *qt_stack_pop(qt_stack_t *stack) { +static inline qthread_t *qt_stack_pop(qt_stack_t *stack) { if (qt_stack_is_empty(stack)) { return (NULL); } qthread_t *t = stack->storage[stack->top]; assert(t != NULL); diff --git a/include/qthread/common.h.in b/include/qthread/common.h.in index 79f04743..83b097f6 100644 --- a/include/qthread/common.h.in +++ b/include/qthread/common.h.in @@ -26,9 +26,6 @@ /* if the compiler supports __attribute__((deprecated)) */ #undef Q_DEPRECATED -/* Allow functions to be inlined */ -#undef QINLINE - #ifndef __powerpc #define BITFIELD_ORDER_REVERSE #else diff --git a/include/qthread/qarray.h b/include/qthread/qarray.h index 4a5fdae8..f45b3e34 100644 --- a/include/qthread/qarray.h +++ b/include/qthread/qarray.h @@ -105,7 +105,7 @@ void qarray_dist_like(qarray const *ref, qarray *mod); #define qarray_elem(a, i) qarray_elem_nomigrate(a, i) void *qarray_elem_migrate(qarray const *a, size_t const index); -QINLINE static void *qarray_elem_nomigrate(qarray const *a, +inline static void *qarray_elem_nomigrate(qarray const *a, size_t const index) { if ((a == NULL) || (index > a->count)) { return NULL; } diff --git a/include/qthread/qthread.h b/include/qthread/qthread.h index d8971f28..e6a4b5ef 100644 --- a/include/qthread/qthread.h +++ b/include/qthread/qthread.h @@ -692,7 +692,7 @@ uint64_t qthread_cas64_(uint64_t *, uint64_t, uint64_t); * All of these functions return the value of the contents of the operand * *after* incrementing. 
*/ -static QINLINE float qthread_fincr(float *operand, float incr) { /*{{{ */ +static inline float qthread_fincr(float *operand, float incr) { /*{{{ */ #if defined(QTHREAD_MUTEX_INCREMENT) return qthread_fincr_(operand, incr); @@ -806,7 +806,7 @@ static QINLINE float qthread_fincr(float *operand, float incr) { /*{{{ */ #endif // if defined(QTHREAD_MUTEX_INCREMENT) } /*}}} */ -static QINLINE double qthread_dincr(double *operand, double incr) { /*{{{ */ +static inline double qthread_dincr(double *operand, double incr) { /*{{{ */ #if defined(QTHREAD_MUTEX_INCREMENT) || \ (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) return qthread_dincr_(operand, incr); @@ -1001,7 +1001,7 @@ static QINLINE double qthread_dincr(double *operand, double incr) { /*{{{ */ // QTHREAD_POWERPC32) } /*}}} */ -static QINLINE uint32_t qthread_incr32(uint32_t *operand, +static inline uint32_t qthread_incr32(uint32_t *operand, uint32_t incr) { /*{{{ */ #ifdef QTHREAD_MUTEX_INCREMENT return qthread_incr32_(operand, incr); @@ -1055,7 +1055,7 @@ static QINLINE uint32_t qthread_incr32(uint32_t *operand, #endif // ifdef QTHREAD_MUTEX_INCREMENT } /*}}} */ -static QINLINE uint64_t qthread_incr64(uint64_t *operand, +static inline uint64_t qthread_incr64(uint64_t *operand, uint64_t incr) { /*{{{ */ #if defined(QTHREAD_MUTEX_INCREMENT) || \ (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) @@ -1170,7 +1170,7 @@ static QINLINE uint64_t qthread_incr64(uint64_t *operand, #endif // if defined(QTHREAD_MUTEX_INCREMENT) || QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32 } /*}}} */ -static QINLINE int64_t qthread_incr_xx(void *addr, +static inline int64_t qthread_incr_xx(void *addr, int64_t incr, size_t length) { /*{{{ */ switch (length) { @@ -1186,7 +1186,7 @@ static QINLINE int64_t qthread_incr_xx(void *addr, uint64_t qthread_syncvar_incrF(syncvar_t *restrict operand, uint64_t inc); #if !defined(QTHREAD_ATOMIC_CAS) || defined(QTHREAD_MUTEX_INCREMENT) -static QINLINE uint32_t qthread_cas32(uint32_t *operand, +static inline uint32_t qthread_cas32(uint32_t *operand, uint32_t oldval, uint32_t newval) { /*{{{ */ #ifdef QTHREAD_MUTEX_INCREMENT // XXX: this is only valid if you don't read @@ -1233,7 +1233,7 @@ static QINLINE uint32_t qthread_cas32(uint32_t *operand, #endif // ifdef QTHREAD_MUTEX_INCREMENT } /*}}} */ -static QINLINE uint64_t qthread_cas64(uint64_t *operand, +static inline uint64_t qthread_cas64(uint64_t *operand, uint64_t oldval, uint64_t newval) { /*{{{ */ #ifdef QTHREAD_MUTEX_INCREMENT @@ -1325,7 +1325,7 @@ static QINLINE uint64_t qthread_cas64(uint64_t *operand, #endif // ifdef QTHREAD_MUTEX_INCREMENT } /*}}} */ -static QINLINE aligned_t qthread_cas_xx(aligned_t *addr, +static inline aligned_t qthread_cas_xx(aligned_t *addr, aligned_t oldval, aligned_t newval, size_t length) { /*{{{ */ @@ -1342,7 +1342,7 @@ static QINLINE aligned_t qthread_cas_xx(aligned_t *addr, return 0; /* compiler check */ } /*}}} */ -static QINLINE void * +static inline void * qthread_cas_ptr_(void **addr, void *oldval, void *newval) { /*{{{*/ #if (QTHREAD_BITS == 32) return (void *)(uintptr_t)qthread_cas32( diff --git a/include/qthread/syncvar.hpp b/include/qthread/syncvar.hpp index 17404006..6d8bf6d4 100644 --- a/include/qthread/syncvar.hpp +++ b/include/qthread/syncvar.hpp @@ -9,22 +9,22 @@ class syncvar; class syncvar { public: - QINLINE syncvar(void) noexcept { + inline syncvar(void) noexcept { // Doing it this way because extended initializers (e.g. 
// SYNCVAR_STATIC_INITIALIZER) are not (yet) supported by C++ the_syncvar_t.u.w = 0; } - QINLINE syncvar(uint64_t const &val) noexcept { + inline syncvar(uint64_t const &val) noexcept { assert(!(val & 0xf000000000000000ull)); the_syncvar_t.u.s.data = val; } - QINLINE syncvar(syncvar const &val) noexcept { + inline syncvar(syncvar const &val) noexcept { the_syncvar_t.u.w = val.the_syncvar_t.u.w; } - QINLINE syncvar(syncvar_t const &val) { the_syncvar_t.u.w = val.u.w; } + inline syncvar(syncvar_t const &val) { the_syncvar_t.u.w = val.u.w; } int empty(void) { return qthread_syncvar_empty(&the_syncvar_t); } diff --git a/src/alloc/base.c b/src/alloc/base.c index 451cfa30..dec40440 100644 --- a/src/alloc/base.c +++ b/src/alloc/base.c @@ -12,7 +12,7 @@ #ifdef HAVE_GETPAGESIZE #include #else -static QINLINE int getpagesize() { return 4096; } +static inline int getpagesize() { return 4096; } #endif /* Internal Headers */ diff --git a/src/alloc/chapel.c b/src/alloc/chapel.c index 66a7c9ed..135f42dc 100644 --- a/src/alloc/chapel.c +++ b/src/alloc/chapel.c @@ -9,7 +9,7 @@ #ifdef HAVE_GETPAGESIZE #include #else -static QINLINE int getpagesize() { return 4096; } +static inline int getpagesize() { return 4096; } #endif #include "chpl-linefile-support.h" diff --git a/src/ds/qarray.c b/src/ds/qarray.c index 109e4337..e83fed74 100644 --- a/src/ds/qarray.c +++ b/src/ds/qarray.c @@ -32,7 +32,7 @@ static aligned_t *chunk_distribution_tracker = NULL; /* this function is for DIST *ONLY*; it returns a pointer to the location that * the bookkeeping data is stored (i.e. the record of where this segment is * stored) */ -static QINLINE qthread_shepherd_id_t * +static inline qthread_shepherd_id_t * qarray_internal_segment_shep(qarray const *a, void const *segment_head) { /*{{{ */ char *ptr = (((char *)segment_head) + (a->segment_size * a->unit_size)); @@ -47,7 +47,7 @@ qarray_internal_segment_shep(qarray const *a, return (qthread_shepherd_id_t *)ptr; } /*}}} */ -static QINLINE qthread_shepherd_id_t qarray_internal_segment_shep_read( +static inline qthread_shepherd_id_t qarray_internal_segment_shep_read( qarray const *a, void const *segment_head) { /*{{{*/ qthread_shepherd_id_t retval; qthread_shepherd_id_t *ptr = qarray_internal_segment_shep(a, segment_head); @@ -58,7 +58,7 @@ static QINLINE qthread_shepherd_id_t qarray_internal_segment_shep_read( return retval; } /*}}}*/ -static QINLINE void +static inline void qarray_internal_segment_shep_write(qarray const *a, void const *segment_head, qthread_shepherd_id_t shep) { /*{{{*/ diff --git a/src/feb.c b/src/feb.c index ae0c4ba1..7c9d8e2d 100644 --- a/src/feb.c +++ b/src/feb.c @@ -73,19 +73,19 @@ typedef struct { /******************************************************************** * Local Prototypes *********************************************************************/ -static QINLINE void qthread_gotlock_fill(qthread_shepherd_t *shep, +static inline void qthread_gotlock_fill(qthread_shepherd_t *shep, qthread_addrstat_t *m, void *maddr); -static QINLINE void +static inline void qthread_gotlock_fill_inner(qthread_shepherd_t *shep, qthread_addrstat_t *m, void *maddr, uint_fast8_t const recursive, qthread_addrres_t **precond_tasks); -static QINLINE void qthread_gotlock_empty(qthread_shepherd_t *shep, +static inline void qthread_gotlock_empty(qthread_shepherd_t *shep, qthread_addrstat_t *m, void *maddr); -static QINLINE void +static inline void qthread_gotlock_empty_inner(qthread_shepherd_t *shep, qthread_addrstat_t *m, void *maddr, @@ -317,7 +317,7 @@ int API_FUNC 
qthread_feb_status(aligned_t const *addr) { /*{{{ */ /* this function removes the FEB data structure for the address maddr from the * hash table */ -static QINLINE void qthread_FEB_remove(void *maddr) { /*{{{ */ +static inline void qthread_FEB_remove(void *maddr) { /*{{{ */ qthread_addrstat_t *m; int const lockbin = QTHREAD_CHOOSE_STRIPE2(maddr); @@ -406,7 +406,7 @@ static QINLINE void qthread_FEB_remove(void *maddr) { /*{{{ */ } } /*}}} */ -static QINLINE void +static inline void qthread_precond_launch(qthread_shepherd_t *shep, qthread_addrres_t *precond_tasks) { /*{{{*/ qthread_addrres_t *precond_tail = @@ -435,7 +435,7 @@ qthread_precond_launch(qthread_shepherd_t *shep, } } /*}}}*/ -static QINLINE void +static inline void qthread_gotlock_empty_inner(qthread_shepherd_t *shep, qthread_addrstat_t *m, void *maddr, @@ -484,7 +484,7 @@ qthread_gotlock_empty_inner(qthread_shepherd_t *shep, } } /*}}} */ -static QINLINE void qthread_gotlock_empty(qthread_shepherd_t *shep, +static inline void qthread_gotlock_empty(qthread_shepherd_t *shep, qthread_addrstat_t *m, void *maddr) { qthread_addrres_t *tmp = NULL; @@ -492,7 +492,7 @@ static QINLINE void qthread_gotlock_empty(qthread_shepherd_t *shep, qthread_gotlock_empty_inner(shep, m, maddr, 0, &tmp); } -static QINLINE void +static inline void qthread_gotlock_fill_inner(qthread_shepherd_t *shep, qthread_addrstat_t *m, void *maddr, @@ -645,7 +645,7 @@ qthread_gotlock_fill_inner(qthread_shepherd_t *shep, } } /*}}} */ -static QINLINE void qthread_gotlock_fill(qthread_shepherd_t *shep, +static inline void qthread_gotlock_fill(qthread_shepherd_t *shep, qthread_addrstat_t *m, void *maddr) { qthread_addrres_t *tmp = NULL; diff --git a/src/mpool.c b/src/mpool.c index bc26992a..dd9a7ed5 100644 --- a/src/mpool.c +++ b/src/mpool.c @@ -108,7 +108,7 @@ void INTERNAL qt_mpool_subsystem_init(void) { } /* local funcs */ -static QINLINE void * +static inline void * qt_mpool_internal_aligned_alloc(size_t alloc_size, size_t alignment) { /*{{{ */ void *ret = qt_internal_aligned_alloc(alloc_size, alignment); @@ -116,7 +116,7 @@ qt_mpool_internal_aligned_alloc(size_t alloc_size, size_t alignment) { /*{{{ */ return ret; } /*}}} */ -static QINLINE void qt_mpool_internal_aligned_free(void *freeme, +static inline void qt_mpool_internal_aligned_free(void *freeme, size_t alignment) { /*{{{ */ qt_internal_aligned_free(freeme, alignment); } /*}}} */ diff --git a/src/qloop.c b/src/qloop.c index fa320501..c4a6cf04 100644 --- a/src/qloop.c +++ b/src/qloop.c @@ -41,7 +41,7 @@ struct qloop_wrapper_args { void *sync; }; -static QINLINE void qt_loop_balance_inner(size_t const start, +static inline void qt_loop_balance_inner(size_t const start, size_t const stop, qt_loop_f const func, void *argptr, @@ -320,7 +320,7 @@ void API_FUNC qt_loop_sinc(size_t start, #define QT_LOOP_BALANCE_SIMPLE (1 << 0) -static QINLINE void qt_loop_balance_inner(size_t const start, +static inline void qt_loop_balance_inner(size_t const start, size_t const stop, qt_loop_f const func, void *argptr, @@ -561,7 +561,7 @@ static aligned_t qloopaccum_wrapper(void *restrict arg_void) { /*{{{ */ return 0; } /*}}} */ -static QINLINE void qt_loopaccum_balance_inner(size_t const start, +static inline void qt_loopaccum_balance_inner(size_t const start, size_t const stop, size_t const size, void *restrict out, @@ -798,7 +798,7 @@ static int qqloop_get_iterations_guided( } } /*}}} */ -static QINLINE int qqloop_get_iterations_factored( +static inline int qqloop_get_iterations_factored( qqloop_iteration_queue_t *restrict const 
iq, struct qqloop_static_args *restrict const sa, struct qqloop_wrapper_range *restrict const range) { /*{{{ */ @@ -856,7 +856,7 @@ static QINLINE int qqloop_get_iterations_factored( } } /*}}} */ -static QINLINE int qqloop_get_iterations_chunked( +static inline int qqloop_get_iterations_chunked( qqloop_iteration_queue_t *restrict const iq, struct qqloop_static_args *restrict const sa, struct qqloop_wrapper_range *restrict const range) { /*{{{ */ @@ -878,7 +878,7 @@ static QINLINE int qqloop_get_iterations_chunked( return retval; } /*}}} */ -static QINLINE int qqloop_get_iterations_timed( +static inline int qqloop_get_iterations_timed( qqloop_iteration_queue_t *restrict const iq, struct qqloop_static_args *restrict const sa, struct qqloop_wrapper_range *restrict const range) { /*{{{ */ @@ -936,7 +936,7 @@ static QINLINE int qqloop_get_iterations_timed( } } /*}}} */ -static QINLINE qqloop_iteration_queue_t * +static inline qqloop_iteration_queue_t * qqloop_create_iq(size_t const startat, size_t const stopat, size_t const step, @@ -971,7 +971,7 @@ qqloop_create_iq(size_t const startat, return iq; } /*}}} */ -static QINLINE void qqloop_destroy_iq(qqloop_iteration_queue_t *iq) { /*{{{ */ +static inline void qqloop_destroy_iq(qqloop_iteration_queue_t *iq) { /*{{{ */ assert(iq); switch (iq->type) { case TIMED: { diff --git a/src/qthread.c b/src/qthread.c index 819dcacf..f5424d41 100644 --- a/src/qthread.c +++ b/src/qthread.c @@ -133,13 +133,13 @@ static void qthread_wrapper(unsigned int high, unsigned int low); static void qthread_wrapper(void *ptr); #endif -static QINLINE void qthread_makecontext(qt_context_t *const c, +static inline void qthread_makecontext(qt_context_t *const c, void *const stack, size_t const stacksize, void (*func)(void), void const *const arg, qt_context_t *const returnc); -static QINLINE qthread_t *qthread_thread_new(qthread_f f, +static inline qthread_t *qthread_thread_new(qthread_f f, void const *arg, size_t arg_size, void *ret, @@ -175,7 +175,7 @@ qt_mpool generic_big_qthread_pool = NULL; #if defined(UNPOOLED_STACKS) || defined(UNPOOLED) #ifdef QTHREAD_GUARD_PAGES -static QINLINE void *ALLOC_STACK(void) { /*{{{ */ +static inline void *ALLOC_STACK(void) { /*{{{ */ if (GUARD_PAGES) { uint8_t *tmp = qt_internal_aligned_alloc( qlib->qthread_stack_size + sizeof(struct qthread_runtime_data_s) + @@ -203,7 +203,7 @@ static QINLINE void *ALLOC_STACK(void) { /*{{{ */ } } /*}}} */ -static QINLINE void FREE_STACK(void *t) { /*{{{ */ +static inline void FREE_STACK(void *t) { /*{{{ */ if (GUARD_PAGES) { uint8_t *tmp = t; @@ -234,7 +234,7 @@ static QINLINE void FREE_STACK(void *t) { /*{{{ */ #else /* if defined(UNPOOLED_STACKS) || defined(UNPOOLED) */ static qt_mpool generic_stack_pool = NULL; #ifdef QTHREAD_GUARD_PAGES -static QINLINE void *ALLOC_STACK(void) { /*{{{ */ +static inline void *ALLOC_STACK(void) { /*{{{ */ if (GUARD_PAGES) { uint8_t *tmp = qt_mpool_alloc(generic_stack_pool); @@ -254,7 +254,7 @@ static QINLINE void *ALLOC_STACK(void) { /*{{{ */ } } /*}}} */ -static QINLINE void FREE_STACK(void *t) { /*{{{ */ +static inline void FREE_STACK(void *t) { /*{{{ */ if (GUARD_PAGES) { assert(t); t = (uint8_t *)t - getpagesize(); @@ -350,7 +350,7 @@ int qthread_library_initialized = 0; void *shep0arg = NULL; #endif -static QINLINE void alloc_rdata(qthread_shepherd_t *me, qthread_t *t) { /*{{{*/ +static inline void alloc_rdata(qthread_shepherd_t *me, qthread_t *t) { /*{{{*/ void *stack = NULL; struct qthread_runtime_data_s *rdata; @@ -1348,7 +1348,7 @@ int API_FUNC 
qthread_initialize(void) { /*{{{ */ /* This initializes a context (c) to run the function (func) with a single * argument (arg). This is just a wrapper around makecontext that isolates some * of the portability garbage. */ -static QINLINE void qthread_makecontext(qt_context_t *const c, +static inline void qthread_makecontext(qt_context_t *const c, void *const stack, size_t const stacksize, void (*func)(void), @@ -2130,7 +2130,7 @@ aligned_t API_FUNC *qthread_retloc(void) { /*{{{ */ /************************************************************/ /* functions to manage thread stack allocation/deallocation */ /************************************************************/ -static QINLINE qthread_t *qthread_thread_new(qthread_f const f, +static inline qthread_t *qthread_thread_new(qthread_f const f, void const *arg, size_t arg_size, void *ret, diff --git a/src/syncvar.c b/src/syncvar.c index 3bb69967..32762657 100644 --- a/src/syncvar.c +++ b/src/syncvar.c @@ -31,15 +31,15 @@ #endif /* QTHREAD_USE_EUREKAS */ /* Internal Prototypes */ -static QINLINE void qthread_syncvar_gotlock_fill(qthread_shepherd_t *shep, +static inline void qthread_syncvar_gotlock_fill(qthread_shepherd_t *shep, qthread_addrstat_t *m, syncvar_t *maddr, uint64_t const ret); -static QINLINE void qthread_syncvar_gotlock_empty(qthread_shepherd_t *shep, +static inline void qthread_syncvar_gotlock_empty(qthread_shepherd_t *shep, qthread_addrstat_t *m, syncvar_t *maddr, uint64_t const ret); -static QINLINE void qthread_syncvar_remove(void *maddr); +static inline void qthread_syncvar_remove(void *maddr); /* Internal Structs */ typedef struct { @@ -1018,7 +1018,7 @@ int API_FUNC qthread_syncvar_readFE_nb(uint64_t *restrict dest, return QTHREAD_SUCCESS; } /*}}} */ -static QINLINE void qthread_syncvar_schedule(qthread_t *waiter, +static inline void qthread_syncvar_schedule(qthread_t *waiter, qthread_shepherd_t *shep) { /*{{{*/ assert(waiter); assert(shep); @@ -1037,7 +1037,7 @@ static QINLINE void qthread_syncvar_schedule(qthread_t *waiter, } } /*}}}*/ -static QINLINE void qthread_syncvar_remove(void *maddr) { /*{{{*/ +static inline void qthread_syncvar_remove(void *maddr) { /*{{{*/ int const lockbin = QTHREAD_CHOOSE_STRIPE(maddr); qthread_addrstat_t *m; @@ -1114,7 +1114,7 @@ static QINLINE void qthread_syncvar_remove(void *maddr) { /*{{{*/ } } /*}}}*/ -static QINLINE void qthread_syncvar_gotlock_empty(qthread_shepherd_t *shep, +static inline void qthread_syncvar_gotlock_empty(qthread_shepherd_t *shep, qthread_addrstat_t *m, syncvar_t *maddr, uint64_t const sf) { /*{{{ */ @@ -1145,7 +1145,7 @@ static QINLINE void qthread_syncvar_gotlock_empty(qthread_shepherd_t *shep, if (removeable) { qthread_syncvar_remove(maddr); } } /*}}} */ -static QINLINE void qthread_syncvar_gotlock_fill(qthread_shepherd_t *shep, +static inline void qthread_syncvar_gotlock_fill(qthread_shepherd_t *shep, qthread_addrstat_t *m, syncvar_t *maddr, uint64_t const ret) { /*{{{ */ diff --git a/src/threadqueues/distrib_threadqueues.c b/src/threadqueues/distrib_threadqueues.c index 5b0b7a16..ec4d02c0 100644 --- a/src/threadqueues/distrib_threadqueues.c +++ b/src/threadqueues/distrib_threadqueues.c @@ -100,18 +100,18 @@ static void free_threadqueue(qt_threadqueue_t *t) { qt_mpool_free(generic_threadqueue_pools.queues, t); } -static QINLINE qt_threadqueue_node_t *alloc_tqnode(void) { +static inline qt_threadqueue_node_t *alloc_tqnode(void) { return (qt_threadqueue_node_t *)qt_mpool_alloc( generic_threadqueue_pools.nodes); } -static QINLINE void 
free_tqnode(qt_threadqueue_node_t *t) { +static inline void free_tqnode(qt_threadqueue_node_t *t) { qt_mpool_free(generic_threadqueue_pools.nodes, t); } extern qt_mpool generic_qthread_pool; -static QINLINE void free_qthread(qthread_t *t) { +static inline void free_qthread(qthread_t *t) { return qt_mpool_free(generic_qthread_pool, t); } diff --git a/src/threadqueues/sherwood_threadqueues.c b/src/threadqueues/sherwood_threadqueues.c index 64293747..86483252 100644 --- a/src/threadqueues/sherwood_threadqueues.c +++ b/src/threadqueues/sherwood_threadqueues.c @@ -198,7 +198,7 @@ qt_threadqueue_pools_t generic_threadqueue_pools; (qt_threadqueue_t *)qt_mpool_alloc(generic_threadqueue_pools.queues) #define FREE_THREADQUEUE(t) qt_mpool_free(generic_threadqueue_pools.queues, t) -static QINLINE qt_threadqueue_node_t *ALLOC_TQNODE(void) { /*{{{ */ +static inline qt_threadqueue_node_t *ALLOC_TQNODE(void) { /*{{{ */ return (qt_threadqueue_node_t *)qt_mpool_alloc( generic_threadqueue_pools.nodes); } /*}}} */ @@ -244,7 +244,7 @@ ssize_t INTERNAL qt_threadqueue_advisory_queuelen(qt_threadqueue_t *q) { /*{{{*/ /* functions to manage the thread queues */ /*****************************************/ -static QINLINE qt_threadqueue_node_t * +static inline qt_threadqueue_node_t * qthread_steal(qthread_shepherd_t *thief_shepherd); qt_threadqueue_t INTERNAL *qt_threadqueue_new(void) { /*{{{*/ @@ -306,7 +306,7 @@ void INTERNAL qt_threadqueue_free(qt_threadqueue_t *q) { /*{{{*/ FREE_THREADQUEUE(q); } /*}}}*/ -static QINLINE int qt_threadqueue_isstealable(qthread_t *t) { /*{{{*/ +static inline int qt_threadqueue_isstealable(qthread_t *t) { /*{{{*/ return ((atomic_load_explicit(&t->flags, memory_order_relaxed) & QTHREAD_UNSTEALABLE) == 0) ? 1 @@ -1148,7 +1148,7 @@ qt_threadqueue_dequeue_steal(qt_threadqueue_t *h, /* Steal work from another shepherd's queue * Returns the work stolen */ -static QINLINE qt_threadqueue_node_t * +static inline qt_threadqueue_node_t * qthread_steal(qthread_shepherd_t *thief_shepherd) { /*{{{*/ qt_threadqueue_node_t *stolen = NULL; From 204869a82ee6b3b2999674b2c89439b205610596 Mon Sep 17 00:00:00 2001 From: Ian Henriksen Date: Thu, 19 Sep 2024 13:16:35 -0600 Subject: [PATCH 09/11] Remove old perl wrapper for the autotools build that's no longer used or supported. 
--- scripts/build.pl | 378 ----------------------------------------------- 1 file changed, 378 deletions(-) delete mode 100755 scripts/build.pl diff --git a/scripts/build.pl b/scripts/build.pl deleted file mode 100755 index 158eeb19..00000000 --- a/scripts/build.pl +++ /dev/null @@ -1,378 +0,0 @@ -#!/usr/bin/perl - -use strict; -use warnings; - -use Cwd qw/getcwd/; - -# Setup configuration options -my @default_conf_names = ('compat', 'unpooled', 'opt', 'nemesis', 'lifo', 'mutexfifo', 'slowcontext', 'shep_profile', 'lock_profile', 'steal_profile', 'tc_profile', 'hi_st', 'hi_mt', 'dev'); - -my %config = ( - default => '', - icc => 'CC=icc CXX=icc', - oldgcc => 'CC=gcc34 CXX=g++34', - compat => 'CFLAGS="-m32" CXXFLAGS="-m32" LDFLAGS="-m32" CPPFLAGS="-m32"', - unpooled => '--disable-pooled-memory', - opt => 'CFLAGS="-O3" CXXFLAGS="-O3"', - nemesis => '--with-scheduler=nemesis', - lifo => '--with-scheduler=lifo', - mutexfifo => '--with-scheduler=mutexfifo', - mtsfifo => '--with-scheduler=mtsfifo', - nottingham => '--with-scheduler=nottingham', - slowcontext => '--disable-fastcontext', - shavit => '--with-dict=shavit', - shep_profile => '--enable-profiling=shepherd', - lock_profile => '--enable-profiling=feb', - steal_profile => '--enable-profiling=steal', - tc_profile => '--enable-profiling=threadc', - hi_st => '--disable-hardware-atomics --with-scheduler=nemesis', - hi_mt => '--disable-hardware-atomics --with-scheduler=sherwood', - dev => 'CFLAGS="-g -O0" CXXFLAGS="-g -O0" --enable-debug --enable-guard-pages --enable-asserts --enable-static --disable-shared --enable-valgrind --disable-pooled-memory --enable-aligncheck', - debug => 'CFLAGS="-g -O0" CXXFLAGS="-g -O0" --enable-debug --enable-static --disable-shared', - hwloc => '--with-topology=hwloc', - sinc_stats => '--enable-profiling=sincs', - oversubscription => '--enable-oversubscription', - guard_pages => '--enable-guard-pages', - chapel_default => '--enable-static --disable-shared --enable-condwait-queue --disable-spawn-cache --with-scheduler=nemesis', -); - -my @summaries; - -# Collect command-line options -my @conf_names; -my @check_tests; -my @user_configs; -my $qt_src_dir = ''; -my $qt_bld_dir = ''; -my $qt_install_dir = ''; -my $repeat = 1; -my $make_flags = ''; -my $force_configure = 0; -my $force_clean = 0; -my $print_info = 0; -my $dry_run = 0; -my $quietly = 0; -my $need_help = 0; - -if (scalar @ARGV == 0) { - $need_help = 1; -} else { - while (@ARGV) { - my $flag = shift @ARGV; - - if ($flag =~ m/--configs=(.*)/) { - @conf_names = split(/,/, $1); - } elsif ($flag =~ m/--with-config=(.*)/) { - push @user_configs, $1; - } elsif ($flag =~ m/--source-dir=(.*)/) { - $qt_src_dir = $1; - } elsif ($flag =~ m/--build-dir=(.*)/) { - $qt_bld_dir = $1; - } elsif ($flag =~ m/--install-dir=(.*)/) { - $qt_install_dir = $1; - } elsif ($flag =~ m/--repeat=(.*)/) { - $repeat = int($1); - } elsif ($flag =~ m/--make-flags=(.*)/) { - $make_flags = $1; - } elsif ($flag eq '--force-configure') { - $force_configure = 1; - } elsif ($flag eq '--force-clean') { - $force_clean = 1; - } elsif ($flag eq '--verbose' || $flag eq '-v') { - $print_info = 1; - } elsif ($flag eq '--dry-run') { - $dry_run = 1; - } elsif ($flag eq '--quietly') { - $quietly = 1; - } elsif ($flag =~ m/--tests=(.*)/) { - @check_tests = split(/,/,$1) unless ($1 eq 'all') - } elsif ($flag eq '--help' || $flag eq '-h') { - $need_help = 1; - } else { - print "Unsupported option '$flag'.\n"; - exit(1); - } - } -} - -# Aggregate configuration options -while (@user_configs) { - my 
$user_config = pop @user_configs; - my $id = scalar @user_configs; - my $name = "userConfig$id"; - - push @conf_names, $name; - $config{$name} = $user_config; -} -if (scalar @conf_names == 0) { push @conf_names, 'default' }; -@conf_names = sort @conf_names; - -if ($need_help) { - print "usage: perl build.pl [options]\n"; - print "Options:\n"; - print "\t--configs= comma-separated list of configurations.\n"; - print "\t configuration options can be concatenated using\n"; - print "\t the '+' operator (e.g., 'conf1+conf2').\n"; - print "\t 'all' may be used as an alias for all known\n"; - print "\t configurations.\n"; - print "\t--with-config= a user-specified string of configuration\n"; - print "\t options. Essentially, this is used to define\n"; - print "\t an unnamed 'config', whereas the previous\n"; - print "\t uses pre-defined, named configs. This option\n"; - print "\t can be used multiple times.\n"; - print "\t--tests= comma-separated list of test suites. Valid\n"; - print "\t test suites are 'basics', 'features', and\n"; - print "\t 'stress'. The default is to run all three.\n"; - print "\t--source-dir= absolute path to Qthreads source.\n"; - print "\t--build-dir= absolute path to target build directory.\n"; - print "\t--install-dir= absolute path to target installation directory.\n"; - print "\t--repeat= run `make check` times per configuration.\n"; - print "\t--make-flags= options to pass to make (e.g. '-j 4').\n"; - print "\t--force-configure run `configure` again.\n"; - print "\t--force-clean run `make clean` before rebuilding.\n"; - print "\t--quietly only report warnings, errors, and summary stats.\n"; - print "\t--verbose\n"; - print "\t--dry-run\n"; - print "\t--help\n"; - - print "Configurations:\n"; - my @names = sort keys %config; - for my $name (@names) { - print "\t$name:\n\t\t'$config{$name}'\n"; - } - - exit(1); -} - -# Clean up and sanity check script options -my $use_all = 0; -foreach my $name (@conf_names) { - if ($name eq 'all') { - $use_all = 1; - } elsif (not exists $config{$name}) { - my @subconf_names = split(/\+/, $name); - my @subconf_profiles = (); - foreach my $subname (@subconf_names) { - if (exists $config{$subname}) { - push @subconf_profiles, $config{$subname}; - } else { - print "Invalid configuration option '$subname'\n"; - exit(1); - } - } - - $config{$name} = join(' ', @subconf_profiles); - } -} -if ($use_all) { - @conf_names = @default_conf_names; -} - -if ($qt_src_dir eq '') { - $qt_src_dir = getcwd; - if ((not -e "$qt_src_dir/README") || - (my_system("grep -q 'QTHREADS!' 
$qt_src_dir/README") != 0)) { - print "Could not find the source directory; try using --source-dir.\n"; - exit(1); - } -} elsif (not $qt_src_dir =~ m/^\//) { - print "Specify full path for source dir '$qt_src_dir'\n"; - exit(1); -} - -if ($qt_bld_dir eq '') { - $qt_bld_dir = "$qt_src_dir/build"; -} elsif (not $qt_bld_dir =~ m/^\//) { - print "Specify full path for build dir '$qt_bld_dir'\n"; - exit(1); -} - -if ($qt_install_dir eq '') { -} elsif (not $qt_install_dir =~ m/^\//) { - print "Specify full path for installation dir '$qt_install_dir'\n"; - exit(1); -} else { - foreach my $name (@conf_names) { - $config{$name} = join(' ', "--prefix=$qt_install_dir/$name"); - } -} - -# Optionally print information about the configuration -if ($print_info) { - print "Configurations: @conf_names\n"; - print "Source directory: $qt_src_dir\n"; - print "Build directory: $qt_bld_dir\n"; -} -if (not $qt_install_dir eq '') { - print "Install directory: $qt_install_dir\n"; -} - -# Run the test configurations -foreach my $conf_name (@conf_names) { - run_tests($conf_name); -} - -# Print a summary report -print "\n" . '=' x 50; -print "\nSummary:\n"; -foreach my $summary (@summaries) { - print "$summary\n"; -} -print '=' x 50 . "\n"; - -exit(0); - -################################################################################ - -sub run_tests { - my $conf_name = $_[0]; - my $test_dir = "$qt_bld_dir/$conf_name"; - - print "\n### Test: $conf_name\n" unless $quietly; - print "### Build directory: $test_dir\n" unless $quietly; - - # Setup for configuration - if (not -e "$qt_src_dir/configure") { - print "###\tGenerating configure script ...\n" if ($print_info); - my_system("cd $qt_src_dir && sh ./autogen.sh"); - } - - # Setup build space - print "###\tConfiguring '$conf_name' ...\n" unless $quietly; - my $configure_log = "$test_dir/build.configure.log"; - my_system("mkdir -p $test_dir") if (not -e $test_dir); - if (not $qt_install_dir eq '') { - my_system("mkdir -p $qt_install_dir/$conf_name") if (not -e "$qt_install_dir/$conf_name"); - } - my_system("cd $test_dir && $qt_src_dir/configure $config{$conf_name} 2>&1 | tee $configure_log") - if ($force_configure || not -e "$test_dir/config.log"); - print "### Log: $configure_log\n" unless $quietly; - - # Build library - print "###\tBuilding '$conf_name' ...\n" unless $quietly; - my $build_log = "$test_dir/build.make.log"; - my $build_command = "cd $test_dir"; - $build_command .= " && make clean > /dev/null" if ($force_clean); - $build_command .= " && make $make_flags 2>&1 | tee $build_log"; - if (not $qt_install_dir eq '') { - print "###\tInstalling '$conf_name' ...\n" unless $quietly; - $build_command .= " && make $make_flags install 2>&1 | tee $build_log"; - } - my_system($build_command); - if (not $dry_run) { - my $build_warnings = qx/awk '\/warning:\/' $build_log/; - if (length $build_warnings > 0) { - print "Build warnings in config $conf_name! Check log and/or run again with --force-clean and --verbose for more information.\n"; - print $build_warnings; - } - my $build_errors = qx/awk '\/error:\/' $build_log/; - if (length $build_errors > 0) { - print "Build error in config $conf_name! 
Check log and/or run again with --verbose for more information.\n"; - print $build_errors; - exit(1); - } - } - - # Build testsuite - my %failcounts; - my $failing_tests = 0; - my $passing_tests = 0; - my $pass = 1; - while ($pass <= $repeat) { - print "###\tBuilding and testing '$conf_name' pass $pass ...\n" - unless $quietly; - my $results_log = "$test_dir/build.$pass.results.log"; - print "### Log: $results_log\n" unless $quietly; - print "### Results for '$conf_name'\n" unless $quietly; - my $banner = '=' x 50; - print "$banner\n" unless $quietly; - - my @make_test_suites = ('basics', 'features', 'stress'); - if (scalar @check_tests == 0) { @check_tests = @make_test_suites}; - foreach my $make_test_suite (@check_tests) { - my $check_command = "cd $test_dir"; - $check_command .= " && make clean > /dev/null" if ($force_clean); - $check_command .= " && make $make_flags -C test/$make_test_suite check 2>&1 | tee $results_log"; - my_system($check_command); - if (not $dry_run) { - my $check_warnings = qx/awk '\/warning:\/' $results_log/; - if (length $check_warnings > 0) { - print "Build warnings in config $conf_name! Check log and/or run again with --force-clean and --verbose for more information.\n"; - print $check_warnings; - } - my $check_errors = qx/awk '\/error:\/' $results_log/; - if (length $check_errors > 0) { - print "Build error in config $conf_name! Check log and/or run again with --verbose for more information.\n"; - print $check_errors; - exit(1); - } - - # Display filtered results - my $digest = qx/grep 'tests passed' $results_log/; - my $digest_msg = ''; - if ($digest eq '') { - $digest = qx/grep '# PASS:' $results_log/; - } - if ($digest eq '') { - $digest = qx/grep 'tests failed' $results_log/; chomp($digest); - $digest =~ /([0-9]+) of ([0-9]+) tests failed/; - $failing_tests += $1; - $passing_tests += $2 - $1; - my $fails = qx/awk '\/FAIL\/{print \$2}' $results_log/; - my $fail_list .= join(',', split(/\n/, $fails)); - foreach my $test (split(/\n/, $fails)) { - $failcounts{$test} ++; - } - $digest_msg = $failing_tests . ' tests failed'; - $digest_msg .= " ($fail_list)"; - } else { - chomp $digest; - $digest = qx/grep 'All .* tests passed' $results_log/; - if ($digest eq '') { - $digest = qx/grep 'TOTAL:' $results_log/; - $digest =~ /TOTAL: ([0-9]+)/; - $passing_tests += $1; - } else { - $digest =~ /All ([0-9]+) tests passed/; - $passing_tests += $1; - } - $digest_msg = $passing_tests . ' tests passed'; - } - print "$digest_msg - $make_test_suite\n" unless $quietly; - } - } - print "$banner\n" unless $quietly; - - $pass++; - } - if (not $dry_run) { - my $summary = sprintf("%17s: ", $conf_name); - if ($failing_tests eq 0) { - $summary .= "All $passing_tests tests passed"; - } elsif ($passing_tests eq 0) { - $summary .= "All $failing_tests tests FAILED!!!"; - } else { - $summary .= "$passing_tests test".(($passing_tests!=1)?"s":"")." passed, "; - $summary .= "$failing_tests test".(($failing_tests!=1)?"s":"")." 
failed ("; - foreach my $test (keys(%failcounts)) { - $summary .= "$test:$failcounts{$test} "; - } - chop($summary); - $summary .= ")"; - } - push @summaries, $summary; - } -} - -sub my_system { - my $command = $_[0]; - - $command .= " > /dev/null" if (not $print_info); - print "\t\$ $command\n" if ($print_info); - - my $status = system($command) if (not $dry_run); - - return $status; -} -# vim:expandtab From e00942e77d3b0ea77ee9bf7206bf476a4d5fb5ed Mon Sep 17 00:00:00 2001 From: Ian Henriksen Date: Fri, 20 Sep 2024 13:19:14 -0600 Subject: [PATCH 10/11] Get rid of option to disable hardware atomics. Also get rid of detection for different sizes of atomic intrinsics. All the compilers we currently use support them and we'll be replacing them with C11 atomics soon anyway. Also prefer intrinsics to our hand-implemented versions since our versions will be removed soon too. --- config/qthread_check_atomics.m4 | 197 -------------------------------- configure.ac | 63 ++-------- include/qt_atomics.h | 95 +-------------- include/qt_shepherd_innards.h | 7 -- include/qthread/common.h.in | 16 +-- include/qthread/qthread.h | 69 +++-------- include/qthread_innards.h | 14 --- src/Makefile.am | 4 - src/compat_atomics.c | 116 ------------------- src/feb.c | 18 --- src/qthread.c | 59 ---------- src/syncvar.c | 3 - 12 files changed, 34 insertions(+), 627 deletions(-) delete mode 100644 config/qthread_check_atomics.m4 delete mode 100644 src/compat_atomics.c diff --git a/config/qthread_check_atomics.m4 b/config/qthread_check_atomics.m4 deleted file mode 100644 index cd0628a8..00000000 --- a/config/qthread_check_atomics.m4 +++ /dev/null @@ -1,197 +0,0 @@ -# -*- Autoconf -*- -# -# Copyright (c) 2008 Sandia Corporation -# - -# QTHREAD_CHECK_ATOMICS([action-if-found], [action-if-not-found]) -# ------------------------------------------------------------------------------ -AC_DEFUN([QTHREAD_CHECK_ATOMICS], [ -AC_REQUIRE([QTHREAD_DETECT_COMPILER_TYPE]) -AC_ARG_ENABLE([builtin-atomics], - [AS_HELP_STRING([--disable-builtin-atomics], - [force the use of inline-assembly (if possible) rather than compiler-builtins for atomics. 
This is useful for working around some compiler bugs; normally, it's preferable to use compiler builtins.])]) -AC_CACHE_CHECK([whether compiler supports builtin atomic CAS-32], - [qthread_cv_atomic_CAS32], - [AC_LINK_IFELSE([AC_LANG_SOURCE([[ -#include -#include /* for uint32_t */ - -int main(void) -{ -uint32_t bar=1, old=1, new=2; -uint32_t foo = __sync_val_compare_and_swap(&bar, old, new); -return (int)foo; -}]])], - [qthread_cv_atomic_CAS32="yes"], - [qthread_cv_atomic_CAS32="no"])]) -AC_CACHE_CHECK([whether compiler supports builtin atomic CAS-64], - [qthread_cv_atomic_CAS64], - [AC_LINK_IFELSE([AC_LANG_SOURCE([[ -#include -#include /* for uint64_t */ - -int main(void) -{ -uint64_t bar=1, old=1, new=2; -uint64_t foo = __sync_val_compare_and_swap(&bar, old, new); -return foo; -}]])], - [qthread_cv_atomic_CAS64="yes"], - [qthread_cv_atomic_CAS64="no"])]) -AC_CACHE_CHECK([whether compiler supports builtin atomic CAS-ptr], - [qthread_cv_atomic_CASptr], - [AC_LINK_IFELSE([AC_LANG_SOURCE([[ -#include - -int main(void) -{ -void *bar=(void*)1, *old=(void*)1, *new=(void*)2; -void *foo = __sync_val_compare_and_swap(&bar, old, new); -return (int)(long)foo; -}]])], - [qthread_cv_atomic_CASptr="yes"], - [qthread_cv_atomic_CASptr="no"])]) -AS_IF([test "x$qthread_cv_atomic_CAS32" = "xyes" && test "x$qthread_cv_atomic_CAS64" = "xyes" && test "x$qthread_cv_atomic_CASptr" = "xyes"], - [qthread_cv_atomic_CAS=yes], - [qthread_cv_atomic_CAS=no]) -AC_ARG_ENABLE([cmpxchg16b], - [AS_HELP_STRING([--enable-cmpxchg16b], - [forces acceptance or rejection of the cmpxchg16b instruction; useful primarily for cross-compiling])]) -AC_CACHE_CHECK([whether the compiler supports CMPXCHG16B], - [qthread_cv_cmpxchg16b], - [AS_IF([test "x$qthread_cv_asm_arch" = xAMD64], - [AC_RUN_IFELSE([AC_LANG_SOURCE([[ -#include /* for uint64_t and intptr_t (C99) */ -struct m128 { -uint64_t a,b; -}; -int main(void) -{ -char blob[sizeof(struct m128)*4]; -intptr_t b2 = (intptr_t)blob; -struct m128 *one, *two, *three; -if (b2 & 0xf) { // fix alignment -b2 += 0xf; -b2 -= (b2 & 0xf); -} -one = (struct m128*)b2; -two = one+1; -three = two+1; -one->a = 1; -one->b = 2; -two->a = 3; -two->b = 4; -three->a = 5; -three->b = 6; -__asm__ __volatile__ ("lock cmpxchg16b %2" -:"=a"(three->a),"=d"(three->b),"+m"(*two) -:"a"(two->a),"d"(two->b),"b"(one->a),"c"(one->b) -:"cc", "memory"); -if (three->a != 3) { -return -1; -} else { -return 0; -} -}]])], - [qthread_cv_cmpxchg16b="yes"], - [qthread_cv_cmpxchg16b="no"], - [AS_IF([test "x$enable_cmpxchg16b" = x], - [case "$host" in # for vim: ( ( - x86_64-*) qthread_cv_cmpxchg16b="yes" ;; - *) qthread_cv_cmpxchg16b="no" ;; - esac], - [qthread_cv_cmpxchg16b="$enable_cmpxchg16b"])])], - [qthread_cv_cmpxchg16b="no"])]) -qthread_cv_atomic_CAS128="$qthread_cv_cmpxchg16b" -AC_CACHE_CHECK([whether compiler supports builtin atomic incr], - [qthread_cv_atomic_incr], - [AS_IF([test "$1" -eq 8], - [AC_LINK_IFELSE([AC_LANG_SOURCE([[ -#include -#include /* for uint64_t */ - -int main(void) -{ -uint64_t bar=1; -uint64_t foo = __sync_fetch_and_add(&bar, 1); -return foo; -}]])], - [qthread_cv_atomic_incr="yes"], - [qthread_cv_atomic_incr="no"])], - [AC_LINK_IFELSE([AC_LANG_SOURCE([[ -#include -#include /* for uint32_t */ - -int main(void) -{ -uint32_t bar=1; -uint32_t foo = __sync_fetch_and_add(&bar, 1); -return foo; -}]])], - [qthread_cv_atomic_incr="yes"], - [qthread_cv_atomic_incr="no"])]) - ]) -AS_IF([test "$qthread_cv_atomic_incr" = "yes"], - [AC_CACHE_CHECK([whether builtin atomic increment works correctly], - 
[qt_cv_atomic_incr_works], - [AS_IF([test "$1" -eq 8], - [AC_RUN_IFELSE([AC_LANG_SOURCE([[ -#include -#include /* for uint64_t */ - -int main(int argc, char *argv[]) -{ -uint64_t master = 0; -if ((__sync_fetch_and_add(&master, 1) != 0) || (master != 1)) { - return -1; -} -master = 0xFFFFFFFF; -if ((__sync_fetch_and_add(&master, 1) != 0xFFFFFFFF) || - (master != 0x100000000ULL)) { - return -2; -} -master = 0; -if ((__sync_fetch_and_add(&master, 0x100000000ULL) != 0) || - (master != 0x100000000ULL)) { - return -3; -} -master = 0; -__sync_fetch_and_add(&master, 0x100000000ULL); -if (master != 0x100000000ULL) { - return -4; -} -return 0; -}]])], - [qt_cv_atomic_incr_works="yes"], - [qt_cv_atomic_incr_works="no"], - [qt_cv_atomic_incr_works="assuming yes"])], - [AC_RUN_IFELSE([AC_LANG_SOURCE([[ -#include -#include /* for uint32_t */ - -int main(void) -{ -uint64_t master = 0; -if ((__sync_fetch_and_add(&master, 1) != 0) || (master != 1)) { - return -1; -} -return 0; -}]])], - [qt_cv_atomic_incr_works="yes"], - [qt_cv_atomic_incr_works="no"], - [qt_cv_atomic_incr_works="assuming yes"])]) - ])]) -AS_IF([test "x$qthread_cv_atomic_CASptr" = "xyes"], - [AC_DEFINE([QTHREAD_ATOMIC_CAS_PTR],[1], - [if the compiler supports __sync_val_compare_and_swap on pointers])]) -AS_IF([test "x$qthread_cv_atomic_CAS32" = "xyes"], - [AC_DEFINE([QTHREAD_ATOMIC_CAS32],[1], - [if the compiler supports __sync_val_compare_and_swap on 32-bit ints])]) -AS_IF([test "x$qthread_cv_atomic_CAS64" = "xyes"], - [AC_DEFINE([QTHREAD_ATOMIC_CAS64],[1], - [if the compiler supports __sync_val_compare_and_swap on 64-bit ints])]) -AS_IF([test "x$qthread_cv_atomic_CAS" = "xyes"], - [AC_DEFINE([QTHREAD_ATOMIC_CAS],[1],[if the compiler supports __sync_val_compare_and_swap])]) -AS_IF([test "$qthread_cv_atomic_incr" = "yes" -a "$qt_cv_atomic_incr_works" != "no"], - [AC_DEFINE([QTHREAD_ATOMIC_INCR],[1],[if the compiler supports __sync_fetch_and_add])]) -]) diff --git a/configure.ac b/configure.ac index 8abe4aab..ec2bee63 100644 --- a/configure.ac +++ b/configure.ac @@ -39,15 +39,6 @@ AC_ARG_ENABLE([debugging], [AS_HELP_STRING([--enable-debugging], [turns off optimization and turns on debug flags])]) -AC_ARG_ENABLE([hardware-atomics], - [AS_HELP_STRING([--disable-hardware-atomics], - [use mutexes to do the internal atomics; - compatible with more compilers and more - platforms, but slow.])]) -AC_ARG_ENABLE([hardware-increments],[], - [AS_IF([test "x$enable_hardware_atomics" != x], - [enable_hardware_atomics="$enable_hardware_increments"])]) - AC_ARG_ENABLE([hardware-timer], [AS_HELP_STRING([--disable-hardware-timer], [force the use of gettimeofday even if there is a @@ -434,11 +425,6 @@ AS_IF([test "x$enable_picky" = xyes], esac]) QTHREAD_CHECK_ASSEMBLY([have_assembly=1], [have_assembly=0]) -case "$qthread_cv_asm_arch" in - POWERPC32) - compile_compat_atomic=yes - ;; -esac case "$qthread_cv_asm_arch" in POWERPC*) AS_IF([test "x$qthread_cv_c_compiler_type" = "xApple-GNU4.0"], @@ -559,24 +545,8 @@ case "$qthread_cv_c_compiler_type" in ;; esac -# Figure out whether the compiler has builtin atomic operations -AS_IF([test "x$enable_hardware_atomics" != xno], - [QTHREAD_CHECK_ATOMICS($sizeof_aligned_t)]) - QTHREAD_BUILTIN_SYNCHRONIZE -AS_IF([test "x$have_assembly" = "x0" -a "x$qthread_cv_atomic_CAS32" = "xno" -a "x$qthread_cv_atomic_CAS64" = "xno" -a "x$qthread_cv_atomic_incr" = "xno"], - [AC_MSG_NOTICE(Compiling on a compiler without inline assembly support and without builtin atomics. This will be slow!) 
- AS_IF([test "x$enable_hardware_atomics" = x], - [AC_MSG_NOTICE(turning on software increments ($have_assembly)) - enable_hardware_increments=no]) - AS_IF([test "x$enable_hardware_atomics" = "xyes"], - [AC_MSG_ERROR([No assembly available and software increments disabled.])])], - [AS_IF([test "x$enable_hardware_atomics" = x], [enable_hardware_atomics="yes"])]) - -AS_IF([test "x$enable_hardware_atomics" = "xno"], - [compile_compat_atomic=yes - AC_DEFINE([QTHREAD_MUTEX_INCREMENT], [1], [Use mutexes instead of assembly for atomic increment])]) AS_IF([test "x$enable_guard_pages" = "xyes"], [AC_DEFINE([QTHREAD_GUARD_PAGES], [1], [Use guard pages to detect stack overruns])], [enable_guard_pages="no"]) @@ -668,16 +638,10 @@ AS_IF([test "x$enable_internal_spinlock" = "x"], esac]) -AS_IF([test "x$enable_hardware_atomics" = "xno"], - [AS_IF([test "x$with_scheduler" != "xsherwood"], - [with_scheduler="sherwood" - AC_MSG_WARN([Forcing scheduler to be sherwood, since hardware atomic support is lacking.])]) - AS_IF([test "x$enable_lf_fegs" = "xyes"], - [AC_MSG_ERROR([FEBs cannot use a lock-free hash table, since hardware atomic support is lacking.])])], - [AS_IF([test "x$enable_internal_spinlock" = x], - [enable_internal_spinlock=yes]) - AS_IF([test "x$enable_internal_spinlock" = xyes], - [AC_DEFINE([USE_INTERNAL_SPINLOCK], [1], [Use Porterfield spinlock])])]) +AS_IF([test "x$enable_internal_spinlock" = x], + [enable_internal_spinlock=yes]) +AS_IF([test "x$enable_internal_spinlock" = xyes], + [AC_DEFINE([USE_INTERNAL_SPINLOCK], [1], [Use Porterfield spinlock])]) AS_IF([test "x$enable_steal_profiling" = xyes], [AC_DEFINE([STEAL_PROFILE], [1], [Support dynamic profile of steal infomation])], @@ -871,7 +835,6 @@ AM_CONDITIONAL([QTHREAD_TIMER_TYPE_GETTIME], [test "x$qthread_timer_type" = "xcl AM_CONDITIONAL([QTHREAD_TIMER_TYPE_MACH], [test "x$qthread_timer_type" = "xmach"]) AM_CONDITIONAL([QTHREAD_TIMER_TYPE_GETHRTIME], [test "x$qthread_timer_type" = "xgethrtime"]) AM_CONDITIONAL([QTHREAD_TIMER_TYPE_GETTIMEOFDAY], [test "x$qthread_timer_type" = "xgettimeofday"]) -AM_CONDITIONAL([COMPILE_COMPAT_ATOMIC], [test "x$compile_compat_atomic" = "xyes"]) AM_CONDITIONAL([COMPILE_SPAWNCACHE], [test "x$enable_spawn_cache" = "xyes"]) AM_CONDITIONAL([COMPILE_EUREKAS], [test "x$enable_eurekas" = "xyes"]) AM_CONDITIONAL([HAVE_GUARD_PAGES], [test "x$enable_guard_pages" = "xyes"]) @@ -948,17 +911,13 @@ AS_IF([test "x$enable_spawn_cache" = "xyes"], [AS_IF([test "x$pool_string" != "x"], [pool_string="$pool_string, spawns"], [pool_string="spawns"])]) -AS_IF([test "x$enable_hardware_atomics" = "xno"], - [AS_IF([test "x$ac_cv_func_pthread_spin_init" = "xyes"], - [incr_string="Spin (software)"], - [incr_string="Mutex (software)"])], - [AS_IF([test "x$qthread_cv_atomic_incr" = "xyes" -a "x$qthread_cv_atomic_CAS" = "xyes"], - [incr_string="Compiler Builtin (both)"], - [AS_IF([test "x$qthread_cv_atomic_incr" = "xyes"], - [incr_string="Compiler Builtin/$qthread_cv_asm_arch"], - [AS_IF([test "x$qthread_cv_atomic_CAS" = "xyes"], - [incr_string="$qthread_cv_asm_arch/Compiler Builtin"], - [incr_string="$qthread_cv_asm_arch"])])])]) +AS_IF([test "x$qthread_cv_atomic_incr" = "xyes" -a "x$qthread_cv_atomic_CAS" = "xyes"], + [incr_string="Compiler Builtin (both)"], + [AS_IF([test "x$qthread_cv_atomic_incr" = "xyes"], + [incr_string="Compiler Builtin/$qthread_cv_asm_arch"], + [AS_IF([test "x$qthread_cv_atomic_CAS" = "xyes"], + [incr_string="$qthread_cv_asm_arch/Compiler Builtin"], + [incr_string="$qthread_cv_asm_arch"])])]) AS_IF([test 
"x$enable_lf_febs" = xno], [feb_string="lock-based hash"], [feb_string="lock-free"]) diff --git a/include/qt_atomics.h b/include/qt_atomics.h index 0a97aae8..4b3ed715 100644 --- a/include/qt_atomics.h +++ b/include/qt_atomics.h @@ -138,7 +138,7 @@ extern pthread_mutexattr_t _fastlock_attr; * !defined(QTHREAD_ATOMIC_INCR). */ #if defined(USE_INTERNAL_SPINLOCK) && USE_INTERNAL_SPINLOCK && \ - defined(QTHREAD_ATOMIC_INCR) && !defined(QTHREAD_MUTEX_INCREMENT) + defined(QTHREAD_ATOMIC_INCR) #define QTHREAD_TRYLOCK_TYPE qt_spin_trylock_t #define QTHREAD_TRYLOCK_INIT(x) \ @@ -303,52 +303,7 @@ extern pthread_mutexattr_t _fastlock_attr; t.tv_nsec -= ((t.tv_nsec >= 1000000000) ? 1000000000 : 0); \ qassert(pthread_cond_timedwait(&(c), &(m), &t), 0); \ } while (0) -#ifdef QTHREAD_MUTEX_INCREMENT -#define QTHREAD_CASLOCK(var) \ - var; \ - QTHREAD_FASTLOCK_TYPE var##_caslock -#define QTHREAD_CASLOCK_STATIC(var) \ - var; \ - static QTHREAD_FASTLOCK_TYPE var##_caslock -#define QTHREAD_CASLOCK_EXPLICIT_DECL(name) QTHREAD_FASTLOCK_TYPE name; -#define QTHREAD_CASLOCK_EXPLICIT_INIT(name) QTHREAD_FASTLOCK_INIT(name) -#define QTHREAD_CASLOCK_INIT(var, i) \ - var = i; \ - QTHREAD_FASTLOCK_INIT(var##_caslock) -#define QTHREAD_CASLOCK_DESTROY(var) QTHREAD_FASTLOCK_DESTROY(var##_caslock) -#define QTHREAD_CASLOCK_READ(var) \ - (void *)qt_cas_read_ui((uintptr_t *)&(var), &(var##_caslock)) -#define QTHREAD_CASLOCK_READ_UI(var) \ - qt_cas_read_ui((uintptr_t *)&(var), &(var##_caslock)) -#define QT_CAS(var, oldv, newv) \ - qt_cas((void **)&(var), (void *)(oldv), (void *)(newv), &(var##_caslock)) -#define QT_CAS_(var, oldv, newv, caslock) \ - qt_cas((void **)&(var), (void *)(oldv), (void *)(newv), &(caslock)) - -static inline void *qt_cas(void **const ptr, - void *const oldv, - void *const newv, - QTHREAD_FASTLOCK_TYPE *lock) { - void *ret; - - QTHREAD_FASTLOCK_LOCK(lock); - ret = *ptr; - if (*ptr == oldv) { *ptr = newv; } - QTHREAD_FASTLOCK_UNLOCK(lock); - return ret; -} - -static inline uintptr_t qt_cas_read_ui(uintptr_t *const ptr, - QTHREAD_FASTLOCK_TYPE *mutex) { - uintptr_t ret; - QTHREAD_FASTLOCK_LOCK(mutex); - ret = *ptr; - QTHREAD_FASTLOCK_UNLOCK(mutex); - return ret; -} - -#else /* ifdef QTHREAD_MUTEX_INCREMENT */ #define QTHREAD_CASLOCK(var) (var) #define QTHREAD_CASLOCK_STATIC(var) (var) #define QTHREAD_CASLOCK_EXPLICIT_DECL(name) @@ -423,55 +378,13 @@ qt_cas(void **const ptr, void *const oldv, void *const newv) { /*{{{*/ } /*}}}*/ #endif /* ATOMIC_CAS_PTR */ -#endif /* MUTEX_INCREMENT */ -#ifndef QTHREAD_MUTEX_INCREMENT #define qthread_internal_atomic_read_s(op, lock) (*op) #define qthread_internal_incr(op, lock, val) qthread_incr(op, val) #define qthread_internal_incr_s(op, lock, val) qthread_incr(op, val) #define qthread_internal_decr(op, lock) qthread_incr(op, -1) #define qthread_internal_incr_mod(op, m, lock) qthread_internal_incr_mod_(op, m) #define QTHREAD_OPTIONAL_LOCKARG -#else -#define qthread_internal_incr_mod(op, m, lock) \ - qthread_internal_incr_mod_(op, m, lock) -#define QTHREAD_OPTIONAL_LOCKARG , QTHREAD_FASTLOCK_TYPE *lock - -static inline aligned_t qthread_internal_incr(aligned_t *operand, - QTHREAD_FASTLOCK_TYPE *lock, - int val) { /*{{{ */ - aligned_t retval; - - QTHREAD_FASTLOCK_LOCK(lock); - retval = *operand; - *operand += val; - QTHREAD_FASTLOCK_UNLOCK(lock); - return retval; -} /*}}} */ - -static inline saligned_t qthread_internal_incr_s(saligned_t *operand, - QTHREAD_FASTLOCK_TYPE *lock, - int val) { /*{{{ */ - saligned_t retval; - - QTHREAD_FASTLOCK_LOCK(lock); - retval = *operand; - 
*operand += val; - QTHREAD_FASTLOCK_UNLOCK(lock); - return retval; -} /*}}} */ - -static inline saligned_t qthread_internal_atomic_read_s( - saligned_t *operand, QTHREAD_FASTLOCK_TYPE *lock) { /*{{{ */ - saligned_t retval; - - QTHREAD_FASTLOCK_LOCK(lock); - retval = *operand; - QTHREAD_FASTLOCK_UNLOCK(lock); - return retval; -} /*}}} */ - -#endif /* ifndef QTHREAD_MUTEX_INCREMENT */ static inline aligned_t qthread_internal_incr_mod_( aligned_t *operand, @@ -628,12 +541,6 @@ static inline aligned_t qthread_internal_incr_mod_( ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) && \ (QTHREAD_BITS == 32)) */ -#elif defined(QTHREAD_MUTEX_INCREMENT) - QTHREAD_FASTLOCK_LOCK(lock); - retval = (*operand)++; - *operand *= (*operand < max); - QTHREAD_FASTLOCK_UNLOCK(lock); - #elif QTHREAD_ATOMIC_CAS aligned_t oldval, newval; diff --git a/include/qt_shepherd_innards.h b/include/qt_shepherd_innards.h index 6b0eceef..bf2a6d9f 100644 --- a/include/qt_shepherd_innards.h +++ b/include/qt_shepherd_innards.h @@ -99,13 +99,6 @@ struct qthread_shepherd_s { size_t num_threads; /* number of threads handled */ #endif #ifdef QTHREAD_FEB_PROFILING -#ifdef QTHREAD_MUTEX_INCREMENT - qt_hash uniqueincraddrs; /* the unique addresses that are incremented */ - double incr_maxtime; /* maximum time spent in a single increment */ - double incr_time; /* total time spent incrementing */ - size_t incr_count; /* number of increments */ -#endif - qt_hash uniquelockaddrs; /* the unique addresses that are locked */ double aquirelock_maxtime; /* max time spent aquiring locks */ double aquirelock_time; /* total time spent aquiring locks */ diff --git a/include/qthread/common.h.in b/include/qthread/common.h.in index 83b097f6..29934d1a 100644 --- a/include/qthread/common.h.in +++ b/include/qthread/common.h.in @@ -8,7 +8,7 @@ #define QTHREAD_COMMON_H /* Whether C compiler supports GCC style inline assembly */ -#undef HAVE_GCC_INLINE_ASSEMBLY +#define HAVE_GCC_INLINE_ASSEMBLY /* if the compiler supports inline assembly, we can prevent reordering */ #undef COMPILER_FENCE @@ -16,10 +16,6 @@ /* Architecture type of assembly to use */ #undef QTHREAD_ASSEMBLY_ARCH -/* use mutexes when incrementing, rather than architecture-specific assembly - */ -#undef QTHREAD_MUTEX_INCREMENT - /* use inlined functions */ #undef QTHREAD_INLINE @@ -37,19 +33,19 @@ #endif /* builtin cas supported */ -#undef QTHREAD_ATOMIC_CAS +#define QTHREAD_ATOMIC_CAS 1 /* if the compiler supports __sync_val_compare_and_swap on 32-bit ints */ -#undef QTHREAD_ATOMIC_CAS32 +#define QTHREAD_ATOMIC_CAS32 1 /* if the compiler supports __sync_val_compare_and_swap on 64-bit ints */ -#undef QTHREAD_ATOMIC_CAS64 +#define QTHREAD_ATOMIC_CAS64 1 /* if the compiler supports __sync_val_compare_and_swap on pointers */ -#undef QTHREAD_ATOMIC_CAS_PTR +#define QTHREAD_ATOMIC_CAS_PTR 1 /* builtin incr supported */ -#undef QTHREAD_ATOMIC_INCR +#define QTHREAD_ATOMIC_INCR 1 #ifdef __cplusplus #ifdef __GNUC__ diff --git a/include/qthread/qthread.h b/include/qthread/qthread.h index e6a4b5ef..07b26675 100644 --- a/include/qthread/qthread.h +++ b/include/qthread/qthread.h @@ -675,16 +675,6 @@ int qthread_spinlocks_destroy(qthread_spinlock_t *a); int qthread_lock_init(aligned_t const *a, bool const is_recursive); int qthread_lock_destroy(aligned_t *a); -#if defined(QTHREAD_MUTEX_INCREMENT) || QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32 -uint32_t qthread_incr32_(uint32_t *, int32_t); -uint64_t qthread_incr64_(uint64_t *, int64_t); -float qthread_fincr_(float *, float); -double qthread_dincr_(double *, 
double); -uint32_t qthread_cas32_(uint32_t *, uint32_t, uint32_t); -uint64_t qthread_cas64_(uint64_t *, uint64_t, uint64_t); -#endif // if defined(QTHREAD_MUTEX_INCREMENT) || (QTHREAD_ASSEMBLY_ARCH == - // QTHREAD_POWERPC32) - /* the following three functions implement variations on atomic increment. It * is done with architecture-specific assembly (on supported architectures, * when possible) and does NOT use FEB's or lock/unlock unless the architecture @@ -693,10 +683,7 @@ uint64_t qthread_cas64_(uint64_t *, uint64_t, uint64_t); * *after* incrementing. */ static inline float qthread_fincr(float *operand, float incr) { /*{{{ */ -#if defined(QTHREAD_MUTEX_INCREMENT) - return qthread_fincr_(operand, incr); - -#elif QTHREAD_ATOMIC_CAS && !defined(HAVE_GCC_INLINE_ASSEMBLY) +#if QTHREAD_ATOMIC_CAS union { float f; uint32_t i; @@ -713,7 +700,7 @@ static inline float qthread_fincr(float *operand, float incr) { /*{{{ */ #elif !defined(HAVE_GCC_INLINE_ASSEMBLY) #error Qthreads requires either mutex increments, inline assembly, or compiler CAS builtins -#else // if defined(QTHREAD_MUTEX_INCREMENT) +#else // if QTHREAD_ATOMIC_CAS #if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) || \ (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) union { @@ -803,15 +790,11 @@ static inline float qthread_fincr(float *operand, float incr) { /*{{{ */ #error Unsupported assembly architecture for qthread_fincr #endif // if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) || // (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) -#endif // if defined(QTHREAD_MUTEX_INCREMENT) +#endif // if QTHREAD_ATOMIC_CAS } /*}}} */ static inline double qthread_dincr(double *operand, double incr) { /*{{{ */ -#if defined(QTHREAD_MUTEX_INCREMENT) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) - return qthread_dincr_(operand, incr); - -#elif QTHREAD_ATOMIC_CAS && !defined(HAVE_GCC_INLINE_ASSEMBLY) +#if QTHREAD_ATOMIC_CAS union { uint64_t i; double d; @@ -828,8 +811,7 @@ static inline double qthread_dincr(double *operand, double incr) { /*{{{ */ #elif !defined(HAVE_GCC_INLINE_ASSEMBLY) #error Qthreads requires either mutex increments, inline assembly, or compiler CAS builtins -#else // if defined(QTHREAD_MUTEX_INCREMENT) || (QTHREAD_ASSEMBLY_ARCH == - // QTHREAD_POWERPC32) +#else // if QTHREAD_ATOMIC_CAS #if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) uint64_t scratch_int; double incremented_value; @@ -997,19 +979,15 @@ static inline double qthread_dincr(double *operand, double incr) { /*{{{ */ #else // if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) #error Unimplemented assembly architecture for qthread_dincr #endif // if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) -#endif // if defined(QTHREAD_MUTEX_INCREMENT) || (QTHREAD_ASSEMBLY_ARCH == - // QTHREAD_POWERPC32) +#endif // if QTHREAD_ATOMIC_CAS } /*}}} */ static inline uint32_t qthread_incr32(uint32_t *operand, uint32_t incr) { /*{{{ */ -#ifdef QTHREAD_MUTEX_INCREMENT - return qthread_incr32_(operand, incr); - -#elif defined(QTHREAD_ATOMIC_INCR) +#if defined(QTHREAD_ATOMIC_INCR) return __sync_fetch_and_add(operand, incr); -#elif !defined(HAVE_GCC_INLINE_ASSEMBLY) && QTHREAD_ATOMIC_CAS +#elif QTHREAD_ATOMIC_CAS uint32_t oldval, newval; do { oldval = *operand; @@ -1020,7 +998,7 @@ static inline uint32_t qthread_incr32(uint32_t *operand, #elif !defined(HAVE_GCC_INLINE_ASSEMBLY) #error Qthreads requires either mutex increments, inline assembly, or compiler atomic builtins -#else // ifdef QTHREAD_MUTEX_INCREMENT +#else // if defined(QTHREAD_ATOMIC_INCR) #if (QTHREAD_ASSEMBLY_ARCH == 
QTHREAD_POWERPC32) || \ (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) uint32_t retval; @@ -1052,19 +1030,15 @@ static inline uint32_t qthread_incr32(uint32_t *operand, #error Unimplemented assembly architecture for qthread_incr32 #endif // if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) || // (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) -#endif // ifdef QTHREAD_MUTEX_INCREMENT +#endif // if defined(QTHREAD_ATOMIC_INCR) } /*}}} */ static inline uint64_t qthread_incr64(uint64_t *operand, uint64_t incr) { /*{{{ */ -#if defined(QTHREAD_MUTEX_INCREMENT) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) - return qthread_incr64_(operand, incr); - -#elif defined(QTHREAD_ATOMIC_INCR) +#if defined(QTHREAD_ATOMIC_INCR) return __sync_fetch_and_add(operand, incr); -#elif !defined(HAVE_GCC_INLINE_ASSEMBLY) && QTHREAD_ATOMIC_CAS +#elif QTHREAD_ATOMIC_CAS uint64_t oldval, newval; do { oldval = *operand; @@ -1075,7 +1049,7 @@ static inline uint64_t qthread_incr64(uint64_t *operand, #elif !defined(HAVE_GCC_INLINE_ASSEMBLY) #error Qthreads requires either mutex increments, inline assembly, or compiler atomic builtins -#else // if defined(QTHREAD_MUTEX_INCREMENT) || QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32 +#else // if defined(QTHREAD_ATOMIC_CAS) #if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) uint64_t retval; uint64_t incrd = incrd; /* no initializing */ @@ -1167,7 +1141,7 @@ static inline uint64_t qthread_incr64(uint64_t *operand, #else // if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) #error Unimplemented assembly architecture for qthread_incr64 #endif // if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) -#endif // if defined(QTHREAD_MUTEX_INCREMENT) || QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32 +#endif // if defined(QTHREAD_ATOMIC_INCR) } /*}}} */ static inline int64_t qthread_incr_xx(void *addr, @@ -1185,15 +1159,10 @@ static inline int64_t qthread_incr_xx(void *addr, uint64_t qthread_syncvar_incrF(syncvar_t *restrict operand, uint64_t inc); -#if !defined(QTHREAD_ATOMIC_CAS) || defined(QTHREAD_MUTEX_INCREMENT) +#if !defined(QTHREAD_ATOMIC_CAS) static inline uint32_t qthread_cas32(uint32_t *operand, uint32_t oldval, uint32_t newval) { /*{{{ */ -#ifdef QTHREAD_MUTEX_INCREMENT // XXX: this is only valid if you don't read - // *operand without the lock - return qthread_cas32_(operand, oldval, newval); - -#else #if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) || \ (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) uint32_t result; @@ -1230,16 +1199,11 @@ static inline uint32_t qthread_cas32(uint32_t *operand, #error Unimplemented assembly architecture for qthread_cas32 #endif // if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) || // (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) -#endif // ifdef QTHREAD_MUTEX_INCREMENT } /*}}} */ static inline uint64_t qthread_cas64(uint64_t *operand, uint64_t oldval, uint64_t newval) { /*{{{ */ -#ifdef QTHREAD_MUTEX_INCREMENT - return qthread_cas64_(operand, oldval, newval); - -#else #if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) uint64_t result; __asm__ __volatile__("A_%=:\n\t" @@ -1322,7 +1286,6 @@ static inline uint64_t qthread_cas64(uint64_t *operand, #else // if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) #error Unimplemented assembly architecture for qthread_cas64 #endif // if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) -#endif // ifdef QTHREAD_MUTEX_INCREMENT } /*}}} */ static inline aligned_t qthread_cas_xx(aligned_t *addr, @@ -1397,7 +1360,7 @@ Q_ENDCXX /* */ #ifndef __cplusplus -#if defined(QTHREAD_ATOMIC_INCR) && !defined(QTHREAD_MUTEX_INCREMENT) +#if 
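/* Usage sketch for the CAS wrappers above: qthread_cas32 returns the prior
 * contents of *operand rather than a success flag, so success is detected by
 * comparing the return value against the expected old value.  This assumes the
 * installed header is reachable as <qthread/qthread.h>; try_claim and flag are
 * hypothetical names. */
#include <stdint.h>
#include <qthread/qthread.h>

static inline int try_claim(uint32_t *flag) {
  uint32_t const expected = 0u;
  /* Nonzero only if this caller's CAS installed the value 1. */
  return qthread_cas32(flag, expected, 1u) == expected;
}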
defined(QTHREAD_ATOMIC_INCR) #define qthread_incr(ADDR, INCVAL) __sync_fetch_and_add(ADDR, INCVAL) #else #define qthread_incr(ADDR, INCVAL) \ diff --git a/include/qthread_innards.h b/include/qthread_innards.h index 4c819e41..018c12f7 100644 --- a/include/qthread_innards.h +++ b/include/qthread_innards.h @@ -38,13 +38,7 @@ typedef struct uint64_strip_s { typedef struct qlib_s { unsigned int nshepherds; aligned_t nshepherds_active; -#ifdef QTHREAD_MUTEX_INCREMENT - QTHREAD_FASTLOCK_TYPE nshepherds_active_lock; -#endif aligned_t nworkers_active; -#ifdef QTHREAD_MUTEX_INCREMENT - QTHREAD_FASTLOCK_TYPE nworkers_active_lock; -#endif unsigned int nworkerspershep; struct qthread_shepherd_s *shepherds; qt_threadqueue_t **threadqueues; @@ -109,14 +103,6 @@ typedef struct qlib_s { aligned_t sched_shepherd; QTHREAD_FASTLOCK_TYPE sched_shepherd_lock; -#if defined(QTHREAD_MUTEX_INCREMENT) || QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32 - QTHREAD_FASTLOCK_TYPE *atomic_locks; -#ifdef QTHREAD_COUNT_THREADS - aligned_t *atomic_stripes; - QTHREAD_FASTLOCK_TYPE *atomic_stripes_locks; -#endif -#endif - /*AGG cost method, call method and max cost * defined in qthreads or given by the user at qthread initialization */ diff --git a/src/Makefile.am b/src/Makefile.am index 389e5556..0ccb1477 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -53,10 +53,6 @@ if COMPILE_EUREKAS libqthread_la_SOURCES += eurekas.c endif -if COMPILE_COMPAT_ATOMIC -libqthread_la_SOURCES += compat_atomics.c -endif - include qtimer/Makefile.inc include ds/Makefile.inc include patterns/Makefile.inc diff --git a/src/compat_atomics.c b/src/compat_atomics.c deleted file mode 100644 index 09b5eb3b..00000000 --- a/src/compat_atomics.c +++ /dev/null @@ -1,116 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include -#endif - -#include - -#include "qt_asserts.h" -#include "qt_initialized.h" -#include "qt_profiling.h" -#include "qthread_innards.h" - -extern unsigned int QTHREAD_LOCKING_STRIPES; -#define QTHREAD_CHOOSE_STRIPE(addr) \ - (((size_t)addr >> 4) & (QTHREAD_LOCKING_STRIPES - 1)) - -#if defined(QTHREAD_MUTEX_INCREMENT) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) -uint32_t qthread_incr32_(uint32_t *op, int32_t const incr) { /*{{{ */ - unsigned int stripe = QTHREAD_CHOOSE_STRIPE(op); - uint32_t retval; - - QTHREAD_FEB_TIMER_DECLARATION(incr); - - assert(qthread_library_initialized); - - QTHREAD_COUNT_THREADS_BINCOUNTER(atomic, stripe); - QTHREAD_FEB_UNIQUERECORD(incr, op, qthread_internal_self()); - QTHREAD_FEB_TIMER_START(incr); - QTHREAD_FASTLOCK_LOCK(&(qlib->atomic_locks[stripe])); - retval = *op; - *op += incr; - QTHREAD_FASTLOCK_UNLOCK(&(qlib->atomic_locks[stripe])); - QTHREAD_FEB_TIMER_STOP(incr, qthread_internal_self()); - return retval; -} /*}}} */ - -uint64_t qthread_incr64_(uint64_t *op, int64_t const incr) { /*{{{ */ - unsigned int stripe = QTHREAD_CHOOSE_STRIPE(op); - uint64_t retval; - - QTHREAD_FEB_TIMER_DECLARATION(incr); - - assert(qthread_library_initialized); - - QTHREAD_COUNT_THREADS_BINCOUNTER(atomic, stripe); - QTHREAD_FEB_UNIQUERECORD(incr, op, qthread_internal_self()); - QTHREAD_FEB_TIMER_START(incr); - QTHREAD_FASTLOCK_LOCK(&(qlib->atomic_locks[stripe])); - retval = *op; - *op += incr; - QTHREAD_FASTLOCK_UNLOCK(&(qlib->atomic_locks[stripe])); - QTHREAD_FEB_TIMER_STOP(incr, qthread_internal_self()); - return retval; -} /*}}} */ - -double qthread_dincr_(double *op, double const incr) { /*{{{ */ - unsigned int stripe = QTHREAD_CHOOSE_STRIPE(op); - double retval; - - assert(qthread_library_initialized); - - 
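/* Note on the lock-striping emulation being removed in this file:
 * QTHREAD_CHOOSE_STRIPE masks with (QTHREAD_LOCKING_STRIPES - 1), so the
 * address-to-lock mapping behaves like a modulo only when the stripe count is
 * a power of two, and the >> 4 makes addresses within the same 16-byte block
 * share a single stripe. */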
QTHREAD_FASTLOCK_LOCK(&(qlib->atomic_locks[stripe])); - retval = *op; - *op += incr; - QTHREAD_FASTLOCK_UNLOCK(&(qlib->atomic_locks[stripe])); - return retval; -} /*}}} */ - -float qthread_fincr_(float *op, float const incr) { /*{{{ */ - unsigned int stripe = QTHREAD_CHOOSE_STRIPE(op); - float retval; - - assert(qthread_library_initialized); - - QTHREAD_FASTLOCK_LOCK(&(qlib->atomic_locks[stripe])); - retval = *op; - *op += incr; - QTHREAD_FASTLOCK_UNLOCK(&(qlib->atomic_locks[stripe])); - return retval; -} /*}}} */ - -uint32_t qthread_cas32_(uint32_t *operand, - uint32_t const oldval, - uint32_t const newval) { /*{{{ */ - uint32_t retval; - unsigned int stripe = QTHREAD_CHOOSE_STRIPE(operand); - - assert(qthread_library_initialized); - - QTHREAD_FASTLOCK_LOCK(&(qlib->atomic_locks[stripe])); - retval = *operand; - if (retval == oldval) { *operand = newval; } - QTHREAD_FASTLOCK_UNLOCK(&(qlib->atomic_locks[stripe])); - return retval; -} /*}}} */ - -uint64_t qthread_cas64_(uint64_t *operand, - uint64_t const oldval, - uint64_t const newval) { /*{{{ */ - uint64_t retval; - unsigned int stripe = QTHREAD_CHOOSE_STRIPE(operand); - - assert(qthread_library_initialized); - - QTHREAD_FASTLOCK_LOCK(&(qlib->atomic_locks[stripe])); - retval = *operand; - if (retval == oldval) { *operand = newval; } - QTHREAD_FASTLOCK_UNLOCK(&(qlib->atomic_locks[stripe])); - return retval; -} /*}}} */ - -#else /* if defined(QTHREAD_MUTEX_INCREMENT) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) */ -#error Building this file erroneously. -#endif /* if defined(QTHREAD_MUTEX_INCREMENT) || (QTHREAD_ASSEMBLY_ARCH == \ - QTHREAD_POWERPC32) */ -/* vim:set expandtab: */ diff --git a/src/feb.c b/src/feb.c index 7c9d8e2d..852e0626 100644 --- a/src/feb.c +++ b/src/feb.c @@ -38,9 +38,6 @@ static qt_hash *FEBs; #ifdef QTHREAD_COUNT_THREADS aligned_t *febs_stripes; -#ifdef QTHREAD_MUTEX_INCREMENT -QTHREAD_FASTLOCK_TYPE *febs_stripes_locks; -#endif #endif /******************************************************************** @@ -119,18 +116,11 @@ static void qt_feb_subsystem_shutdown(void) { #ifdef QTHREAD_COUNT_THREADS print_status( "bin %i used %u times for FEBs\n", i, (unsigned int)febs_stripes[i]); -#ifdef QTHREAD_MUTEX_INCREMENT - QTHREAD_FASTLOCK_DESTROY(febs_stripes_locks[i]); -#endif #endif } FREE(FEBs, sizeof(qt_hash) * QTHREAD_LOCKING_STRIPES); #ifdef QTHREAD_COUNT_THREADS FREE(febs_stripes, sizeof(aligned_t) * QTHREAD_LOCKING_STRIPES); -#ifdef QTHREAD_MUTEX_INCREMENT - FREE(febs_stripes_locks, - sizeof(QTHREAD_FASTLOCK_TYPE) * QTHREAD_LOCKING_STRIPES); -#endif #endif #if !defined(UNPOOLED_ADDRSTAT) && !defined(UNPOOLED) qt_mpool_destroy(generic_addrstat_pool); @@ -156,18 +146,10 @@ void INTERNAL qt_feb_subsystem_init(uint_fast8_t need_sync) { #ifdef QTHREAD_COUNT_THREADS febs_stripes = MALLOC(sizeof(aligned_t) * QTHREAD_LOCKING_STRIPES); assert(febs_stripes); -#ifdef QTHREAD_MUTEX_INCREMENT - febs_stripes_locks = - MALLOC(sizeof(QTHREAD_FASTLOCK_TYPE) * QTHREAD_LOCKING_STRIPES); - assert(febs_stripes_locks); -#endif #endif /* ifdef QTHREAD_COUNT_THREADS */ for (unsigned i = 0; i < QTHREAD_LOCKING_STRIPES; i++) { #ifdef QTHREAD_COUNT_THREADS febs_stripes[i] = 0; -#ifdef QTHREAD_MUTEX_INCREMENT - QTHREAD_FASTLOCK_INIT(febs_stripes_locks[i]); -#endif #endif FEBs[i] = qt_hash_create(need_sync); assert(FEBs[i]); diff --git a/src/qthread.c b/src/qthread.c index f5424d41..ee9fdd42 100644 --- a/src/qthread.c +++ b/src/qthread.c @@ -83,14 +83,6 @@ #include "qt_output_macros.h" #include "qt_subsystems.h" -#if 
!(defined(HAVE_GCC_INLINE_ASSEMBLY) && \ - (QTHREAD_SIZEOF_ALIGNED_T == 4 || \ - QTHREAD_ASSEMBLY_ARCH != QTHREAD_POWERPC32)) && \ - !defined(QTHREAD_ATOMIC_CAS) && !defined(QTHREAD_MUTEX_INCREMENT) -#warning QTHREAD_MUTEX_INCREMENT not defined. It probably should be. -#define QTHREAD_MUTEX_INCREMENT 1 -#endif - #ifdef QTHREAD_PERFORMANCE #define WKR_DBG 1 #include "qthread/logging.h" @@ -911,16 +903,6 @@ int API_FUNC qthread_initialize(void) { /*{{{ */ qlib = (qlib_t)MALLOC(sizeof(struct qlib_s)); qassert_ret(qlib, QTHREAD_MALLOC_ERROR); -#if defined(QTHREAD_MUTEX_INCREMENT) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) - qlib->atomic_locks = - MALLOC(sizeof(QTHREAD_FASTLOCK_TYPE) * QTHREAD_LOCKING_STRIPES); - qassert_ret(qlib->atomic_locks, QTHREAD_MALLOC_ERROR); - for (i = 0; i < QTHREAD_LOCKING_STRIPES; i++) { - QTHREAD_FASTLOCK_INIT(qlib->atomic_locks[i]); - } -#endif - qt_internal_alignment_init(); qt_hash_initialize_subsystem(); @@ -970,10 +952,6 @@ int API_FUNC qthread_initialize(void) { /*{{{ */ (qt_threadqueue_t **)MALLOC(nshepherds * sizeof(qt_threadqueue_t *)); #endif /* ifdef QTHREAD_LOCAL_PRIORITY */ qassert_ret(qlib->shepherds, QTHREAD_MALLOC_ERROR); -#ifdef QTHREAD_MUTEX_INCREMENT - QTHREAD_FASTLOCK_INIT(qlib->nshepherds_active_lock); - QTHREAD_FASTLOCK_INIT(qlib->nworkers_active_lock); -#endif qt_mpool_subsystem_init(); @@ -1112,9 +1090,6 @@ int API_FUNC qthread_initialize(void) { /*{{{ */ qlib->local_priority_queues[i] = qlib->shepherds[i].local_priority_queue; #endif /* ifdef QTHREAD_LOCAL_PRIORITY */ #ifdef QTHREAD_FEB_PROFILING -#ifdef QTHREAD_MUTEX_INCREMENT - qlib->shepherds[i].uniqueincraddrs = qt_hash_create(need_sync); -#endif qlib->shepherds[i].uniquelockaddrs = qt_hash_create(need_sync); qlib->shepherds[i].uniquefebaddrs = qt_hash_create(need_sync); #endif @@ -1701,11 +1676,6 @@ void API_FUNC qthread_finalize(void) { /*{{{ */ shep->idle_maxtime); #endif #ifdef QTHREAD_FEB_PROFILING -#ifdef QTHREAD_MUTEX_INCREMENT - QTHREAD_ACCUM_MAX(shep0->incr_maxtime, shep->incr_maxtime); - shep0->incr_time += shep->incr_time; - shep0->incr_count += shep->incr_count; -#endif QTHREAD_ACCUM_MAX(shep0->aquirelock_maxtime, shep->aquirelock_maxtime); shep0->aquirelock_time += shep->aquirelock_time; shep0->aquirelock_count += shep->aquirelock_count; @@ -1724,11 +1694,6 @@ void API_FUNC qthread_finalize(void) { /*{{{ */ shep0->empty_time += shep->empty_time; shep0->empty_count += shep->empty_count; qthread_debug(CORE_DETAILS, "destroying hashes\n"); -#ifdef QTHREAD_MUTEX_INCREMENT - qt_hash_callback( - shep->uniqueincraddrs, qthread_unique_collect, shep0->uniqueincraddrs); - qt_hash_destroy(shep->uniqueincraddrs); -#endif qt_hash_callback( shep->uniquelockaddrs, qthread_unique_collect, shep0->uniquelockaddrs); qt_hash_destroy(shep->uniquelockaddrs); @@ -1751,15 +1716,6 @@ void API_FUNC qthread_finalize(void) { /*{{{ */ } #ifdef QTHREAD_FEB_PROFILING -#ifdef QTHREAD_MUTEX_INCREMENT - print_status( - "%llu increments performed (%ld unique), average %g secs, max %g secs\n", - (unsigned long long)shep0->incr_count, - qt_hash_count(shep0->uniqueincraddrs), - (shep0->incr_count == 0) ? 0 : (shep0->incr_time / shep0->incr_count), - shep0->incr_maxtime); - qt_hash_destroy(shep0->uniqueincraddrs); -#endif print_status("%ld unique addresses used with FEB, blocked %g secs\n", qt_hash_count(shep0->uniquefebaddrs), (shep0->febblock_count == 0) ? 
0 : shep0->febblock_time); @@ -1787,16 +1743,6 @@ void API_FUNC qthread_finalize(void) { /*{{{ */ #ifdef LOCK_FREE_FEBS extern unsigned int QTHREAD_LOCKING_STRIPES; QTHREAD_LOCKING_STRIPES = 1; -#elif defined(QTHREAD_MUTEX_INCREMENT) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) - extern unsigned int QTHREAD_LOCKING_STRIPES; - for (i = 0; i < QTHREAD_LOCKING_STRIPES; i++) { - QTHREAD_FASTLOCK_DESTROY(qlib->atomic_locks[i]); - } -#endif -#ifdef QTHREAD_MUTEX_INCREMENT - QTHREAD_FASTLOCK_DESTROY(qlib->nshepherds_active_lock); - QTHREAD_FASTLOCK_DESTROY(qlib->nworkers_active_lock); #endif #ifdef QTHREAD_COUNT_THREADS print_status("spawned %lu threads, max realized concurrency %lu, avg " @@ -1838,11 +1784,6 @@ void API_FUNC qthread_finalize(void) { /*{{{ */ tmp->func(); FREE(tmp, sizeof(struct qt_cleanup_funcs_s)); } -#if defined(QTHREAD_MUTEX_INCREMENT) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) - FREE((void *)qlib->atomic_locks, - sizeof(QTHREAD_FASTLOCK_TYPE) * QTHREAD_LOCKING_STRIPES); -#endif for (i = 0; i < qlib->nshepherds; ++i) { qthread_debug( diff --git a/src/syncvar.c b/src/syncvar.c index 32762657..f3df80fe 100644 --- a/src/syncvar.c +++ b/src/syncvar.c @@ -77,9 +77,6 @@ typedef struct { static qt_hash *syncvars; #ifdef QTHREAD_COUNT_THREADS extern aligned_t *febs_stripes; -#ifdef QTHREAD_MUTEX_INCREMENT -extern QTHREAD_FASTLOCK_TYPE *febs_stripes_locks; -#endif #endif extern unsigned int QTHREAD_LOCKING_STRIPES; From 0bf21a8186f7b799db4b3311adcc9595988dbea2 Mon Sep 17 00:00:00 2001 From: Ian Henriksen Date: Fri, 20 Sep 2024 13:23:23 -0600 Subject: [PATCH 11/11] Remove some unused defines from the generated header file. --- include/qthread/common.h.in | 6 ------ 1 file changed, 6 deletions(-) diff --git a/include/qthread/common.h.in b/include/qthread/common.h.in index 29934d1a..aa1bc1ef 100644 --- a/include/qthread/common.h.in +++ b/include/qthread/common.h.in @@ -16,12 +16,6 @@ /* Architecture type of assembly to use */ #undef QTHREAD_ASSEMBLY_ARCH -/* use inlined functions */ -#undef QTHREAD_INLINE - -/* if the compiler supports __attribute__((deprecated)) */ -#undef Q_DEPRECATED - #ifndef __powerpc #define BITFIELD_ORDER_REVERSE #else