Attempt to fix flaky Abseil waiter/sleep tests

The existing implementation uses wall-clock time. However, the wall clock can drastically differ from the internal system clock, because the system can be suspended and then resumed. We want to account for at least some kinds of suspensions that might occur during automated testing, such as VM suspension or hypervisor preemption ("steal time"). These are tricky cases, because the physical (host) CPU is still running -- just the logical (guest) virtual CPU isn't. Therefore, we need to ensure that our time measurements exclude elapsed host-only time. Unfortunately the correctness of a method depends on the nature & configuration of each VM and the guest. For example, it can depend on whether RDTSC is virtualized, or on whether the host and guest support accounting for steal time. Windows, for example, appears to only support steal time measurements if the hypervisor is Hyper-V. Since this is all for the sake of testing, we use a simpler trick that we hope will work around the problem on our systems: we subtract the so-called "interrupt time bias" from the system uptime in Windows. The interrupt time bias includes sleep/hibernation time, and seems to advance during VM suspensions as well, so it may take care of the problem. PiperOrigin-RevId: 675654840 Change-Id: I66150b18912175fa72609d3f137e3ea4fee8fc43
abseil · Sep 17, 2024 · abc9b91 · abc9b91
1 parent 9a18cc1
commit abc9b91
Showing 1 changed file with 52 additions and 12 deletions.
diff --git a/absl/synchronization/internal/waiter_test.cc b/absl/synchronization/internal/waiter_test.cc
@@ -32,6 +32,10 @@
 #include "absl/time/time.h"
 #include "gtest/gtest.h"
 
+#ifdef ABSL_INTERNAL_HAVE_WIN32_WAITER
+#include <windows.h>
+#endif
+
 // Test go/btm support by randomizing the value of clock_gettime() for
 // CLOCK_MONOTONIC. This works by overriding a weak symbol in glibc.
 // We should be resistant to this randomization when !SupportsSteadyClock().
@@ -53,6 +57,42 @@ extern "C" int clock_gettime(clockid_t c, struct timespec* ts) {
 }
 #endif
 
+#ifdef ABSL_INTERNAL_HAVE_WIN32_WAITER
+// Returns the "interrupt time bias" (units of 100ns), read as a volatile 64-bit
+// value from the hard-coded KUSER_SHARED_DATA address 0x7FFE0000 + 0x3B0.
+static uint64_t GetSuspendTime() {
+  return *reinterpret_cast<uint64_t volatile*>(
+      0x7FFE0000 /* KUSER_SHARED_DATA */ + 0x3B0);
+}
+
+// Like GetTickCount(), but excludes suspend time (the interrupt time bias).
+static unsigned int GetTickCountExcludingSuspend() {
+  unsigned int result;
+  uint64_t prev_bias;
+  uint64_t bias = GetSuspendTime();
+  do {  // Retry until the bias reads the same before and after GetTickCount().
+    prev_bias = bias;
+    result = GetTickCount();
+    bias = GetSuspendTime();
+  } while (bias != prev_bias);
+  return result - bias / 10000;  // 100ns units -> ms; narrowed to unsigned int.
+}
+#endif
+
+struct BenchmarkTime {  // Pair of timestamps produced by BenchmarkNow().
+  absl::Time time;   // The absl::Now() reading.
+  absl::Time vtime;  // Equals `time` unless the Win32 waiter is in use.
+};
+
+static BenchmarkTime BenchmarkNow() {  // Samples `time` and `vtime` together.
+  absl::Time now = absl::Now();
+  absl::Time vnow = now;  // Default: vtime is the same as the absl::Now() value.
+#ifdef ABSL_INTERNAL_HAVE_WIN32_WAITER  // Win32: tick count minus suspend time.
+  vnow = absl::UnixEpoch() + absl::Milliseconds(GetTickCountExcludingSuspend());
+#endif  // Note: the Win32 vnow is epoch + tick milliseconds, not a real date.
+  return {now, vnow};
+}
+
 namespace {
 
 TEST(Waiter, PrintPlatformImplementation) {
@@ -86,10 +126,10 @@ TYPED_TEST_P(WaiterTest, WaitNoTimeout) {
     absl::SleepFor(absl::Seconds(1));
     waiter.Post();
   });
-  absl::Time start = absl::Now();
+  BenchmarkTime start = BenchmarkNow();
   EXPECT_TRUE(
       waiter.Wait(absl::synchronization_internal::KernelTimeout::Never()));
-  absl::Duration waited = absl::Now() - start;
+  absl::Duration waited = BenchmarkNow().vtime - start.vtime;
   EXPECT_GE(waited, WithTolerance(absl::Seconds(2)));
 }
 
@@ -103,10 +143,10 @@ TYPED_TEST_P(WaiterTest, WaitDurationWoken) {
     absl::SleepFor(absl::Milliseconds(500));
     waiter.Post();
   });
-  absl::Time start = absl::Now();
+  BenchmarkTime start = BenchmarkNow();
   EXPECT_TRUE(waiter.Wait(
       absl::synchronization_internal::KernelTimeout(absl::Seconds(10))));
-  absl::Duration waited = absl::Now() - start;
+  absl::Duration waited = BenchmarkNow().vtime - start.vtime;
   EXPECT_GE(waited, WithTolerance(absl::Milliseconds(500)));
   EXPECT_LT(waited, absl::Seconds(2));
 }
@@ -121,30 +161,30 @@ TYPED_TEST_P(WaiterTest, WaitTimeWoken) {
     absl::SleepFor(absl::Milliseconds(500));
     waiter.Post();
   });
-  absl::Time start = absl::Now();
+  BenchmarkTime start = BenchmarkNow();
   EXPECT_TRUE(waiter.Wait(absl::synchronization_internal::KernelTimeout(
-      start + absl::Seconds(10))));
-  absl::Duration waited = absl::Now() - start;
+      start.time + absl::Seconds(10))));
+  absl::Duration waited = BenchmarkNow().vtime - start.vtime;
   EXPECT_GE(waited, WithTolerance(absl::Milliseconds(500)));
   EXPECT_LT(waited, absl::Seconds(2));
 }
 
 TYPED_TEST_P(WaiterTest, WaitDurationReached) {
   TypeParam waiter;
-  absl::Time start = absl::Now();
+  BenchmarkTime start = BenchmarkNow();  // Holds both `time` and `vtime`.
   EXPECT_FALSE(waiter.Wait(
       absl::synchronization_internal::KernelTimeout(absl::Milliseconds(500))));
-  absl::Duration waited = absl::Now() - start;
+  absl::Duration waited = BenchmarkNow().vtime - start.vtime;  // vtime delta.
   EXPECT_GE(waited, WithTolerance(absl::Milliseconds(500)));
   EXPECT_LT(waited, absl::Seconds(1));
 }
 
 TYPED_TEST_P(WaiterTest, WaitTimeReached) {
   TypeParam waiter;
-  absl::Time start = absl::Now();
+  BenchmarkTime start = BenchmarkNow();  // Holds both `time` and `vtime`.
   EXPECT_FALSE(waiter.Wait(absl::synchronization_internal::KernelTimeout(
-      start + absl::Milliseconds(500))));
-  absl::Duration waited = absl::Now() - start;
+      start.time + absl::Milliseconds(500))));  // Deadline from absl::Now().
+  absl::Duration waited = BenchmarkNow().vtime - start.vtime;  // vtime delta.
   EXPECT_GE(waited, WithTolerance(absl::Milliseconds(500)));
   EXPECT_LT(waited, absl::Seconds(1));
 }