Skip to content

Commit

Permalink
Attempt to fix flaky Abseil waiter/sleep tests
Browse files Browse the repository at this point in the history
The existing implementation uses wall-clock time. However, wall clock can drastically differ from the internal system clock, because the system can be suspended and then resumed.

We want to account for at least some kinds of suspensions that might occur during automated testing, such as VM suspension or hypervisor preemption ("steal time"). These are tricky cases, because the physical (host) CPU is still running -- just the logical (guest) virtual CPU isn't. Therefore, we need to ensure that our time measurements exclude elapsed host-only time.

Unfortunately, the correctness of a method depends on the nature & configuration of each VM and the guest. For example, it can depend on whether RDTSC is virtualized, or on whether the host and guest support accounting for steal time. Windows, for example, appears to only support steal time measurements if the hypervisor is Hyper-V.

Since this is all for the sake of testing, we use a simpler trick that we hope will work around the problem on our systems: we subtract the so-called "interrupt time bias" from the system uptime in Windows. The interrupt time bias includes sleep/hibernation time, and seems to advance during VM suspensions as well, so it may take care of the problem.

PiperOrigin-RevId: 675654840
Change-Id: I66150b18912175fa72609d3f137e3ea4fee8fc43
  • Loading branch information
Abseil Team authored and copybara-github committed Sep 17, 2024
1 parent 9a18cc1 commit abc9b91
Showing 1 changed file with 52 additions and 12 deletions.
64 changes: 52 additions & 12 deletions absl/synchronization/internal/waiter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@
#include "absl/time/time.h"
#include "gtest/gtest.h"

#ifdef ABSL_INTERNAL_HAVE_WIN32_WAITER
#include <windows.h>
#endif

// Test go/btm support by randomizing the value of clock_gettime() for
// CLOCK_MONOTONIC. This works by overriding a weak symbol in glibc.
// We should be resistant to this randomization when !SupportsSteadyClock().
Expand All @@ -53,6 +57,42 @@ extern "C" int clock_gettime(clockid_t c, struct timespec* ts) {
}
#endif

#ifdef ABSL_INTERNAL_HAVE_WIN32_WAITER
// Reads the "interrupt time bias" field of KUSER_SHARED_DATA and returns it
// unchanged. The value is expressed in units of 100ns.
//
// The field is read through a volatile pointer so that every call performs a
// fresh load from the shared page instead of reusing an earlier value.
static uint64_t GetSuspendTime() {
  // Fixed address at which KUSER_SHARED_DATA is mapped.
  constexpr uintptr_t kUserSharedDataAddress = 0x7FFE0000;
  // Byte offset of the interrupt time bias within KUSER_SHARED_DATA.
  constexpr uintptr_t kInterruptTimeBiasOffset = 0x3B0;

  const auto* const bias_field = reinterpret_cast<const volatile uint64_t*>(
      kUserSharedDataAddress + kInterruptTimeBiasOffset);
  return *bias_field;
}

// Like GetTickCount(), but with the suspend time reported by GetSuspendTime()
// subtracted out. The result is in the same unit as GetTickCount().
//
// The suspend bias is sampled on both sides of the GetTickCount() call and the
// measurement is retried until both samples agree, so the tick count and the
// bias that is subtracted from it belong to the same suspend state.
static unsigned int GetTickCountExcludingSuspend() {
  uint64_t bias_before = GetSuspendTime();
  for (;;) {
    const unsigned int ticks = GetTickCount();
    const uint64_t bias_after = GetSuspendTime();
    if (bias_after == bias_before) {
      // The bias is in 100ns units; dividing by 10000 yields milliseconds.
      // The subtraction is carried out in 64 bits and then truncated to the
      // width of the return type.
      return static_cast<unsigned int>(ticks - bias_after / 10000);
    }
    // The bias moved while we were sampling; retry against the newer value.
    bias_before = bias_after;
  }
}
#endif

// A pair of timestamps captured together by BenchmarkNow().
struct BenchmarkTime {
// Wall-clock reading obtained from absl::Now().
absl::Time time;
// Timestamp intended for measuring elapsed time in the tests. It equals `time`
// unless ABSL_INTERNAL_HAVE_WIN32_WAITER is defined, in which case it is built
// from GetTickCountExcludingSuspend() and is only meaningful as a difference
// between two samples.
absl::Time vtime;
};

// Captures the current time as a BenchmarkTime.
//
// `time` is always the absl::Now() reading. `vtime` is that same reading,
// except when ABSL_INTERNAL_HAVE_WIN32_WAITER is defined: there it is the Unix
// epoch advanced by GetTickCountExcludingSuspend() milliseconds, so that time
// spent suspended does not count towards measured durations.
static BenchmarkTime BenchmarkNow() {
  const absl::Time wall_now = absl::Now();
#ifdef ABSL_INTERNAL_HAVE_WIN32_WAITER
  const absl::Time virtual_now =
      absl::UnixEpoch() + absl::Milliseconds(GetTickCountExcludingSuspend());
#else
  const absl::Time virtual_now = wall_now;
#endif
  return {wall_now, virtual_now};
}

namespace {

TEST(Waiter, PrintPlatformImplementation) {
Expand Down Expand Up @@ -86,10 +126,10 @@ TYPED_TEST_P(WaiterTest, WaitNoTimeout) {
absl::SleepFor(absl::Seconds(1));
waiter.Post();
});
absl::Time start = absl::Now();
BenchmarkTime start = BenchmarkNow();
EXPECT_TRUE(
waiter.Wait(absl::synchronization_internal::KernelTimeout::Never()));
absl::Duration waited = absl::Now() - start;
absl::Duration waited = BenchmarkNow().vtime - start.vtime;
EXPECT_GE(waited, WithTolerance(absl::Seconds(2)));
}

Expand All @@ -103,10 +143,10 @@ TYPED_TEST_P(WaiterTest, WaitDurationWoken) {
absl::SleepFor(absl::Milliseconds(500));
waiter.Post();
});
absl::Time start = absl::Now();
BenchmarkTime start = BenchmarkNow();
EXPECT_TRUE(waiter.Wait(
absl::synchronization_internal::KernelTimeout(absl::Seconds(10))));
absl::Duration waited = absl::Now() - start;
absl::Duration waited = BenchmarkNow().vtime - start.vtime;
EXPECT_GE(waited, WithTolerance(absl::Milliseconds(500)));
EXPECT_LT(waited, absl::Seconds(2));
}
Expand All @@ -121,30 +161,30 @@ TYPED_TEST_P(WaiterTest, WaitTimeWoken) {
absl::SleepFor(absl::Milliseconds(500));
waiter.Post();
});
absl::Time start = absl::Now();
BenchmarkTime start = BenchmarkNow();
EXPECT_TRUE(waiter.Wait(absl::synchronization_internal::KernelTimeout(
start + absl::Seconds(10))));
absl::Duration waited = absl::Now() - start;
start.time + absl::Seconds(10))));
absl::Duration waited = BenchmarkNow().vtime - start.vtime;
EXPECT_GE(waited, WithTolerance(absl::Milliseconds(500)));
EXPECT_LT(waited, absl::Seconds(2));
}

TYPED_TEST_P(WaiterTest, WaitDurationReached) {
TypeParam waiter;
absl::Time start = absl::Now();
BenchmarkTime start = BenchmarkNow();
EXPECT_FALSE(waiter.Wait(
absl::synchronization_internal::KernelTimeout(absl::Milliseconds(500))));
absl::Duration waited = absl::Now() - start;
absl::Duration waited = BenchmarkNow().vtime - start.vtime;
EXPECT_GE(waited, WithTolerance(absl::Milliseconds(500)));
EXPECT_LT(waited, absl::Seconds(1));
}

TYPED_TEST_P(WaiterTest, WaitTimeReached) {
TypeParam waiter;
absl::Time start = absl::Now();
BenchmarkTime start = BenchmarkNow();
EXPECT_FALSE(waiter.Wait(absl::synchronization_internal::KernelTimeout(
start + absl::Milliseconds(500))));
absl::Duration waited = absl::Now() - start;
start.time + absl::Milliseconds(500))));
absl::Duration waited = BenchmarkNow().vtime - start.vtime;
EXPECT_GE(waited, WithTolerance(absl::Milliseconds(500)));
EXPECT_LT(waited, absl::Seconds(1));
}
Expand Down

0 comments on commit abc9b91

Please sign in to comment.