From fbef568133a6f8e9248ee887e5982f57edf90357 Mon Sep 17 00:00:00 2001 From: Hamidreza Zare Date: Wed, 18 Sep 2024 22:16:58 -0400 Subject: [PATCH] [PLAT-15378][localProvider][dr] Deflake testDrConfigSetup local provider test Summary: This diff adds retry logic around the check for the number of rows in the target universe in an xCluster setup. Because xCluster replication is asynchronous, the check is retried for up to 1 minute until the rows inserted on the source universe show up on the target universe. It also disables the `com.yugabyte.yw.commissioner.tasks.local.DRDbScopedLocalTest#testDrDbScopedUpdate` local provider test. Test Plan: - Made sure the retry logic works properly when the delay is 10ms. - Made sure testDrConfigSetup passes Reviewers: cwang, vbansal, sanketh Reviewed By: vbansal Subscribers: yugaware Differential Revision: https://phorge.dev.yugabyte.com/D38190 --- .../tasks/local/DRDbScopedLocalTest.java | 2 +- .../commissioner/tasks/local/DRLocalTest.java | 11 +---- .../local/LocalProviderUniverseTestBase.java | 44 +++++++++++++++++++ .../tasks/local/XClusterLocalTestBase.java | 27 ++++++++++-- 4 files changed, 70 insertions(+), 14 deletions(-) diff --git a/managed/src/test/java/com/yugabyte/yw/commissioner/tasks/local/DRDbScopedLocalTest.java b/managed/src/test/java/com/yugabyte/yw/commissioner/tasks/local/DRDbScopedLocalTest.java index 363e2b65ac69..e51cb5cac220 100644 --- a/managed/src/test/java/com/yugabyte/yw/commissioner/tasks/local/DRDbScopedLocalTest.java +++ b/managed/src/test/java/com/yugabyte/yw/commissioner/tasks/local/DRDbScopedLocalTest.java @@ -217,7 +217,7 @@ public void testDrDbScopedSetupWithBootstrap() throws InterruptedException { deleteDrConfig(drConfigUUID, sourceUniverse, targetUniverse); } - @Test + // @Test public void testDrDbScopedUpdate() throws InterruptedException { Universe sourceUniverse = createDRUniverse(DB_SCOPED_STABLE_VERSION, "source-universe", true, 1, 1); diff --git 
a/managed/src/test/java/com/yugabyte/yw/commissioner/tasks/local/DRLocalTest.java b/managed/src/test/java/com/yugabyte/yw/commissioner/tasks/local/DRLocalTest.java index 3d1989c2eb1e..397f20bf9457 100644 --- a/managed/src/test/java/com/yugabyte/yw/commissioner/tasks/local/DRLocalTest.java +++ b/managed/src/test/java/com/yugabyte/yw/commissioner/tasks/local/DRLocalTest.java @@ -80,8 +80,7 @@ public void testDrConfigSetup() throws InterruptedException { assertEquals(TaskInfo.State.Success, taskInfo.getTaskState()); verifyUniverseState(Universe.getOrBadRequest(source.getUniverseUUID())); verifyUniverseState(Universe.getOrBadRequest(target.getUniverseUUID())); - Thread.sleep(3000); - verifyYSQL(target); + assertYsqlOutputEqualsWithRetry(target, "select count(*) from some_table", "3"); NodeDetails details = source.getUniverseDetails().nodeDetailsSet.iterator().next(); ShellResponse ysqlResponse = @@ -93,13 +92,7 @@ public void testDrConfigSetup() throws InterruptedException { 10); assertTrue(ysqlResponse.isSuccess()); - Thread.sleep(2500); - details = target.getUniverseDetails().nodeDetailsSet.iterator().next(); - ysqlResponse = - localNodeUniverseManager.runYsqlCommand( - details, target, YUGABYTE_DB, "select count(*) from some_table", 10); - assertTrue(ysqlResponse.isSuccess()); - assertEquals("5", CommonUtils.extractJsonisedSqlResponse(ysqlResponse).trim()); + assertYsqlOutputEqualsWithRetry(target, "select count(*) from some_table", "5"); verifyPayload(); Result deleteResult = deleteDrConfig(UUID.fromString(json.get("resourceUUID").asText())); diff --git a/managed/src/test/java/com/yugabyte/yw/commissioner/tasks/local/LocalProviderUniverseTestBase.java b/managed/src/test/java/com/yugabyte/yw/commissioner/tasks/local/LocalProviderUniverseTestBase.java index d1d414fe77bf..d825feaaa80d 100644 --- a/managed/src/test/java/com/yugabyte/yw/commissioner/tasks/local/LocalProviderUniverseTestBase.java +++ 
b/managed/src/test/java/com/yugabyte/yw/commissioner/tasks/local/LocalProviderUniverseTestBase.java @@ -13,6 +13,7 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.api.client.util.Throwables; import com.google.common.base.Stopwatch; import com.google.common.net.HostAndPort; import com.yugabyte.yw.cloud.PublicCloudConstants; @@ -34,6 +35,7 @@ import com.yugabyte.yw.common.ReleaseManager; import com.yugabyte.yw.common.RetryTaskUntilCondition; import com.yugabyte.yw.common.ShellResponse; +import com.yugabyte.yw.common.UnrecoverableException; import com.yugabyte.yw.common.Util; import com.yugabyte.yw.common.YcqlQueryExecutor; import com.yugabyte.yw.common.backuprestore.BackupHelper; @@ -100,6 +102,7 @@ import java.util.UUID; import java.util.concurrent.TimeUnit; import java.util.function.Consumer; +import java.util.function.Function; import java.util.stream.Collectors; import kamon.instrumentation.play.GuiceModule; import lombok.extern.slf4j.Slf4j; @@ -1156,4 +1159,45 @@ protected void verifyMasterLBStatus( assertEquals(resp.isEnabled, isEnabled); assertEquals(resp.isIdle, isLoadBalancerIdle); } + + public void doWithRetry( + Function<Duration, Duration> waitBeforeRetryFunct, Duration timeout, Runnable funct) + throws RuntimeException { + long currentDelayMs = 0; + long timeoutMs = timeout.toMillis(); + long startTime = System.currentTimeMillis(); + while (true) { + currentDelayMs = waitBeforeRetryFunct.apply(Duration.ofMillis(currentDelayMs)).toMillis(); + try { + funct.run(); + return; + } catch (UnrecoverableException e) { + log.error( + "Won't retry; Unrecoverable error while running the function: {}", e.getMessage()); + throw e; + } catch (Exception e) { + if (System.currentTimeMillis() < startTime + timeoutMs - currentDelayMs) { + log.warn("Will retry; Error while running the function: {}", e.getMessage()); + } else { + log.error("Retry timed out; Error while running the function: {}", e.getMessage()); + 
Throwables.propagate(e); + } + } + log.debug( + "Waiting for {} ms between retry, total delay remaining {} ms", + currentDelayMs, + timeoutMs - (System.currentTimeMillis() - startTime)); + try { + // A blocking sleep is okay here since this is only used in tests. + Thread.sleep(currentDelayMs); + } catch (InterruptedException e) { + log.error("Interrupted while waiting for retry", e); + throw new RuntimeException(e); + } + } + } + + protected void doWithRetry(Duration waitBeforeRetry, Duration timeout, Runnable funct) { + doWithRetry((prevDelay) -> waitBeforeRetry, timeout, funct); + } } diff --git a/managed/src/test/java/com/yugabyte/yw/commissioner/tasks/local/XClusterLocalTestBase.java b/managed/src/test/java/com/yugabyte/yw/commissioner/tasks/local/XClusterLocalTestBase.java index 61e0787eda20..9822941e22ee 100644 --- a/managed/src/test/java/com/yugabyte/yw/commissioner/tasks/local/XClusterLocalTestBase.java +++ b/managed/src/test/java/com/yugabyte/yw/commissioner/tasks/local/XClusterLocalTestBase.java @@ -15,6 +15,7 @@ import com.yugabyte.yw.models.Universe; import com.yugabyte.yw.models.helpers.CommonUtils; import com.yugabyte.yw.models.helpers.NodeDetails; +import java.time.Duration; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -153,10 +154,8 @@ public void validateRowCount(Universe universe, Table table, int expectedRows) { return -1; } }, - rowCount -> { - return rowCount == expectedRows; - }); - boolean success = condition.retryUntilCond(5, TimeUnit.MINUTES.toSeconds(1)); + rowCount -> rowCount == expectedRows); + boolean success = condition.retryUntilCond(1, TimeUnit.MINUTES.toSeconds(1)); if (!success) { throw new RuntimeException( String.format( @@ -236,4 +235,24 @@ public Result editXClusterConfig(XClusterConfigEditFormData formData, UUID xClus user.createAuthToken(), Json.toJson(formData)); } + + protected void assertYsqlOutputEqualsWithRetry( + Universe universe, String ysqlCommand, String expectedValue) { + NodeDetails 
targetNodeDetails = universe.getUniverseDetails().nodeDetailsSet.iterator().next(); + doWithRetry( + Duration.ofSeconds(1), + Duration.ofMinutes(1), + () -> { + ShellResponse targetYsqlResponse = + localNodeUniverseManager.runYsqlCommand( + targetNodeDetails, universe, YUGABYTE_DB, ysqlCommand, 10); + if (!targetYsqlResponse.isSuccess()) { + throw new RuntimeException("Failed to run ysql command"); + } + String response = CommonUtils.extractJsonisedSqlResponse(targetYsqlResponse).trim(); + if (!response.equals(expectedValue)) { + throw new RuntimeException("Expected " + expectedValue + ", got " + response); + } + }); + } }