From f2a8aac2c512b21df09147b2eacf0b537f1ebda8 Mon Sep 17 00:00:00 2001 From: Zhuoran Dong Date: Fri, 6 Sep 2024 13:40:56 -0700 Subject: [PATCH] Improve delete unused tags to be able to process in chunks --- .../GenieClientIntegrationTestBase.java | 2 +- ...istenceServiceImplTagsIntegrationTest.java | 2 +- .../web/data/services/PersistenceService.java | 18 ++++--- .../impl/jpa/JpaPersistenceServiceImpl.java | 12 +++-- .../jpa/repositories/JpaFileRepository.java | 4 +- .../jpa/repositories/JpaTagRepository.java | 17 +++--- .../properties/DatabaseCleanupProperties.java | 22 ++++++++ .../web/tasks/leader/DatabaseCleanupTask.java | 52 +++++++++++++++---- .../DatabaseCleanupPropertiesTest.java | 2 + .../tasks/leader/DatabaseCleanupTaskTest.java | 11 ++-- 10 files changed, 105 insertions(+), 37 deletions(-) diff --git a/genie-client/src/integTest/java/com/netflix/genie/client/GenieClientIntegrationTestBase.java b/genie-client/src/integTest/java/com/netflix/genie/client/GenieClientIntegrationTestBase.java index 1da0836e6c2..1a5a25cb47c 100644 --- a/genie-client/src/integTest/java/com/netflix/genie/client/GenieClientIntegrationTestBase.java +++ b/genie-client/src/integTest/java/com/netflix/genie/client/GenieClientIntegrationTestBase.java @@ -65,7 +65,7 @@ abstract class GenieClientIntegrationTestBase { // leverage their layered jars to produce less changing images. @Container private static final GenericContainer GENIE = new GenericContainer("netflixoss/genie-app:latest.release") - .waitingFor(Wait.forHttp("/admin/health").forStatusCode(200).withStartupTimeout(Duration.ofMinutes(1L))) + .waitingFor(Wait.forHttp("/admin/health").forStatusCode(200).withStartupTimeout(Duration.ofMinutes(5L))) .withExposedPorts(8080); protected ApplicationClient applicationClient; diff --git a/genie-web/src/integTest/java/com/netflix/genie/web/data/services/impl/jpa/JpaPersistenceServiceImplTagsIntegrationTest.java b/genie-web/src/integTest/java/com/netflix/genie/web/data/services/impl/jpa/JpaPersistenceServiceImplTagsIntegrationTest.java index d588e40f3b7..660d65a14a0 100644 --- a/genie-web/src/integTest/java/com/netflix/genie/web/data/services/impl/jpa/JpaPersistenceServiceImplTagsIntegrationTest.java +++ b/genie-web/src/integTest/java/com/netflix/genie/web/data/services/impl/jpa/JpaPersistenceServiceImplTagsIntegrationTest.java @@ -61,7 +61,7 @@ void canDeleteUnusedTags() throws GenieCheckedException { Assertions.assertThat(this.tagRepository.existsByTag(tag1)).isTrue(); Assertions.assertThat(this.tagRepository.existsByTag(tag2)).isTrue(); - Assertions.assertThat(this.service.deleteUnusedTags(Instant.now(), 10)).isEqualTo(1L); + Assertions.assertThat(this.service.deleteUnusedTags(Instant.EPOCH, Instant.now(), 10)).isEqualTo(1L); Assertions.assertThat(this.tagRepository.existsByTag(tag1)).isFalse(); Assertions.assertThat(this.tagRepository.existsByTag(tag2)).isTrue(); diff --git a/genie-web/src/main/java/com/netflix/genie/web/data/services/PersistenceService.java b/genie-web/src/main/java/com/netflix/genie/web/data/services/PersistenceService.java index 86e84e51e0c..b7a743243c5 100644 --- a/genie-web/src/main/java/com/netflix/genie/web/data/services/PersistenceService.java +++ b/genie-web/src/main/java/com/netflix/genie/web/data/services/PersistenceService.java @@ -1121,22 +1121,24 @@ void removeTagForResource( //region Tag APIs /** - * Delete all tags from the database that aren't referenced which were created before the supplied created - * threshold. + * Delete all tags from the database that aren't referenced which were created between the supplied thresholds. * - * @param createdThreshold The instant in time where tags created before this time that aren't referenced - * will be deleted. Inclusive - * @param batchSize The maximum number of tags to delete in a single transaction + * @param createdThresholdLowerBound The instant in time when tags created after this time that aren't referenced + * will be selected. Inclusive. + * @param createdThresholdUpperBound The instant in time when tags created before this time that aren't referenced + * will be selected. Inclusive. + * @param batchSize The maximum number of tags to delete in a single transaction * @return The number of tags deleted */ - long deleteUnusedTags(@NotNull Instant createdThreshold, @Min(1) int batchSize); + long deleteUnusedTags(@NotNull Instant createdThresholdLowerBound, + @NotNull Instant createdThresholdUpperBound, + @Min(1) int batchSize); //endregion //region File APIs /** - * Delete all files from the database that aren't referenced which were created before the supplied created - * threshold. + * Delete all files from the database that aren't referenced which were created between the supplied thresholds. * * @param createdThresholdLowerBound The instant in time when files created after this time that aren't referenced * will be selected. Inclusive. diff --git a/genie-web/src/main/java/com/netflix/genie/web/data/services/impl/jpa/JpaPersistenceServiceImpl.java b/genie-web/src/main/java/com/netflix/genie/web/data/services/impl/jpa/JpaPersistenceServiceImpl.java index cbb802e9423..3d52e883833 100644 --- a/genie-web/src/main/java/com/netflix/genie/web/data/services/impl/jpa/JpaPersistenceServiceImpl.java +++ b/genie-web/src/main/java/com/netflix/genie/web/data/services/impl/jpa/JpaPersistenceServiceImpl.java @@ -2301,11 +2301,17 @@ public void removeTagForResource( */ @Override @Transactional(isolation = Isolation.READ_COMMITTED) - public long deleteUnusedTags(@NotNull final Instant createdThreshold, @Min(1) final int batchSize) { - log.info("[deleteUnusedTags] Called to delete unused tags created before {}", createdThreshold); + public long deleteUnusedTags(@NotNull final Instant createdThresholdLowerBound, + @NotNull final Instant createdThresholdUpperBound, + @Min(1) final int batchSize) { + log.debug( + "[deleteUnusedTags] Called to delete unused tags created between {} and {}", + createdThresholdLowerBound, + createdThresholdUpperBound + ); return this.tagRepository.deleteByIdIn( this.tagRepository - .findUnusedTags(createdThreshold, batchSize) + .findUnusedTags(createdThresholdLowerBound, createdThresholdUpperBound, batchSize) .stream() .map(Number::longValue) .collect(Collectors.toSet()) diff --git a/genie-web/src/main/java/com/netflix/genie/web/data/services/impl/jpa/repositories/JpaFileRepository.java b/genie-web/src/main/java/com/netflix/genie/web/data/services/impl/jpa/repositories/JpaFileRepository.java index 179f22878c9..7b3d9841501 100644 --- a/genie-web/src/main/java/com/netflix/genie/web/data/services/impl/jpa/repositories/JpaFileRepository.java +++ b/genie-web/src/main/java/com/netflix/genie/web/data/services/impl/jpa/repositories/JpaFileRepository.java @@ -82,8 +82,8 @@ public interface JpaFileRepository extends JpaIdRepository { Set findByFileIn(Set files); /** - * Find the ids of all files from the database that aren't referenced which were created before the supplied created - * threshold. + * Find the ids of all files from the database that aren't referenced which were created between the supplied + * thresholds. * * @param createdThresholdLowerBound The instant in time when files created after this time that aren't referenced * will be selected. Inclusive. diff --git a/genie-web/src/main/java/com/netflix/genie/web/data/services/impl/jpa/repositories/JpaTagRepository.java b/genie-web/src/main/java/com/netflix/genie/web/data/services/impl/jpa/repositories/JpaTagRepository.java index 617d3b6378a..a3ecccc4bd6 100644 --- a/genie-web/src/main/java/com/netflix/genie/web/data/services/impl/jpa/repositories/JpaTagRepository.java +++ b/genie-web/src/main/java/com/netflix/genie/web/data/services/impl/jpa/repositories/JpaTagRepository.java @@ -45,7 +45,8 @@ public interface JpaTagRepository extends JpaIdRepository { + "AND id NOT IN (SELECT DISTINCT(tag_id) FROM commands_tags) " + "AND id NOT IN (SELECT DISTINCT(tag_id) FROM criteria_tags) " + "AND id NOT IN (SELECT DISTINCT(tag_id) FROM jobs_tags) " - + "AND created <= :createdThreshold " + + "AND created <= :createdThresholdUpperBound " + + "AND created >= :createdThresholdLowerBound " + "LIMIT :limit " + "FOR UPDATE;"; @@ -74,17 +75,19 @@ public interface JpaTagRepository extends JpaIdRepository { Set findByTagIn(Set tags); /** - * Find all tags from the database that aren't referenced which were created before the supplied created - * threshold. + * Find all tags from the database that aren't referenced which were created between the supplied thresholds. * - * @param createdThreshold The instant in time where tags created before this time that aren't referenced - * will be returned. Inclusive - * @param limit Maximum number of IDs to return + * @param createdThresholdLowerBound The instant in time when tags created after this time that aren't referenced + * will be selected. Inclusive. + * @param createdThresholdUpperBound The instant in time when tags created before this time that aren't referenced + * will be selected. Inclusive. + * @param limit Maximum number of IDs to return * @return The number of tags deleted */ @Query(value = SELECT_FOR_UPDATE_UNUSED_TAGS_SQL, nativeQuery = true) Set findUnusedTags( - @Param("createdThreshold") Instant createdThreshold, + @Param("createdThresholdLowerBound") Instant createdThresholdLowerBound, + @Param("createdThresholdUpperBound") Instant createdThresholdUpperBound, @Param("limit") int limit ); diff --git a/genie-web/src/main/java/com/netflix/genie/web/properties/DatabaseCleanupProperties.java b/genie-web/src/main/java/com/netflix/genie/web/properties/DatabaseCleanupProperties.java index 73c148cfd5d..713e5d7694e 100644 --- a/genie-web/src/main/java/com/netflix/genie/web/properties/DatabaseCleanupProperties.java +++ b/genie-web/src/main/java/com/netflix/genie/web/properties/DatabaseCleanupProperties.java @@ -369,9 +369,31 @@ public static class TagDatabaseCleanupProperties { */ public static final String SKIP_PROPERTY = TAG_CLEANUP_PROPERTY_PREFIX + ".skip"; + /** + * The number of days within current day that the unused tags deletion will be running in batch mode. + */ + public static final String BATCH_DAYS_WITHIN_PROPERTY = TAG_CLEANUP_PROPERTY_PREFIX + ".batchDaysWithin"; + + /** + * The size of the rolling window used to delete unused tags, units in hours. + */ + public static final String ROLLING_WINDOW_HOURS_PROPERTY = TAG_CLEANUP_PROPERTY_PREFIX + ".rollingWindowHours"; + /** * Skip the Tags table when performing database cleanup. */ private boolean skip; + + /** + * The number of days within current day that the unused tags deletion will be running in batch mode. + */ + @Min(1) + private int batchDaysWithin = 30; + + /** + * The size of the rolling window used to delete unused tags, units in hours. + */ + @Min(1) + private int rollingWindowHours = 12; } } diff --git a/genie-web/src/main/java/com/netflix/genie/web/tasks/leader/DatabaseCleanupTask.java b/genie-web/src/main/java/com/netflix/genie/web/tasks/leader/DatabaseCleanupTask.java index a91216c4b88..0cbeca9689b 100644 --- a/genie-web/src/main/java/com/netflix/genie/web/tasks/leader/DatabaseCleanupTask.java +++ b/genie-web/src/main/java/com/netflix/genie/web/tasks/leader/DatabaseCleanupTask.java @@ -350,9 +350,12 @@ private void deleteFiles(final Instant creationThreshold) { this.cleanupProperties.getFileCleanup().getBatchDaysWithin() ); log.info( - "Attempting to delete unused files from before {} in batches of {}", + "Attempting to delete unused files older than {} in batches of {}. Using a rolling window" + + "of {} hours within last {} days", creationThreshold, - batchSize + batchSize, + rollingWindowHours, + batchDaysWithin ); long totalDeleted = 0L; @@ -364,7 +367,7 @@ private void deleteFiles(final Instant creationThreshold) { upperBound = lowerBound; lowerBound = lowerBound.minus(rollingWindowHours, ChronoUnit.HOURS); } - // do a final deletion of everything < batchLowerBound + // do a final deletion of everything <= batchLowerBound totalDeleted += deleteUnusedFilesBetween(Instant.EPOCH, upperBound, batchSize); log.info( "Deleted {} files that were unused by any resource and created before {}", @@ -411,19 +414,36 @@ private void deleteTags(final Instant creationThreshold) { Integer.class, this.cleanupProperties.getBatchSize() ); - + final long rollingWindowHours = this.environment.getProperty( + DatabaseCleanupProperties.TagDatabaseCleanupProperties.ROLLING_WINDOW_HOURS_PROPERTY, + Integer.class, + this.cleanupProperties.getTagCleanup().getRollingWindowHours() + ); + final long batchDaysWithin = this.environment.getProperty( + DatabaseCleanupProperties.TagDatabaseCleanupProperties.BATCH_DAYS_WITHIN_PROPERTY, + Integer.class, + this.cleanupProperties.getTagCleanup().getBatchDaysWithin() + ); log.info( - "Attempting to delete unused tags from before {} in batches of {}", + "Attempting to delete unused tags older than {} in batches of {}. Using a rolling window" + + "of {} hours within last {} days", creationThreshold, - batchSize + batchSize, + rollingWindowHours, + batchDaysWithin ); - long deleted; long totalDeleted = 0L; - do { - deleted = this.persistenceService.deleteUnusedTags(creationThreshold, batchSize); - totalDeleted += deleted; - } while (deleted > 0); + Instant upperBound = creationThreshold; + Instant lowerBound = creationThreshold.minus(rollingWindowHours, ChronoUnit.HOURS); + final Instant batchLowerBound = creationThreshold.minus(batchDaysWithin, ChronoUnit.DAYS); + while (upperBound.isAfter(batchLowerBound)) { + totalDeleted += deleteUnusedTagsBetween(lowerBound, upperBound, batchSize); + upperBound = lowerBound; + lowerBound = lowerBound.minus(rollingWindowHours, ChronoUnit.HOURS); + } + // do a final deletion of everything <= batchLowerBound + totalDeleted += deleteUnusedTagsBetween(Instant.EPOCH, upperBound, batchSize); log.info( "Deleted {} tags that were unused by any resource and created before {}", totalDeleted, @@ -441,6 +461,16 @@ private void deleteTags(final Instant creationThreshold) { } } + private long deleteUnusedTagsBetween(final Instant lowerBound, final Instant upperBound, final int batchSize) { + long deleted; + long totalDeleted = 0L; + do { + deleted = this.persistenceService.deleteUnusedTags(lowerBound, upperBound, batchSize); + totalDeleted += deleted; + } while (deleted > 0); + return totalDeleted; + } + private void deactivateCommands(final Instant runtime) { final long startTime = System.nanoTime(); final Set tags = Sets.newHashSet(); diff --git a/genie-web/src/test/java/com/netflix/genie/web/properties/DatabaseCleanupPropertiesTest.java b/genie-web/src/test/java/com/netflix/genie/web/properties/DatabaseCleanupPropertiesTest.java index 2096896dfc4..b294c209d61 100644 --- a/genie-web/src/test/java/com/netflix/genie/web/properties/DatabaseCleanupPropertiesTest.java +++ b/genie-web/src/test/java/com/netflix/genie/web/properties/DatabaseCleanupPropertiesTest.java @@ -53,6 +53,8 @@ void canGetDefaultValues() { Assertions.assertThat(this.properties.getJobCleanup().getPageSize()).isEqualTo(1000); Assertions.assertThat(this.properties.getClusterCleanup().isSkip()).isFalse(); Assertions.assertThat(this.properties.getTagCleanup().isSkip()).isFalse(); + Assertions.assertThat(this.properties.getTagCleanup().getBatchDaysWithin()).isEqualTo(30); + Assertions.assertThat(this.properties.getTagCleanup().getRollingWindowHours()).isEqualTo(12); Assertions.assertThat(this.properties.getFileCleanup().isSkip()).isFalse(); Assertions.assertThat(this.properties.getFileCleanup().getBatchDaysWithin()).isEqualTo(30); Assertions.assertThat(this.properties.getFileCleanup().getRollingWindowHours()).isEqualTo(12); diff --git a/genie-web/src/test/java/com/netflix/genie/web/tasks/leader/DatabaseCleanupTaskTest.java b/genie-web/src/test/java/com/netflix/genie/web/tasks/leader/DatabaseCleanupTaskTest.java index 7089f1ead2e..730b1df5be0 100644 --- a/genie-web/src/test/java/com/netflix/genie/web/tasks/leader/DatabaseCleanupTaskTest.java +++ b/genie-web/src/test/java/com/netflix/genie/web/tasks/leader/DatabaseCleanupTaskTest.java @@ -133,6 +133,8 @@ void canRun() { Mockito.when(this.cleanupProperties.getBatchSize()).thenReturn(batchSize); Mockito.when(this.fileCleanupProperties.getBatchDaysWithin()).thenReturn(batchDaysWithin); Mockito.when(this.fileCleanupProperties.getRollingWindowHours()).thenReturn(rollingWindow); + Mockito.when(this.tagCleanupProperties.getBatchDaysWithin()).thenReturn(batchDaysWithin); + Mockito.when(this.tagCleanupProperties.getRollingWindowHours()).thenReturn(rollingWindow); Mockito.when(this.jobCleanupProperties.getRetention()).thenReturn(days).thenReturn(negativeDays); Mockito.when(this.jobCleanupProperties.getPageSize()).thenReturn(pageSize); Mockito.when(this.commandDeactivationProperties.getCommandCreationThreshold()).thenReturn(60); @@ -169,7 +171,8 @@ void canRun() { Mockito.any(Instant.class), Mockito.any(Instant.class), Mockito.eq(batchSize))) .thenReturn(3L, 0L, 4L, 0L); Mockito - .when(this.persistenceService.deleteUnusedTags(Mockito.any(Instant.class), Mockito.eq(batchSize))) + .when(this.persistenceService.deleteUnusedTags( + Mockito.any(Instant.class), Mockito.any(Instant.class), Mockito.eq(batchSize))) .thenReturn(5L, 0L, 6L, 0L); Mockito .when(this.persistenceService.deleteUnusedApplications(Mockito.any(Instant.class), Mockito.eq(batchSize))) @@ -225,8 +228,8 @@ void canRun() { .verify(this.persistenceService, Mockito.times(16)) .deleteUnusedFiles(Mockito.any(Instant.class), Mockito.any(Instant.class), Mockito.eq(batchSize)); Mockito - .verify(this.persistenceService, Mockito.times(4)) - .deleteUnusedTags(Mockito.any(Instant.class), Mockito.eq(batchSize)); + .verify(this.persistenceService, Mockito.times(16)) + .deleteUnusedTags(Mockito.any(Instant.class), Mockito.any(Instant.class), Mockito.eq(batchSize)); Mockito .verify(this.persistenceService, Mockito.times(5)) .deleteUnusedApplications(Mockito.any(Instant.class), Mockito.eq(batchSize)); @@ -332,6 +335,6 @@ void skipAll() { .deleteUnusedFiles(Mockito.any(Instant.class), Mockito.any(Instant.class), Mockito.anyInt()); Mockito .verify(this.persistenceService, Mockito.never()) - .deleteUnusedTags(Mockito.any(Instant.class), Mockito.anyInt()); + .deleteUnusedTags(Mockito.any(Instant.class), Mockito.any(Instant.class), Mockito.anyInt()); } }