Skip to content

Commit

Permalink
[controller] Remove older backup versions after retention period (#1149
Browse files Browse the repository at this point in the history
)

If there are old lingering version and there are newer repush version in regular cadence, today Venice will not delete the old version as the getLatestVersionPromoteToCurrentTimestamp updated and never fall off retention period. This PR fixed that by checking version creation time and delete if its older than retention period.

---------

Co-authored-by: Sourav Maji <tester@linkedin.com>
  • Loading branch information
majisourav99 and Sourav Maji authored Sep 4, 2024
1 parent fa27ef0 commit 9276f5a
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -265,28 +265,19 @@ protected boolean cleanupBackupVersion(Store store, String clusterName) {
return false;
}

// If the current version is from repush, do not delete the version before previous current version,
// instead delete the repush source version from which it was repushed as they hold identical data.
// During later iteration of this thread, it will delete the older backup after threshold retention time.
// This will delete backup versions which satisfy any of the following conditions
// 1. If the current version is from repush, delete the repush source backup version
// 2. Current version is from a repush, but still a lingering version older than retention period.
// 3. Current version is not repush and is older than retention, delete any versions < current version.
int repushSourceVersion = store.getVersionOrThrow(currentVersion).getRepushSourceVersion();
List<Version> readyToBeRemovedVersions = versions.stream()
.filter(
v -> repushSourceVersion > NON_EXISTING_VERSION
? v.getNumber() == repushSourceVersion
? (v.getNumber() == repushSourceVersion || v.getNumber() < currentVersion
&& v.getCreatedTime() + defaultBackupVersionRetentionMs < time.getMilliseconds())
: v.getNumber() < currentVersion)
.collect(Collectors.toList());

// If there are still leaking versions due to consecutive repushes with some version failing in other fabric,
// there could be versions with repushSourceVersion does not match current version, delete them after backup
// retention period.
if (readyToBeRemovedVersions.isEmpty()) {
for (Version version: versions) {
if (version.getNumber() < currentVersion && store.getLatestVersionPromoteToCurrentTimestamp()
+ defaultBackupVersionRetentionMs < time.getMilliseconds()) {
readyToBeRemovedVersions.add(version);
}
}
}
if (readyToBeRemovedVersions.isEmpty()) {
return false;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,21 @@ public void testCleanupBackupVersionRepush() {
1,
TimeUnit.SECONDS,
() -> verify(admin, atLeast(1)).deleteOldVersionInStore(clusterName, storeWithRollback.getName(), 2));

versions.clear();
versions.put(1, VersionStatus.ONLINE);
versions.put(3, VersionStatus.ONLINE);
Store storeLingeringVersion = mockStore(-1, System.currentTimeMillis() - defaultRetentionMs * 2, versions, 3);
version = storeLingeringVersion.getVersion(2);
doReturn(2).when(version).getRepushSourceVersion();
doReturn(1L).when(version).getCreatedTime();

// should delete version 1 as its lingerning and not a repush source version.
Assert.assertTrue(service.cleanupBackupVersion(storeLingeringVersion, clusterName));
TestUtils.waitForNonDeterministicAssertion(
1,
TimeUnit.SECONDS,
() -> verify(admin, atLeast(1)).deleteOldVersionInStore(clusterName, storeLingeringVersion.getName(), 1));
}

@Test
Expand Down

0 comments on commit 9276f5a

Please sign in to comment.