From c400d84f0e884217454ddfcc1503d02e0b280fa9 Mon Sep 17 00:00:00 2001 From: Varun Bansal Date: Fri, 20 Oct 2023 17:39:58 +0530 Subject: [PATCH 01/45] [Remote State] fix lock release before deletion is completed (#10611) * fix lock release before deletion is completed Signed-off-by: bansvaru --- .../remote/RemoteClusterStateService.java | 8 +++-- .../RemoteClusterStateServiceTests.java | 34 +++++++++++++++++++ 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index b9d06c8fbb1c1..96ce2fc779ea0 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -1072,7 +1072,8 @@ public void onFailure(Exception e) { * @param clusterUUID uuid of cluster state to refer to in remote * @param manifestsToRetain no of latest manifest files to keep in remote */ - private void deleteStaleClusterMetadata(String clusterName, String clusterUUID, int manifestsToRetain) { + // package private for testing + void deleteStaleClusterMetadata(String clusterName, String clusterUUID, int manifestsToRetain) { if (deleteStaleMetadataRunning.compareAndSet(false, true) == false) { logger.info("Delete stale cluster metadata task is already in progress."); return; @@ -1109,8 +1110,9 @@ public void onFailure(Exception e) { } } ); - } finally { + } catch (Exception e) { deleteStaleMetadataRunning.set(false); + throw e; } } @@ -1190,7 +1192,7 @@ private void deleteStalePaths(String clusterName, String clusterUUID, List { String clusterName = clusterState.getClusterName().value(); - logger.info("Deleting stale cluster UUIDs data from remote [{}]", clusterName); + logger.debug("Deleting stale cluster UUIDs data from remote [{}]", clusterName); Set allClustersUUIDsInRemote; try { allClustersUUIDsInRemote = 
new HashSet<>(getAllClusterUUIDs(clusterState.getClusterName().value())); diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index 49b7f0ff8d1a9..433eac63e9580 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -62,6 +62,9 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Supplier; import org.mockito.ArgumentCaptor; @@ -73,6 +76,7 @@ import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_CURRENT_CODEC_VERSION; import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_FILE_PREFIX; import static org.opensearch.gateway.remote.RemoteClusterStateService.METADATA_FILE_PREFIX; +import static org.opensearch.gateway.remote.RemoteClusterStateService.RETAINED_MANIFESTS; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT; @@ -1004,6 +1008,36 @@ public void testFileNames() { assertThat(splittedName[3], is("P")); } + public void testSingleConcurrentExecutionOfStaleManifestCleanup() throws Exception { + BlobContainer blobContainer = mock(BlobContainer.class); + BlobPath blobPath = new BlobPath().add("random-path"); + when((blobStoreRepository.basePath())).thenReturn(blobPath); + when(blobStore.blobContainer(any())).thenReturn(blobContainer); + + 
CountDownLatch latch = new CountDownLatch(1); + AtomicInteger callCount = new AtomicInteger(0); + doAnswer(invocation -> { + callCount.incrementAndGet(); + if (latch.await(5000, TimeUnit.SECONDS) == false) { + throw new Exception("Timed out waiting for delete task queuing to complete"); + } + return null; + }).when(blobContainer) + .listBlobsByPrefixInSortedOrder( + any(String.class), + any(int.class), + any(BlobContainer.BlobNameSortOrder.class), + any(ActionListener.class) + ); + + remoteClusterStateService.start(); + remoteClusterStateService.deleteStaleClusterMetadata("cluster-name", "cluster-uuid", RETAINED_MANIFESTS); + remoteClusterStateService.deleteStaleClusterMetadata("cluster-name", "cluster-uuid", RETAINED_MANIFESTS); + + latch.countDown(); + assertBusy(() -> assertEquals(1, callCount.get())); + } + private void mockObjectsForGettingPreviousClusterUUID(Map clusterUUIDsPointers) throws IOException { final BlobPath blobPath = mock(BlobPath.class); when((blobStoreRepository.basePath())).thenReturn(blobPath); From 6641ef8a0a39d86aadd272936cae25a406eb3485 Mon Sep 17 00:00:00 2001 From: Varun Bansal Date: Fri, 20 Oct 2023 18:06:40 +0530 Subject: [PATCH 02/45] Restore remote index shards with ExistingStoreRecoverySource after restore from remote state (#10665) * Restore remote index shards with ExistingStoreRecoverySource after restore from remote state Signed-off-by: bansvaru --- .../remotestore/BaseRemoteStoreRestoreIT.java | 5 +- .../RemoteStoreClusterStateRestoreIT.java | 28 ++- .../cluster/routing/IndexRoutingTable.java | 4 +- .../gateway/ClusterStateUpdaters.java | 17 +- .../recovery/RemoteStoreRestoreService.java | 30 +-- .../gateway/ClusterStateUpdatersTests.java | 237 ++---------------- 6 files changed, 53 insertions(+), 268 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java index 
b8481610869e6..99c5d7fb2bae7 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java @@ -46,7 +46,10 @@ protected Collection> nodePlugins() { } protected void restore(String... indices) { - boolean restoreAllShards = randomBoolean(); + restore(randomBoolean(), indices); + } + + protected void restore(boolean restoreAllShards, String... indices) { if (restoreAllShards) { assertAcked(client().admin().indices().prepareClose(indices)); } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java index 3a3e293de9b13..c2cb7cc60f152 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java @@ -65,6 +65,13 @@ private void resetCluster(int dataNodeCount, int clusterManagerNodeCount) { internalCluster().startDataOnlyNodes(dataNodeCount); } + protected void verifyRedIndicesAndTriggerRestore(Map indexStats, String indexName, boolean indexMoreDocs) + throws Exception { + ensureRed(indexName); + restore(false, indexName); + verifyRestoredData(indexStats, indexName, indexMoreDocs); + } + public void testFullClusterRestore() throws Exception { int shardCount = randomIntBetween(1, 2); int replicaCount = 1; @@ -83,7 +90,7 @@ public void testFullClusterRestore() throws Exception { // Step - 3 Trigger full cluster restore and validate validateMetadata(List.of(INDEX_NAME)); - verifyRestoredData(indexStats, INDEX_NAME); + verifyRedIndicesAndTriggerRestore(indexStats, INDEX_NAME, true); } public void testFullClusterRestoreMultipleIndices() throws Exception { @@ -112,8 +119,8 @@ public void 
testFullClusterRestoreMultipleIndices() throws Exception { // Step - 3 Trigger full cluster restore validateMetadata(List.of(INDEX_NAME, secondIndexName)); - verifyRestoredData(indexStats, INDEX_NAME); - verifyRestoredData(indexStats2, secondIndexName, false); + verifyRedIndicesAndTriggerRestore(indexStats, INDEX_NAME, false); + verifyRedIndicesAndTriggerRestore(indexStats2, secondIndexName, false); assertTrue(INDEX_READ_ONLY_SETTING.get(clusterService().state().metadata().index(secondIndexName).getSettings())); assertThrows(ClusterBlockException.class, () -> indexSingleDoc(secondIndexName)); // Test is complete @@ -181,7 +188,7 @@ public void testRemoteStateFullRestart() throws Exception { String newClusterUUID = clusterService().state().metadata().clusterUUID(); assert Objects.equals(newClusterUUID, prevClusterUUID) : "Full restart not successful. cluster uuid has changed"; validateCurrentMetadata(); - verifyRestoredData(indexStats, INDEX_NAME); + verifyRedIndicesAndTriggerRestore(indexStats, INDEX_NAME, true); } private void validateMetadata(List indexNames) { @@ -246,19 +253,18 @@ public void testFullClusterRestoreGlobalMetadata() throws Exception { // Step - 3 Trigger full cluster restore and validate // validateCurrentMetadata(); - verifyRestoredData(indexStats, INDEX_NAME, false); - - // validate global metadata restored - verifyRestoredRepositories(); - verifyRestoredIndexTemplate(); assertEquals(Integer.valueOf(34), SETTING_CLUSTER_MAX_SHARDS_PER_NODE.get(clusterService().state().metadata().settings())); assertEquals(true, SETTING_READ_ONLY_SETTING.get(clusterService().state().metadata().settings())); assertTrue(clusterService().state().blocks().hasGlobalBlock(CLUSTER_READ_ONLY_BLOCK)); - // Test is complete - // Remote the cluster read only block to ensure proper cleanup updatePersistentSettings(Settings.builder().put(SETTING_READ_ONLY_SETTING.getKey(), false).build()); 
assertFalse(clusterService().state().blocks().hasGlobalBlock(CLUSTER_READ_ONLY_BLOCK)); + + verifyRedIndicesAndTriggerRestore(indexStats, INDEX_NAME, false); + + // validate global metadata restored + verifyRestoredRepositories(); + verifyRestoredIndexTemplate(); } private void registerCustomRepository() { diff --git a/server/src/main/java/org/opensearch/cluster/routing/IndexRoutingTable.java b/server/src/main/java/org/opensearch/cluster/routing/IndexRoutingTable.java index b12698c8a320e..d77d44580798a 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/IndexRoutingTable.java +++ b/server/src/main/java/org/opensearch/cluster/routing/IndexRoutingTable.java @@ -466,12 +466,12 @@ public Builder initializeAsRemoteStoreRestore( } for (int shardNumber = 0; shardNumber < indexMetadata.getNumberOfShards(); shardNumber++) { ShardId shardId = new ShardId(index, shardNumber); - if (forceRecoverAllPrimaries == false && indexShardRoutingTableMap.containsKey(shardId) == false) { + if (indexShardRoutingTableMap.containsKey(shardId) == false) { throw new IllegalStateException("IndexShardRoutingTable is not present for shardId: " + shardId); } IndexShardRoutingTable.Builder indexShardRoutingBuilder = new IndexShardRoutingTable.Builder(shardId); IndexShardRoutingTable indexShardRoutingTable = indexShardRoutingTableMap.get(shardId); - if (forceRecoverAllPrimaries || indexShardRoutingTable == null || indexShardRoutingTable.primaryShard().unassigned()) { + if (forceRecoverAllPrimaries || indexShardRoutingTable.primaryShard().unassigned()) { // Primary shard to be recovered from remote store. indexShardRoutingBuilder.addShard(ShardRouting.newUnassigned(shardId, true, recoverySource, unassignedInfo)); // All the replica shards to be recovered from peer recovery. 
diff --git a/server/src/main/java/org/opensearch/gateway/ClusterStateUpdaters.java b/server/src/main/java/org/opensearch/gateway/ClusterStateUpdaters.java index 4c562b348f141..1563ac84bdd1c 100644 --- a/server/src/main/java/org/opensearch/gateway/ClusterStateUpdaters.java +++ b/server/src/main/java/org/opensearch/gateway/ClusterStateUpdaters.java @@ -41,7 +41,6 @@ import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodes; -import org.opensearch.cluster.routing.RecoverySource; import org.opensearch.cluster.routing.RoutingTable; import org.opensearch.common.settings.ClusterSettings; @@ -121,21 +120,7 @@ static ClusterState updateRoutingTable(final ClusterState state) { // initialize all index routing tables as empty final RoutingTable.Builder routingTableBuilder = RoutingTable.builder(state.routingTable()); for (final IndexMetadata cursor : state.metadata().indices().values()) { - // Whether IndexMetadata is recovered from local disk or remote it doesn't matter to us at this point. - // We are only concerned about index data recovery here. Which is why we only check for remote store enabled and not for remote - // cluster state enabled. - if (cursor.getSettings().getAsBoolean(IndexMetadata.SETTING_REMOTE_STORE_ENABLED, false) == false - || state.routingTable().hasIndex(cursor.getIndex()) == false - || state.routingTable() - .index(cursor.getIndex()) - .shardsMatchingPredicateCount( - shardRouting -> shardRouting.primary() - // We need to ensure atleast one of the primaries is being recovered from remote. 
- // This ensures we have gone through the RemoteStoreRestoreService and routing table is updated - && shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource - ) == 0) { - routingTableBuilder.addAsRecovery(cursor); - } + routingTableBuilder.addAsRecovery(cursor); } // start with 0 based versions for routing table routingTableBuilder.version(0); diff --git a/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java b/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java index ac9cf35d1d8e5..6692d521b8f65 100644 --- a/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java +++ b/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java @@ -183,6 +183,7 @@ private RemoteRestoreResult executeRestore( final String restoreUUID = UUIDs.randomBase64UUID(); List indicesToBeRestored = new ArrayList<>(); int totalShards = 0; + boolean metadataFromRemoteStore = false; ClusterState.Builder builder = ClusterState.builder(currentState); Metadata.Builder mdBuilder = Metadata.builder(currentState.metadata()); ClusterBlocks.Builder blocks = ClusterBlocks.builder().blocks(currentState.blocks()); @@ -190,7 +191,7 @@ private RemoteRestoreResult executeRestore( for (Map.Entry> indexMetadataEntry : indexMetadataMap.entrySet()) { String indexName = indexMetadataEntry.getKey(); IndexMetadata indexMetadata = indexMetadataEntry.getValue().v2(); - boolean metadataFromRemoteStore = indexMetadataEntry.getValue().v1(); + metadataFromRemoteStore = indexMetadataEntry.getValue().v1(); IndexMetadata updatedIndexMetadata = indexMetadata; if (metadataFromRemoteStore == false && restoreAllShards) { updatedIndexMetadata = IndexMetadata.builder(indexMetadata) @@ -204,27 +205,23 @@ private RemoteRestoreResult executeRestore( IndexId indexId = new IndexId(indexName, updatedIndexMetadata.getIndexUUID()); - Map indexShardRoutingTableMap = new HashMap<>(); if 
(metadataFromRemoteStore == false) { - indexShardRoutingTableMap = currentState.routingTable() + Map indexShardRoutingTableMap = currentState.routingTable() .index(indexName) .shards() .values() .stream() .collect(Collectors.toMap(IndexShardRoutingTable::shardId, Function.identity())); + + RecoverySource.RemoteStoreRecoverySource recoverySource = new RecoverySource.RemoteStoreRecoverySource( + restoreUUID, + updatedIndexMetadata.getCreationVersion(), + indexId + ); + + rtBuilder.addAsRemoteStoreRestore(updatedIndexMetadata, recoverySource, indexShardRoutingTableMap, restoreAllShards); } - RecoverySource.RemoteStoreRecoverySource recoverySource = new RecoverySource.RemoteStoreRecoverySource( - restoreUUID, - updatedIndexMetadata.getCreationVersion(), - indexId - ); - rtBuilder.addAsRemoteStoreRestore( - updatedIndexMetadata, - recoverySource, - indexShardRoutingTableMap, - restoreAllShards || metadataFromRemoteStore - ); blocks.updateBlocks(updatedIndexMetadata); mdBuilder.put(updatedIndexMetadata, true); indicesToBeRestored.add(indexName); @@ -239,7 +236,10 @@ private RemoteRestoreResult executeRestore( RoutingTable rt = rtBuilder.build(); ClusterState updatedState = builder.metadata(mdBuilder).blocks(blocks).routingTable(rt).build(); - return RemoteRestoreResult.build(restoreUUID, restoreInfo, allocationService.reroute(updatedState, "restored from remote store")); + if (metadataFromRemoteStore == false) { + updatedState = allocationService.reroute(updatedState, "restored from remote store"); + } + return RemoteRestoreResult.build(restoreUUID, restoreInfo, updatedState); } private void restoreGlobalMetadata(Metadata.Builder mdBuilder, Metadata remoteMetadata) { diff --git a/server/src/test/java/org/opensearch/gateway/ClusterStateUpdatersTests.java b/server/src/test/java/org/opensearch/gateway/ClusterStateUpdatersTests.java index 9b3fd45245ef7..1c43bb565ef69 100644 --- a/server/src/test/java/org/opensearch/gateway/ClusterStateUpdatersTests.java +++ 
b/server/src/test/java/org/opensearch/gateway/ClusterStateUpdatersTests.java @@ -41,6 +41,7 @@ import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodeRole; import org.opensearch.cluster.routing.IndexRoutingTable; +import org.opensearch.cluster.routing.IndexShardRoutingTable; import org.opensearch.cluster.routing.RecoverySource; import org.opensearch.cluster.routing.RoutingTable; import org.opensearch.cluster.routing.UnassignedInfo; @@ -52,12 +53,14 @@ import org.opensearch.common.settings.Settings; import org.opensearch.common.util.set.Sets; import org.opensearch.core.index.Index; +import org.opensearch.core.index.shard.ShardId; import org.opensearch.repositories.IndexId; import org.opensearch.test.OpenSearchTestCase; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; +import java.util.Map; import java.util.Set; import java.util.function.BiConsumer; import java.util.function.Function; @@ -275,7 +278,7 @@ public void testUpdateRoutingTable() { } } - public void testSkipRoutingTableUpdateWhenRemoteRecovery() { + public void testRoutingTableUpdateWhenRemoteStateRecovery() { final int numOfShards = randomIntBetween(1, 10); final IndexMetadata remoteMetadata = createIndexMetadata( @@ -286,7 +289,7 @@ public void testSkipRoutingTableUpdateWhenRemoteRecovery() { .build() ); - // Test remote index routing table is generated with ExistingStoreRecoverySource if no routing table is present + // Test remote index routing table is generated with ExistingStoreRecoverySource { final Index index = remoteMetadata.getIndex(); final ClusterState initialState = ClusterState.builder(ClusterState.EMPTY_STATE) @@ -322,48 +325,14 @@ public void testSkipRoutingTableUpdateWhenRemoteRecovery() { } - // Test remote index routing table is overridden if recovery source is not RemoteStoreRecoverySource + // Test remote index routing table is overridden if recovery source is RemoteStoreRecoverySource { - 
IndexRoutingTable.Builder remoteBuilderWithoutRemoteRecovery = new IndexRoutingTable.Builder(remoteMetadata.getIndex()) - .initializeAsNew(remoteMetadata); final Index index = remoteMetadata.getIndex(); - final ClusterState initialState = ClusterState.builder(ClusterState.EMPTY_STATE) - .metadata(Metadata.builder().put(remoteMetadata, false).build()) - .routingTable(new RoutingTable.Builder().add(remoteBuilderWithoutRemoteRecovery.build()).build()) - .build(); - assertTrue(initialState.routingTable().hasIndex(index)); - final ClusterState newState = updateRoutingTable(initialState); - IndexRoutingTable newRemoteIndexRoutingTable = newState.routingTable().index(remoteMetadata.getIndex()); - assertTrue(newState.routingTable().hasIndex(index)); - assertEquals( - 0, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.INDEX_CREATED) - ) - ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.CLUSTER_RECOVERED) - ) - ); - assertEquals( - 0, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource - ) - ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.EmptyStoreRecoverySource - ) - ); - - } - - // Test routing table update is skipped for a remote index - { + Map routingTableMap = new HashMap<>(); + for (int shardNumber = 0; shardNumber < remoteMetadata.getNumberOfShards(); shardNumber++) { + ShardId shardId = new ShardId(index, shardNumber); + routingTableMap.put(shardId, new IndexShardRoutingTable.Builder(new ShardId(remoteMetadata.getIndex(), 1)).build()); + } IndexRoutingTable.Builder remoteBuilderWithRemoteRecovery = new 
IndexRoutingTable.Builder(remoteMetadata.getIndex()) .initializeAsRemoteStoreRestore( remoteMetadata, @@ -372,10 +341,9 @@ public void testSkipRoutingTableUpdateWhenRemoteRecovery() { remoteMetadata.getCreationVersion(), new IndexId(remoteMetadata.getIndex().getName(), remoteMetadata.getIndexUUID()) ), - new HashMap<>(), + routingTableMap, true ); - final Index index = remoteMetadata.getIndex(); final ClusterState initialState = ClusterState.builder(ClusterState.EMPTY_STATE) .metadata(Metadata.builder().put(remoteMetadata, false).build()) .routingTable(new RoutingTable.Builder().add(remoteBuilderWithRemoteRecovery.build()).build()) @@ -387,205 +355,28 @@ public void testSkipRoutingTableUpdateWhenRemoteRecovery() { assertEquals( 0, newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.CLUSTER_RECOVERED) - ) - ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.EXISTING_INDEX_RESTORED) - ) - ); - assertEquals( - 0, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.EmptyStoreRecoverySource - ) - ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource - ) - ); - - } - - // Test reset routing table for 2 indices - one remote and one non remote. 
- // Routing table for non remote index should be updated and remote index routing table should remain intact - { - final IndexMetadata nonRemoteMetadata = createIndexMetadata( - "test-nonremote", - Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numOfShards).build() - ); - IndexRoutingTable.Builder remoteBuilderWithRemoteRecovery = new IndexRoutingTable.Builder(remoteMetadata.getIndex()) - .initializeAsRemoteStoreRestore( - remoteMetadata, - new RecoverySource.RemoteStoreRecoverySource( - UUIDs.randomBase64UUID(), - remoteMetadata.getCreationVersion(), - new IndexId(remoteMetadata.getIndex().getName(), remoteMetadata.getIndexUUID()) - ), - new HashMap<>(), - true - ); - IndexRoutingTable.Builder nonRemoteBuilderWithoutRemoteRecovery = new IndexRoutingTable.Builder(nonRemoteMetadata.getIndex()) - .initializeAsNew(nonRemoteMetadata); - final ClusterState initialState = ClusterState.builder(ClusterState.EMPTY_STATE) - .metadata(Metadata.builder().put(remoteMetadata, false).build()) - .metadata(Metadata.builder().put(nonRemoteMetadata, false).build()) - .routingTable( - new RoutingTable.Builder().add(remoteBuilderWithRemoteRecovery.build()) - .add(nonRemoteBuilderWithoutRemoteRecovery.build()) - .build() - ) - .build(); - assertTrue(initialState.routingTable().hasIndex(remoteMetadata.getIndex())); - assertTrue(initialState.routingTable().hasIndex(nonRemoteMetadata.getIndex())); - final ClusterState newState = updateRoutingTable(initialState); - assertTrue(newState.routingTable().hasIndex(remoteMetadata.getIndex())); - assertTrue(newState.routingTable().hasIndex(nonRemoteMetadata.getIndex())); - IndexRoutingTable newRemoteIndexRoutingTable = newState.routingTable().index(remoteMetadata.getIndex()); - IndexRoutingTable newNonRemoteIndexRoutingTable = newState.routingTable().index(nonRemoteMetadata.getIndex()); - assertEquals( - 0, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> 
shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.CLUSTER_RECOVERED) - ) - ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.EXISTING_INDEX_RESTORED) - ) - ); - assertEquals( - 0, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.EmptyStoreRecoverySource - ) - ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource - ) - ); - assertEquals( - 0, - newNonRemoteIndexRoutingTable.shardsMatchingPredicateCount( shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.INDEX_CREATED) ) ); assertEquals( numOfShards, - newNonRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.CLUSTER_RECOVERED) - ) - ); - assertEquals( - 0, - newNonRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource - ) - ); - assertEquals( - numOfShards, - newNonRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.EmptyStoreRecoverySource - ) - ); - } - - // Test reset routing table for 2 indices, both remote backed but only once index has RemoteStoreRecoverySource. 
- // Routing table for only remote index without RemoteStoreRecoverySource should be updated - { - final IndexMetadata remoteWithoutRemoteRecoveryMetadata = createIndexMetadata( - "test-remote-without-recovery", - Settings.builder() - .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numOfShards) - .put(IndexMetadata.SETTING_REMOTE_STORE_ENABLED, true) - .build() - ); - IndexRoutingTable.Builder remoteBuilderWithRemoteRecovery = new IndexRoutingTable.Builder(remoteMetadata.getIndex()) - .initializeAsRemoteStoreRestore( - remoteMetadata, - new RecoverySource.RemoteStoreRecoverySource( - UUIDs.randomBase64UUID(), - remoteMetadata.getCreationVersion(), - new IndexId(remoteMetadata.getIndex().getName(), remoteMetadata.getIndexUUID()) - ), - new HashMap<>(), - true - ); - IndexRoutingTable.Builder remoteBuilderWithoutRemoteRecovery = new IndexRoutingTable.Builder( - remoteWithoutRemoteRecoveryMetadata.getIndex() - ).initializeAsNew(remoteWithoutRemoteRecoveryMetadata); - final ClusterState initialState = ClusterState.builder(ClusterState.EMPTY_STATE) - .metadata(Metadata.builder().put(remoteMetadata, false).build()) - .metadata(Metadata.builder().put(remoteWithoutRemoteRecoveryMetadata, false).build()) - .routingTable( - new RoutingTable.Builder().add(remoteBuilderWithRemoteRecovery.build()) - .add(remoteBuilderWithoutRemoteRecovery.build()) - .build() - ) - .build(); - assertTrue(initialState.routingTable().hasIndex(remoteMetadata.getIndex())); - assertTrue(initialState.routingTable().hasIndex(remoteWithoutRemoteRecoveryMetadata.getIndex())); - final ClusterState newState = updateRoutingTable(initialState); - assertTrue(newState.routingTable().hasIndex(remoteMetadata.getIndex())); - assertTrue(newState.routingTable().hasIndex(remoteWithoutRemoteRecoveryMetadata.getIndex())); - IndexRoutingTable newRemoteIndexRoutingTable = newState.routingTable().index(remoteMetadata.getIndex()); - IndexRoutingTable newRemoteWithoutRemoteRecoveryIndexRoutingTable = newState.routingTable() - 
.index(remoteWithoutRemoteRecoveryMetadata.getIndex()); - assertEquals( - 0, newRemoteIndexRoutingTable.shardsMatchingPredicateCount( shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.CLUSTER_RECOVERED) ) ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.EXISTING_INDEX_RESTORED) - ) - ); assertEquals( 0, newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.EmptyStoreRecoverySource - ) - ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource - ) - ); - assertEquals( - 0, - newRemoteWithoutRemoteRecoveryIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.INDEX_CREATED) - ) - ); - assertEquals( - numOfShards, - newRemoteWithoutRemoteRecoveryIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.CLUSTER_RECOVERED) - ) - ); - assertEquals( - 0, - newRemoteWithoutRemoteRecoveryIndexRoutingTable.shardsMatchingPredicateCount( shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource ) ); assertEquals( numOfShards, - newRemoteWithoutRemoteRecoveryIndexRoutingTable.shardsMatchingPredicateCount( + newRemoteIndexRoutingTable.shardsMatchingPredicateCount( shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.EmptyStoreRecoverySource ) ); + } } From e12ab0f9ed5cfa32fbbcf654253180a72e50d0d7 Mon Sep 17 00:00:00 2001 From: Dhwanil Patel Date: Fri, 20 Oct 2023 18:18:36 +0530 Subject: [PATCH 03/45] Fix flaky remote cluster state UT (#10780) Signed-off-by: Dhwanil Patel --- 
.../gateway/remote/RemoteClusterStateServiceTests.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index 433eac63e9580..bcc58789dd6fd 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -458,7 +458,7 @@ public void testGlobalMetadataOnlyUpdated() throws IOException { mockBlobStoreObjects(); final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder().term(1L).build(); final ClusterState initialClusterState = ClusterState.builder(ClusterName.DEFAULT) - .metadata(Metadata.builder().coordinationMetadata(coordinationMetadata)) + .metadata(Metadata.builder().coordinationMetadata(coordinationMetadata).version(randomNonNegativeLong())) .build(); final ClusterMetadataManifest initialManifest = ClusterMetadataManifest.builder() .codecVersion(2) @@ -479,6 +479,7 @@ public void testGlobalMetadataOnlyUpdated() throws IOException { // new cluster state where only global metadata is different Metadata newMetadata = Metadata.builder(clusterState.metadata()) .persistentSettings(Settings.builder().put("cluster.blocks.read_only", true).build()) + .version(randomNonNegativeLong()) .build(); ClusterState newClusterState = ClusterState.builder(clusterState).metadata(newMetadata).build(); @@ -1281,7 +1282,7 @@ private static ClusterState.Builder generateClusterStateWithOneIndex() { .version(1L) .stateUUID("state-uuid") .metadata( - Metadata.builder().put(indexMetadata, true).clusterUUID("cluster-uuid").coordinationMetadata(coordinationMetadata).build() + Metadata.builder().version(randomNonNegativeLong()).put(indexMetadata, true).clusterUUID("cluster-uuid").coordinationMetadata(coordinationMetadata).build() ); } From 
5093cc71476f5796f3b91fb18ff867cfe37588c0 Mon Sep 17 00:00:00 2001 From: Dhwanil Patel Date: Fri, 20 Oct 2023 19:18:25 +0530 Subject: [PATCH 04/45] Fix spotless failure (#10782) Signed-off-by: Dhwanil Patel --- .../gateway/remote/RemoteClusterStateServiceTests.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index bcc58789dd6fd..5202f31c514ed 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -1282,7 +1282,12 @@ private static ClusterState.Builder generateClusterStateWithOneIndex() { .version(1L) .stateUUID("state-uuid") .metadata( - Metadata.builder().version(randomNonNegativeLong()).put(indexMetadata, true).clusterUUID("cluster-uuid").coordinationMetadata(coordinationMetadata).build() + Metadata.builder() + .version(randomNonNegativeLong()) + .put(indexMetadata, true) + .clusterUUID("cluster-uuid") + .coordinationMetadata(coordinationMetadata) + .build() ); } From e691df09c66dcc1693897543fd7633c4b208ce48 Mon Sep 17 00:00:00 2001 From: rayshrey <121871912+rayshrey@users.noreply.github.com> Date: Fri, 20 Oct 2023 20:36:20 +0530 Subject: [PATCH 05/45] Add tracing instrumentation for indexing paths (#10273) * Add tracing instrumentation for indexing paths Signed-off-by: Shreyansh Ray * Fix failing tests and review changes Signed-off-by: Shreyansh Ray * Fix test failures due to Span not being properly closed Signed-off-by: Shreyansh Ray * Changes to spans in primary and replica actions Signed-off-by: Shreyansh Ray * Review comments fixes and refactoring Signed-off-by: Shreyansh Ray * Precommit auto-changes Signed-off-by: Shreyansh Ray * Add refresh policy as attribute Signed-off-by: Shreyansh Ray * Fix changelog entry 
Signed-off-by: Shreyansh Ray * Instrument primary/replica write in TransportWriteAction instead of TransportShardBulkAction Signed-off-by: Shreyansh Ray * Modify SpanBuilder Signed-off-by: Shreyansh Ray * spotlessApply and precommit Signed-off-by: Shreyansh Ray * Change span names Signed-off-by: Shreyansh Ray * Pass Noop Tracer instead of injected tracer Signed-off-by: Shreyansh Ray * Reverting previous changes Signed-off-by: Shreyansh Ray * Remove tracer variable from TransportShardBulkAction Signed-off-by: Shreyansh Ray --------- Signed-off-by: Shreyansh Ray --- CHANGELOG.md | 1 + .../action/bulk/TransportBulkAction.java | 110 +++++++++++------- .../action/bulk/TransportShardBulkAction.java | 7 +- .../TransportResyncReplicationAction.java | 7 +- .../replication/TransportWriteAction.java | 24 +++- .../index/seqno/RetentionLeaseSyncAction.java | 7 +- .../telemetry/tracing/AttributeNames.java | 25 ++++ .../telemetry/tracing/SpanBuilder.java | 20 ++++ ...ActionIndicesThatCannotBeCreatedTests.java | 4 +- .../bulk/TransportBulkActionIngestTests.java | 4 +- .../action/bulk/TransportBulkActionTests.java | 3 +- .../bulk/TransportBulkActionTookTests.java | 3 +- .../bulk/TransportShardBulkActionTests.java | 13 ++- ...TransportResyncReplicationActionTests.java | 6 +- ...rtWriteActionForIndexingPressureTests.java | 3 +- .../TransportWriteActionTests.java | 6 +- .../seqno/RetentionLeaseSyncActionTests.java | 12 +- .../snapshots/SnapshotResiliencyTests.java | 9 +- 18 files changed, 192 insertions(+), 72 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c52c43a35b8d..d7d492679c79d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -113,6 +113,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Remote Store] Add Remote Store backpressure rejection stats to `_nodes/stats` ([#10524](https://github.com/opensearch-project/OpenSearch/pull/10524)) - [BUG] Fix java.lang.SecurityException in repository-gcs plugin 
([#10642](https://github.com/opensearch-project/OpenSearch/pull/10642)) - Add telemetry tracer/metric enable flag and integ test. ([#10395](https://github.com/opensearch-project/OpenSearch/pull/10395)) +- Add instrumentation for indexing in transport bulk action and transport shard bulk action. ([#10273](https://github.com/opensearch-project/OpenSearch/pull/10273)) ### Deprecated diff --git a/server/src/main/java/org/opensearch/action/bulk/TransportBulkAction.java b/server/src/main/java/org/opensearch/action/bulk/TransportBulkAction.java index 726ba7ba119af..4a9b07c12821d 100644 --- a/server/src/main/java/org/opensearch/action/bulk/TransportBulkAction.java +++ b/server/src/main/java/org/opensearch/action/bulk/TransportBulkAction.java @@ -85,6 +85,11 @@ import org.opensearch.ingest.IngestService; import org.opensearch.node.NodeClosedException; import org.opensearch.tasks.Task; +import org.opensearch.telemetry.tracing.Span; +import org.opensearch.telemetry.tracing.SpanBuilder; +import org.opensearch.telemetry.tracing.SpanScope; +import org.opensearch.telemetry.tracing.Tracer; +import org.opensearch.telemetry.tracing.listener.TraceableActionListener; import org.opensearch.threadpool.ThreadPool; import org.opensearch.threadpool.ThreadPool.Names; import org.opensearch.transport.TransportService; @@ -133,6 +138,7 @@ public class TransportBulkAction extends HandledTransportAction() { - @Override - public void onResponse(BulkShardResponse bulkShardResponse) { - for (BulkItemResponse bulkItemResponse : bulkShardResponse.getResponses()) { - // we may have no response if item failed - if (bulkItemResponse.getResponse() != null) { - bulkItemResponse.getResponse().setShardInfo(bulkShardResponse.getShardInfo()); - } - docStatusStats.inc(bulkItemResponse.status()); - responses.set(bulkItemResponse.getItemId(), bulkItemResponse); - } + final Span span = tracer.startSpan(SpanBuilder.from("bulkShardAction", nodeId, bulkShardRequest)); + try (SpanScope spanScope = 
tracer.withSpanInScope(span)) { + shardBulkAction.execute( + bulkShardRequest, + TraceableActionListener.create(ActionListener.runBefore(new ActionListener() { + @Override + public void onResponse(BulkShardResponse bulkShardResponse) { + for (BulkItemResponse bulkItemResponse : bulkShardResponse.getResponses()) { + // we may have no response if item failed + if (bulkItemResponse.getResponse() != null) { + bulkItemResponse.getResponse().setShardInfo(bulkShardResponse.getShardInfo()); + } - if (counter.decrementAndGet() == 0) { - finishHim(); - } - } + docStatusStats.inc(bulkItemResponse.status()); + responses.set(bulkItemResponse.getItemId(), bulkItemResponse); + } - @Override - public void onFailure(Exception e) { - // create failures for all relevant requests - for (BulkItemRequest request : requests) { - final String indexName = concreteIndices.getConcreteIndex(request.index()).getName(); - final DocWriteRequest docWriteRequest = request.request(); - final BulkItemResponse bulkItemResponse = new BulkItemResponse( - request.id(), - docWriteRequest.opType(), - new BulkItemResponse.Failure(indexName, docWriteRequest.id(), e) - ); + if (counter.decrementAndGet() == 0) { + finishHim(); + } + } - docStatusStats.inc(bulkItemResponse.status()); - responses.set(request.id(), bulkItemResponse); - } + @Override + public void onFailure(Exception e) { + // create failures for all relevant requests + for (BulkItemRequest request : requests) { + final String indexName = concreteIndices.getConcreteIndex(request.index()).getName(); + final DocWriteRequest docWriteRequest = request.request(); + final BulkItemResponse bulkItemResponse = new BulkItemResponse( + request.id(), + docWriteRequest.opType(), + new BulkItemResponse.Failure(indexName, docWriteRequest.id(), e) + ); + + docStatusStats.inc(bulkItemResponse.status()); + responses.set(request.id(), bulkItemResponse); + } - if (counter.decrementAndGet() == 0) { - finishHim(); - } - } + if (counter.decrementAndGet() == 0) { + 
finishHim(); + } + } - private void finishHim() { - indicesService.addDocStatusStats(docStatusStats); - listener.onResponse( - new BulkResponse(responses.toArray(new BulkItemResponse[responses.length()]), buildTookInMillis(startTimeNanos)) - ); - } - }, releasable::close)); + private void finishHim() { + indicesService.addDocStatusStats(docStatusStats); + listener.onResponse( + new BulkResponse( + responses.toArray(new BulkItemResponse[responses.length()]), + buildTookInMillis(startTimeNanos) + ) + ); + } + }, releasable::close), span, tracer) + ); + } catch (Exception e) { + span.setError(e); + span.endSpan(); + throw e; + } } bulkRequest = null; // allow memory for bulk request items to be reclaimed before all items have been completed } diff --git a/server/src/main/java/org/opensearch/action/bulk/TransportShardBulkAction.java b/server/src/main/java/org/opensearch/action/bulk/TransportShardBulkAction.java index fddda0ef1f9a7..268a6ed6f85b8 100644 --- a/server/src/main/java/org/opensearch/action/bulk/TransportShardBulkAction.java +++ b/server/src/main/java/org/opensearch/action/bulk/TransportShardBulkAction.java @@ -99,6 +99,7 @@ import org.opensearch.indices.SystemIndices; import org.opensearch.node.NodeClosedException; import org.opensearch.tasks.Task; +import org.opensearch.telemetry.tracing.Tracer; import org.opensearch.threadpool.ThreadPool; import org.opensearch.threadpool.ThreadPool.Names; import org.opensearch.transport.TransportChannel; @@ -161,7 +162,8 @@ public TransportShardBulkAction( IndexingPressureService indexingPressureService, SegmentReplicationPressureService segmentReplicationPressureService, RemoteStorePressureService remoteStorePressureService, - SystemIndices systemIndices + SystemIndices systemIndices, + Tracer tracer ) { super( settings, @@ -177,7 +179,8 @@ public TransportShardBulkAction( EXECUTOR_NAME_FUNCTION, false, indexingPressureService, - systemIndices + systemIndices, + tracer ); this.updateHelper = updateHelper; 
this.mappingUpdatedAction = mappingUpdatedAction; diff --git a/server/src/main/java/org/opensearch/action/resync/TransportResyncReplicationAction.java b/server/src/main/java/org/opensearch/action/resync/TransportResyncReplicationAction.java index 032fe83e2220b..9d60706d1f100 100644 --- a/server/src/main/java/org/opensearch/action/resync/TransportResyncReplicationAction.java +++ b/server/src/main/java/org/opensearch/action/resync/TransportResyncReplicationAction.java @@ -54,6 +54,7 @@ import org.opensearch.indices.IndicesService; import org.opensearch.indices.SystemIndices; import org.opensearch.tasks.Task; +import org.opensearch.telemetry.tracing.Tracer; import org.opensearch.threadpool.ThreadPool; import org.opensearch.threadpool.ThreadPool.Names; import org.opensearch.transport.TransportException; @@ -93,7 +94,8 @@ public TransportResyncReplicationAction( ShardStateAction shardStateAction, ActionFilters actionFilters, IndexingPressureService indexingPressureService, - SystemIndices systemIndices + SystemIndices systemIndices, + Tracer tracer ) { super( settings, @@ -109,7 +111,8 @@ public TransportResyncReplicationAction( EXECUTOR_NAME_FUNCTION, true, /* we should never reject resync because of thread pool capacity on primary */ indexingPressureService, - systemIndices + systemIndices, + tracer ); } diff --git a/server/src/main/java/org/opensearch/action/support/replication/TransportWriteAction.java b/server/src/main/java/org/opensearch/action/support/replication/TransportWriteAction.java index a0b5299805868..9ebfa8cfd0df8 100644 --- a/server/src/main/java/org/opensearch/action/support/replication/TransportWriteAction.java +++ b/server/src/main/java/org/opensearch/action/support/replication/TransportWriteAction.java @@ -59,6 +59,11 @@ import org.opensearch.index.translog.Translog.Location; import org.opensearch.indices.IndicesService; import org.opensearch.indices.SystemIndices; +import org.opensearch.telemetry.tracing.Span; +import 
org.opensearch.telemetry.tracing.SpanBuilder; +import org.opensearch.telemetry.tracing.SpanScope; +import org.opensearch.telemetry.tracing.Tracer; +import org.opensearch.telemetry.tracing.listener.TraceableActionListener; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportService; @@ -82,6 +87,7 @@ public abstract class TransportWriteAction< protected final SystemIndices systemIndices; private final Function executorFunction; + private final Tracer tracer; protected TransportWriteAction( Settings settings, @@ -97,7 +103,8 @@ protected TransportWriteAction( Function executorFunction, boolean forceExecutionOnPrimary, IndexingPressureService indexingPressureService, - SystemIndices systemIndices + SystemIndices systemIndices, + Tracer tracer ) { // We pass ThreadPool.Names.SAME to the super class as we control the dispatching to the // ThreadPool.Names.WRITE/ThreadPool.Names.SYSTEM_WRITE thread pools in this class. @@ -119,6 +126,7 @@ protected TransportWriteAction( this.executorFunction = executorFunction; this.indexingPressureService = indexingPressureService; this.systemIndices = systemIndices; + this.tracer = tracer; } protected String executor(IndexShard shard) { @@ -220,7 +228,12 @@ protected void shardOperationOnPrimary( threadPool.executor(executor).execute(new ActionRunnable>(listener) { @Override protected void doRun() { - dispatchedShardOperationOnPrimary(request, primary, listener); + Span span = tracer.startSpan( + SpanBuilder.from("dispatchedShardOperationOnPrimary", clusterService.localNode().getId(), request) + ); + try (SpanScope spanScope = tracer.withSpanInScope(span)) { + dispatchedShardOperationOnPrimary(request, primary, TraceableActionListener.create(listener, span, tracer)); + } } @Override @@ -248,7 +261,12 @@ protected void shardOperationOnReplica(ReplicaRequest request, IndexShard replic threadPool.executor(executorFunction.apply(replica)).execute(new ActionRunnable(listener) { @Override protected void 
doRun() { - dispatchedShardOperationOnReplica(request, replica, listener); + Span span = tracer.startSpan( + SpanBuilder.from("dispatchedShardOperationOnReplica", clusterService.localNode().getId(), request) + ); + try (SpanScope spanScope = tracer.withSpanInScope(span)) { + dispatchedShardOperationOnReplica(request, replica, TraceableActionListener.create(listener, span, tracer)); + } } @Override diff --git a/server/src/main/java/org/opensearch/index/seqno/RetentionLeaseSyncAction.java b/server/src/main/java/org/opensearch/index/seqno/RetentionLeaseSyncAction.java index f74fc7eefe65c..ca3c7e1d49700 100644 --- a/server/src/main/java/org/opensearch/index/seqno/RetentionLeaseSyncAction.java +++ b/server/src/main/java/org/opensearch/index/seqno/RetentionLeaseSyncAction.java @@ -62,6 +62,7 @@ import org.opensearch.indices.IndicesService; import org.opensearch.indices.SystemIndices; import org.opensearch.tasks.Task; +import org.opensearch.telemetry.tracing.Tracer; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportException; import org.opensearch.transport.TransportResponseHandler; @@ -99,7 +100,8 @@ public RetentionLeaseSyncAction( final ShardStateAction shardStateAction, final ActionFilters actionFilters, final IndexingPressureService indexingPressureService, - final SystemIndices systemIndices + final SystemIndices systemIndices, + final Tracer tracer ) { super( settings, @@ -115,7 +117,8 @@ public RetentionLeaseSyncAction( ignore -> ThreadPool.Names.MANAGEMENT, false, indexingPressureService, - systemIndices + systemIndices, + tracer ); } diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/AttributeNames.java b/server/src/main/java/org/opensearch/telemetry/tracing/AttributeNames.java index a9514c298ef88..b6b2cf360d1c5 100644 --- a/server/src/main/java/org/opensearch/telemetry/tracing/AttributeNames.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/AttributeNames.java @@ -69,4 +69,29 @@ private 
AttributeNames() { * Action Name. */ public static final String TRANSPORT_ACTION = "action"; + + /** + * Index Name + */ + public static final String INDEX = "index"; + + /** + * Shard ID + */ + public static final String SHARD_ID = "shard_id"; + + /** + * Number of request items in bulk request + */ + public static final String BULK_REQUEST_ITEMS = "bulk_request_items"; + + /** + * Node ID + */ + public static final String NODE_ID = "node_id"; + + /** + * Refresh Policy + */ + public static final String REFRESH_POLICY = "refresh_policy"; } diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/SpanBuilder.java b/server/src/main/java/org/opensearch/telemetry/tracing/SpanBuilder.java index d97fbd371ab2a..1dce422943b7a 100644 --- a/server/src/main/java/org/opensearch/telemetry/tracing/SpanBuilder.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/SpanBuilder.java @@ -8,6 +8,8 @@ package org.opensearch.telemetry.tracing; +import org.opensearch.action.bulk.BulkShardRequest; +import org.opensearch.action.support.replication.ReplicatedWriteRequest; import org.opensearch.common.annotation.InternalApi; import org.opensearch.core.common.Strings; import org.opensearch.http.HttpRequest; @@ -68,6 +70,10 @@ public static SpanCreationContext from(String action, Transport.Connection conne return SpanCreationContext.server().name(createSpanName(action, connection)).attributes(buildSpanAttributes(action, connection)); } + public static SpanCreationContext from(String spanName, String nodeId, ReplicatedWriteRequest request) { + return SpanCreationContext.server().name(spanName).attributes(buildSpanAttributes(nodeId, request)); + } + private static String createSpanName(HttpRequest httpRequest) { return httpRequest.method().name() + SEPARATOR + httpRequest.uri(); } @@ -150,4 +156,18 @@ private static Attributes buildSpanAttributes(String action, TcpChannel tcpChann return attributes; } + private static Attributes buildSpanAttributes(String nodeId, 
ReplicatedWriteRequest request) { + Attributes attributes = Attributes.create() + .addAttribute(AttributeNames.NODE_ID, nodeId) + .addAttribute(AttributeNames.REFRESH_POLICY, request.getRefreshPolicy().getValue()); + if (request.shardId() != null) { + attributes.addAttribute(AttributeNames.INDEX, request.shardId().getIndexName()) + .addAttribute(AttributeNames.SHARD_ID, request.shardId().getId()); + } + if (request instanceof BulkShardRequest) { + attributes.addAttribute(AttributeNames.BULK_REQUEST_ITEMS, ((BulkShardRequest) request).items().length); + } + return attributes; + } + } diff --git a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIndicesThatCannotBeCreatedTests.java b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIndicesThatCannotBeCreatedTests.java index 0f67eff26cbde..cf7080ab2fc06 100644 --- a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIndicesThatCannotBeCreatedTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIndicesThatCannotBeCreatedTests.java @@ -54,6 +54,7 @@ import org.opensearch.index.VersionType; import org.opensearch.indices.SystemIndices; import org.opensearch.tasks.Task; +import org.opensearch.telemetry.tracing.noop.NoopTracer; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.VersionUtils; import org.opensearch.threadpool.ThreadPool; @@ -155,7 +156,8 @@ private void indicesThatCannotBeCreatedTestCase( new ClusterService(Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), null) ), null, - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ) { @Override void executeBulk( diff --git a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIngestTests.java b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIngestTests.java index 515f6eae28a34..141c630b94020 100644 --- 
a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIngestTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIngestTests.java @@ -70,6 +70,7 @@ import org.opensearch.indices.SystemIndices; import org.opensearch.ingest.IngestService; import org.opensearch.tasks.Task; +import org.opensearch.telemetry.tracing.noop.NoopTracer; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.VersionUtils; import org.opensearch.threadpool.ThreadPool; @@ -172,7 +173,8 @@ class TestTransportBulkAction extends TransportBulkAction { new ClusterService(SETTINGS, new ClusterSettings(SETTINGS, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), null) ), null, - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); } diff --git a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTests.java b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTests.java index 10cad6fb147a2..6bbd740df7f9c 100644 --- a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTests.java @@ -118,7 +118,8 @@ class TestTransportBulkAction extends TransportBulkAction { new AutoCreateIndex(Settings.EMPTY, clusterService.getClusterSettings(), new Resolver(), new SystemIndices(emptyMap())), new IndexingPressureService(Settings.EMPTY, clusterService), mock(IndicesService.class), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); } diff --git a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTookTests.java b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTookTests.java index 852e3837e1e7a..9d5b4430ea395 100644 --- a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTookTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTookTests.java @@ -282,7 +282,8 @@ static class 
TestTransportBulkAction extends TransportBulkAction { new IndexingPressureService(Settings.EMPTY, clusterService), null, new SystemIndices(emptyMap()), - relativeTimeProvider + relativeTimeProvider, + NoopTracer.INSTANCE ); } diff --git a/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java b/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java index fe0fdd07025d9..b325cfa197933 100644 --- a/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java @@ -88,6 +88,7 @@ import org.opensearch.index.translog.Translog; import org.opensearch.indices.IndicesService; import org.opensearch.indices.SystemIndices; +import org.opensearch.telemetry.tracing.noop.NoopTracer; import org.opensearch.threadpool.TestThreadPool; import org.opensearch.threadpool.ThreadPool; import org.opensearch.threadpool.ThreadPool.Names; @@ -1074,7 +1075,8 @@ public void testHandlePrimaryTermValidationRequestWithDifferentAllocationId() { mock(IndexingPressureService.class), mock(SegmentReplicationPressureService.class), mock(RemoteStorePressureService.class), - mock(SystemIndices.class) + mock(SystemIndices.class), + NoopTracer.INSTANCE ); action.handlePrimaryTermValidationRequest( new TransportShardBulkAction.PrimaryTermValidationRequest(aId + "-1", 1, shardId), @@ -1105,7 +1107,8 @@ public void testHandlePrimaryTermValidationRequestWithOlderPrimaryTerm() { mock(IndexingPressureService.class), mock(SegmentReplicationPressureService.class), mock(RemoteStorePressureService.class), - mock(SystemIndices.class) + mock(SystemIndices.class), + NoopTracer.INSTANCE ); action.handlePrimaryTermValidationRequest( new TransportShardBulkAction.PrimaryTermValidationRequest(aId, 1, shardId), @@ -1136,7 +1139,8 @@ public void testHandlePrimaryTermValidationRequestSuccess() { mock(IndexingPressureService.class), 
mock(SegmentReplicationPressureService.class), mock(RemoteStorePressureService.class), - mock(SystemIndices.class) + mock(SystemIndices.class), + NoopTracer.INSTANCE ); action.handlePrimaryTermValidationRequest( new TransportShardBulkAction.PrimaryTermValidationRequest(aId, 1, shardId), @@ -1178,7 +1182,8 @@ private TransportShardBulkAction createAction() { mock(IndexingPressureService.class), mock(SegmentReplicationPressureService.class), mock(RemoteStorePressureService.class), - mock(SystemIndices.class) + mock(SystemIndices.class), + NoopTracer.INSTANCE ); } diff --git a/server/src/test/java/org/opensearch/action/resync/TransportResyncReplicationActionTests.java b/server/src/test/java/org/opensearch/action/resync/TransportResyncReplicationActionTests.java index 3bd8930064563..da87a0a967f53 100644 --- a/server/src/test/java/org/opensearch/action/resync/TransportResyncReplicationActionTests.java +++ b/server/src/test/java/org/opensearch/action/resync/TransportResyncReplicationActionTests.java @@ -203,7 +203,8 @@ public void testResyncDoesNotBlockOnPrimaryAction() throws Exception { shardStateAction, new ActionFilters(new HashSet<>()), new IndexingPressureService(Settings.EMPTY, clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); assertThat(action.globalBlockLevel(), nullValue()); @@ -256,7 +257,8 @@ private TransportResyncReplicationAction createAction() { mock(ShardStateAction.class), new ActionFilters(new HashSet<>()), mock(IndexingPressureService.class), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); } } diff --git a/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionForIndexingPressureTests.java b/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionForIndexingPressureTests.java index 4a2185d1558f7..7212b1f5efe13 100644 --- 
a/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionForIndexingPressureTests.java +++ b/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionForIndexingPressureTests.java @@ -392,7 +392,8 @@ protected TestAction( ignore -> ThreadPool.Names.SAME, false, TransportWriteActionForIndexingPressureTests.this.indexingPressureService, - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); } diff --git a/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionTests.java b/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionTests.java index 9d2069ac16190..b4549f82230bf 100644 --- a/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionTests.java +++ b/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionTests.java @@ -477,7 +477,8 @@ protected TestAction(boolean withDocumentFailureOnPrimary, boolean withDocumentF ignore -> ThreadPool.Names.SAME, false, new IndexingPressureService(Settings.EMPTY, TransportWriteActionTests.this.clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); this.withDocumentFailureOnPrimary = withDocumentFailureOnPrimary; this.withDocumentFailureOnReplica = withDocumentFailureOnReplica; @@ -505,7 +506,8 @@ protected TestAction( ignore -> ThreadPool.Names.SAME, false, new IndexingPressureService(settings, clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); this.withDocumentFailureOnPrimary = false; this.withDocumentFailureOnReplica = false; diff --git a/server/src/test/java/org/opensearch/index/seqno/RetentionLeaseSyncActionTests.java b/server/src/test/java/org/opensearch/index/seqno/RetentionLeaseSyncActionTests.java index d9bca55a208c2..63a9ac2f2e8ec 100644 --- 
a/server/src/test/java/org/opensearch/index/seqno/RetentionLeaseSyncActionTests.java +++ b/server/src/test/java/org/opensearch/index/seqno/RetentionLeaseSyncActionTests.java @@ -125,7 +125,8 @@ public void testRetentionLeaseSyncActionOnPrimary() { shardStateAction, new ActionFilters(Collections.emptySet()), new IndexingPressureService(Settings.EMPTY, clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); final RetentionLeases retentionLeases = mock(RetentionLeases.class); final RetentionLeaseSyncAction.Request request = new RetentionLeaseSyncAction.Request(indexShard.shardId(), retentionLeases); @@ -162,7 +163,8 @@ public void testRetentionLeaseSyncActionOnReplica() throws Exception { shardStateAction, new ActionFilters(Collections.emptySet()), new IndexingPressureService(Settings.EMPTY, clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); final RetentionLeases retentionLeases = mock(RetentionLeases.class); final RetentionLeaseSyncAction.Request request = new RetentionLeaseSyncAction.Request(indexShard.shardId(), retentionLeases); @@ -203,7 +205,8 @@ public void testBlocks() { shardStateAction, new ActionFilters(Collections.emptySet()), new IndexingPressureService(Settings.EMPTY, clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); assertNull(action.indexBlockLevel()); @@ -233,7 +236,8 @@ private RetentionLeaseSyncAction createAction() { shardStateAction, new ActionFilters(Collections.emptySet()), new IndexingPressureService(Settings.EMPTY, clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); } diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java index 2f9f38d18a064..710717532ceb4 100644 --- 
a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java @@ -2124,7 +2124,8 @@ public void onFailure(final Exception e) { shardStateAction, actionFilters, new IndexingPressureService(settings, clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ) ), new GlobalCheckpointSyncAction( @@ -2190,7 +2191,8 @@ public void onFailure(final Exception e) { mock(ThreadPool.class) ), mock(RemoteStorePressureService.class), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); actions.put( BulkAction.INSTANCE, @@ -2214,7 +2216,8 @@ public void onFailure(final Exception e) { new AutoCreateIndex(settings, clusterSettings, indexNameExpressionResolver, new SystemIndices(emptyMap())), new IndexingPressureService(settings, clusterService), mock(IndicesService.class), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ) ); final RestoreService restoreService = new RestoreService( From a1fde65fe2ad1acda4364cf5fb751e9b494327ce Mon Sep 17 00:00:00 2001 From: Ashish Date: Fri, 20 Oct 2023 21:06:42 +0530 Subject: [PATCH 06/45] [Remote Store] Fix relocation failure due to transport receive timeout (#10761) * [Remote Store] Fix relocation failure due to transport receive timeout Signed-off-by: Ashish Singh * Fix existing extended shardIdle for remote backed shards Signed-off-by: Ashish Singh * Incorporate PR review comments Signed-off-by: Ashish Singh --------- Signed-off-by: Ashish Singh --- .../opensearch/remotestore/RemoteStoreIT.java | 23 +++++++++++++++++++ .../org/opensearch/index/IndexSettings.java | 3 +++ .../opensearch/index/shard/IndexShard.java | 6 ++++- .../translog/InternalTranslogManager.java | 10 ++++---- .../index/translog/RemoteFsTranslog.java | 5 ++++ .../opensearch/index/translog/Translog.java | 4 ++++ .../recovery/PeerRecoverySourceService.java | 3 ++- 
.../recovery/RemoteRecoveryTargetHandler.java | 13 +++++++++-- .../index/shard/RemoteIndexShardTests.java | 9 ++++++++ .../SegmentReplicationIndexShardTests.java | 12 ++++++---- 10 files changed, 75 insertions(+), 13 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java index 1fb5c2052aded..b3b4f8e10fd31 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java @@ -509,4 +509,27 @@ public void testRestoreSnapshotToIndexWithSameNameDifferentUUID() throws Excepti assertHitCount(client(dataNodes.get(1)).prepareSearch(INDEX_NAME).setSize(0).get(), 50); }); } + + public void testNoSearchIdleForAnyReplicaCount() throws ExecutionException, InterruptedException { + internalCluster().startClusterManagerOnlyNode(); + String primaryShardNode = internalCluster().startDataOnlyNodes(1).get(0); + + createIndex(INDEX_NAME, remoteStoreIndexSettings(0)); + ensureGreen(INDEX_NAME); + IndexShard indexShard = getIndexShard(primaryShardNode); + assertFalse(indexShard.isSearchIdleSupported()); + + String replicaShardNode = internalCluster().startDataOnlyNodes(1).get(0); + assertAcked( + client().admin() + .indices() + .prepareUpdateSettings(INDEX_NAME) + .setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)) + ); + ensureGreen(INDEX_NAME); + assertFalse(indexShard.isSearchIdleSupported()); + + indexShard = getIndexShard(replicaShardNode); + assertFalse(indexShard.isSearchIdleSupported()); + } } diff --git a/server/src/main/java/org/opensearch/index/IndexSettings.java b/server/src/main/java/org/opensearch/index/IndexSettings.java index e90e9259f6a5c..99d2b5a74c406 100644 --- a/server/src/main/java/org/opensearch/index/IndexSettings.java +++ b/server/src/main/java/org/opensearch/index/IndexSettings.java 
@@ -1024,6 +1024,9 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti } private void setSearchIdleAfter(TimeValue searchIdleAfter) { + if (this.isRemoteStoreEnabled) { + logger.warn("Search idle is not supported for remote backed indices"); + } if (this.replicationType == ReplicationType.SEGMENT && this.getNumberOfReplicas() > 0) { logger.warn("Search idle is not supported for indices with replicas using 'replication.type: SEGMENT'"); } diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 5ebfd3863a6cf..1b7d1b2716979 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -4425,7 +4425,6 @@ public final boolean isSearchIdle() { } /** - * * Returns true if this shard supports search idle. *

* Indices using Segment Replication will ignore search idle unless there are no replicas. @@ -4434,6 +4433,11 @@ public final boolean isSearchIdle() { * a new set of segments. */ public final boolean isSearchIdleSupported() { + // If the index is remote store backed, then search idle is not supported. This is to ensure that async refresh + // task continues to upload to remote store periodically. + if (isRemoteTranslogEnabled()) { + return false; + } return indexSettings.isSegRepEnabled() == false || indexSettings.getNumberOfReplicas() == 0; } diff --git a/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java b/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java index 85c52b907d326..4d0fc13d433c6 100644 --- a/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java +++ b/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java @@ -430,10 +430,10 @@ public String getTranslogUUID() { * @return if the translog should be flushed */ public boolean shouldPeriodicallyFlush(long localCheckpointOfLastCommit, long flushThreshold) { - final long translogGenerationOfLastCommit = translog.getMinGenerationForSeqNo( - localCheckpointOfLastCommit + 1 - ).translogFileGeneration; - if (translog.sizeInBytesByMinGen(translogGenerationOfLastCommit) < flushThreshold) { + // This is the minimum seqNo that is referred in translog and considered for calculating translog size + long minTranslogRefSeqNo = translog.getMinUnreferencedSeqNoInSegments(localCheckpointOfLastCommit + 1); + final long minReferencedTranslogGeneration = translog.getMinGenerationForSeqNo(minTranslogRefSeqNo).translogFileGeneration; + if (translog.sizeInBytesByMinGen(minReferencedTranslogGeneration) < flushThreshold) { return false; } /* @@ -454,7 +454,7 @@ public boolean shouldPeriodicallyFlush(long localCheckpointOfLastCommit, long fl final long translogGenerationOfNewCommit = translog.getMinGenerationForSeqNo( 
localCheckpointTrackerSupplier.get().getProcessedCheckpoint() + 1 ).translogFileGeneration; - return translogGenerationOfLastCommit < translogGenerationOfNewCommit + return minReferencedTranslogGeneration < translogGenerationOfNewCommit || localCheckpointTrackerSupplier.get().getProcessedCheckpoint() == localCheckpointTrackerSupplier.get().getMaxSeqNo(); } diff --git a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java index 29c825fd383c5..2dd9b1a545d4a 100644 --- a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java +++ b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java @@ -544,4 +544,9 @@ public void onUploadFailed(TransferSnapshot transferSnapshot, Exception ex) thro } } } + + @Override + public long getMinUnreferencedSeqNoInSegments(long minUnrefCheckpointInLastCommit) { + return minSeqNoToKeep; + } } diff --git a/server/src/main/java/org/opensearch/index/translog/Translog.java b/server/src/main/java/org/opensearch/index/translog/Translog.java index cf7f18840a03e..b44aa6e059224 100644 --- a/server/src/main/java/org/opensearch/index/translog/Translog.java +++ b/server/src/main/java/org/opensearch/index/translog/Translog.java @@ -2034,4 +2034,8 @@ public static String createEmptyTranslog( writer.close(); return uuid; } + + public long getMinUnreferencedSeqNoInSegments(long minUnrefCheckpointInLastCommit) { + return minUnrefCheckpointInLastCommit; + } } diff --git a/server/src/main/java/org/opensearch/indices/recovery/PeerRecoverySourceService.java b/server/src/main/java/org/opensearch/indices/recovery/PeerRecoverySourceService.java index 6c7632a8a408d..cb2bedf00de99 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/PeerRecoverySourceService.java +++ b/server/src/main/java/org/opensearch/indices/recovery/PeerRecoverySourceService.java @@ -376,7 +376,8 @@ private Tuple createRecovery transportService, 
request.targetNode(), recoverySettings, - throttleTime -> shard.recoveryStats().addThrottleTime(throttleTime) + throttleTime -> shard.recoveryStats().addThrottleTime(throttleTime), + shard.isRemoteTranslogEnabled() ); handler = RecoverySourceHandlerFactory.create(shard, recoveryTarget, request, recoverySettings); return Tuple.tuple(handler, recoveryTarget); diff --git a/server/src/main/java/org/opensearch/indices/recovery/RemoteRecoveryTargetHandler.java b/server/src/main/java/org/opensearch/indices/recovery/RemoteRecoveryTargetHandler.java index 66f5b13449f05..37227596fdfe7 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RemoteRecoveryTargetHandler.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RemoteRecoveryTargetHandler.java @@ -75,6 +75,7 @@ public class RemoteRecoveryTargetHandler implements RecoveryTargetHandler { private final AtomicLong requestSeqNoGenerator = new AtomicLong(0); private final RetryableTransportClient retryableTransportClient; private final RemoteSegmentFileChunkWriter fileChunkWriter; + private final boolean remoteStoreEnabled; public RemoteRecoveryTargetHandler( long recoveryId, @@ -82,7 +83,8 @@ public RemoteRecoveryTargetHandler( TransportService transportService, DiscoveryNode targetNode, RecoverySettings recoverySettings, - Consumer onSourceThrottle + Consumer onSourceThrottle, + boolean remoteStoreEnabled ) { this.transportService = transportService; // It is safe to pass the retry timeout value here because RemoteRecoveryTargetHandler @@ -111,6 +113,7 @@ public RemoteRecoveryTargetHandler( requestSeqNoGenerator, onSourceThrottle ); + this.remoteStoreEnabled = remoteStoreEnabled; } public DiscoveryNode targetNode() { @@ -129,7 +132,13 @@ public void prepareForTranslogOperations(int totalTranslogOps, ActionListener reader = in -> TransportResponse.Empty.INSTANCE; final ActionListener responseListener = ActionListener.map(listener, r -> null); - retryableTransportClient.executeRetryableAction(action, 
request, responseListener, reader); + if (remoteStoreEnabled) { + // If remote store is enabled, during the prepare_translog phase, translog is also downloaded on the + // target host along with incremental segments download. + retryableTransportClient.executeRetryableAction(action, request, translogOpsRequestOptions, responseListener, reader); + } else { + retryableTransportClient.executeRetryableAction(action, request, responseListener, reader); + } } @Override diff --git a/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java index fe389e3b3fcb4..703a7d457d5b6 100644 --- a/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java @@ -471,6 +471,15 @@ public void onReplicationFailure( } } + @Override + protected void validateShardIdleWithNoReplicas(IndexShard primary) { + // ensure search idle conditions are met. 
+ assertFalse(primary.isSearchIdleSupported()); + assertTrue(primary.isSearchIdle()); + assertTrue(primary.scheduledRefresh()); + assertFalse(primary.hasRefreshPending()); + } + private void assertSingleSegmentFile(IndexShard shard, String fileName) throws IOException { final Set segmentsFileNames = Arrays.stream(shard.store().directory().listAll()) .filter(file -> file.startsWith(IndexFileNames.SEGMENTS)) diff --git a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java index eab38bfe5c64d..7caff3e5f5479 100644 --- a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java @@ -436,13 +436,17 @@ public void testShardIdleWithNoReplicas() throws Exception { shards.startAll(); final IndexShard primary = shards.getPrimary(); shards.indexDocs(randomIntBetween(1, 10)); - // ensure search idle conditions are met. - assertTrue(primary.isSearchIdle()); - assertFalse(primary.scheduledRefresh()); - assertTrue(primary.hasRefreshPending()); + validateShardIdleWithNoReplicas(primary); } } + protected void validateShardIdleWithNoReplicas(IndexShard primary) { + // ensure search idle conditions are met. + assertTrue(primary.isSearchIdle()); + assertFalse(primary.scheduledRefresh()); + assertTrue(primary.hasRefreshPending()); + } + /** * here we are starting a new primary shard in PrimaryMode and testing if the shard publishes checkpoint after refresh. 
*/ From ffe9371b38f98a2c9314bb6cfed4f8f224ff085c Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Fri, 20 Oct 2023 12:40:04 -0400 Subject: [PATCH 07/45] Update Github pull request template to have a task for inspecting failing checks (#10751) * Update Github pull request template to have a task for inspecting failing checks Signed-off-by: Andriy Redko * Add failing builds troubleshooting section to CONTRIBUTING.md Signed-off-by: Andriy Redko * Address review comments Signed-off-by: Andriy Redko --------- Signed-off-by: Andriy Redko --- .github/pull_request_template.md | 1 + CONTRIBUTING.md | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index c47b9e0b69256..908a032bf833e 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -17,6 +17,7 @@ Resolves #[Issue number to be closed when this PR is merged] - [ ] All tests pass - [ ] New functionality has been documented. - [ ] New functionality has javadoc added +- [ ] Failing checks are inspected and point to the corresponding known issue(s) (See: [Troubleshooting Failing Builds](../blob/main/CONTRIBUTING.md#troubleshooting-failing-builds)) - [ ] Commits are signed per the DCO using --signoff - [ ] Commit changes are listed out in CHANGELOG.md file (See: [Changelog](../blob/main/CONTRIBUTING.md#changelog)) - [ ] Public documentation issue/PR [created](https://github.com/opensearch-project/documentation-website/issues/new/choose) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d379d78829318..4a1162cf2558b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -8,6 +8,7 @@ - [Developer Certificate of Origin](#developer-certificate-of-origin) - [Changelog](#changelog) - [Review Process](#review-process) + - [Troubleshooting Failing Builds](#troubleshooting-failing-builds) # Contributing to OpenSearch @@ -162,3 +163,14 @@ During the PR process, expect that there will be some back-and-forth. 
Please try If we accept the PR, a [maintainer](MAINTAINERS.md) will merge your change and usually take care of backporting it to appropriate branches ourselves. If we reject the PR, we will close the pull request with a comment explaining why. This decision isn't always final: if you feel we have misunderstood your intended change or otherwise think that we should reconsider then please continue the conversation with a comment on the PR and we'll do our best to address any further points you raise. + +## Troubleshooting Failing Builds + +The OpenSearch testing framework offers many capabilities but exhibits significant complexity (it does a lot of randomization internally to cover as many edge cases and variations as possible). Unfortunately, this poses a challenge by making it harder to discover important issues/bugs in a straightforward way and may lead to so-called flaky tests - the tests which flip randomly from success to failure without any code changes. + +If your pull request reports a failing test(s) on one of the checks, please: + - look if there is an existing [issue](https://github.com/opensearch-project/OpenSearch/issues) reported for the test in question + - if not, please make sure this is not caused by your changes, run the failing test(s) locally for some time + - if you are sure the failure is not related, please open a new [bug](https://github.com/opensearch-project/OpenSearch/issues/new?assignees=&labels=bug%2C+untriaged&projects=&template=bug_template.md&title=%5BBUG%5D) with the `flaky-test` label + - add a comment referencing the issue(s) or bug report(s) to your pull request explaining the failing build(s) + - as a bonus point, try to contribute by fixing the flaky test(s) From 1e28738b8c966011bf1ae1f00431f0377761cb0a Mon Sep 17 00:00:00 2001 From: Andrew Ross Date: Fri, 20 Oct 2023 12:17:51 -0500 Subject: [PATCH 08/45] Increase remote recovery thread pool size (#10750) The remote recovery thread pool does blocking I/O when downloading files, so
the "half processor count max 10" was definitely too small. This can be shown by triggering recoveries on a node that is also doing segment replication, and the replication lag will increase due to contention on that thread pool. Some amount of contention is inevitable, but the change here to increase the download thread pool, and also limit the concurrent usage of that thread pool by any single recovery/replication to 25% of the threads does help. Long term, we can improve this even further by moving to fully async I/O to avoid blocking threads in the application on draining InputStreams. Signed-off-by: Andrew Ross --- .../org/opensearch/indices/recovery/RecoverySettings.java | 7 ++++--- .../main/java/org/opensearch/threadpool/ThreadPool.java | 7 ++++++- .../org/opensearch/threadpool/ScalingThreadPoolTests.java | 2 +- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/opensearch/indices/recovery/RecoverySettings.java b/server/src/main/java/org/opensearch/indices/recovery/RecoverySettings.java index 44dfb2f4cb00a..0f3025369833d 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RecoverySettings.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RecoverySettings.java @@ -41,6 +41,7 @@ import org.opensearch.common.settings.Setting.Property; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.concurrent.OpenSearchExecutors; import org.opensearch.core.common.unit.ByteSizeUnit; import org.opensearch.core.common.unit.ByteSizeValue; @@ -87,10 +88,10 @@ public class RecoverySettings { /** * Controls the maximum number of streams that can be started concurrently per recovery when downloading from the remote store. 
*/ - public static final Setting INDICES_RECOVERY_MAX_CONCURRENT_REMOTE_STORE_STREAMS_SETTING = Setting.intSetting( + public static final Setting INDICES_RECOVERY_MAX_CONCURRENT_REMOTE_STORE_STREAMS_SETTING = new Setting<>( "indices.recovery.max_concurrent_remote_store_streams", - 10, - 1, + (s) -> Integer.toString(Math.max(1, OpenSearchExecutors.allocatedProcessors(s) / 2)), + (s) -> Setting.parseInt(s, 1, "indices.recovery.max_concurrent_remote_store_streams"), Property.Dynamic, Property.NodeScope ); diff --git a/server/src/main/java/org/opensearch/threadpool/ThreadPool.java b/server/src/main/java/org/opensearch/threadpool/ThreadPool.java index fab7620292dd2..5f10986239300 100644 --- a/server/src/main/java/org/opensearch/threadpool/ThreadPool.java +++ b/server/src/main/java/org/opensearch/threadpool/ThreadPool.java @@ -273,7 +273,12 @@ public ThreadPool( ); builders.put( Names.REMOTE_RECOVERY, - new ScalingExecutorBuilder(Names.REMOTE_RECOVERY, 1, halfProcMaxAt10, TimeValue.timeValueMinutes(5)) + new ScalingExecutorBuilder( + Names.REMOTE_RECOVERY, + 1, + twiceAllocatedProcessors(allocatedProcessors), + TimeValue.timeValueMinutes(5) + ) ); if (FeatureFlags.isEnabled(FeatureFlags.CONCURRENT_SEGMENT_SEARCH)) { builders.put( diff --git a/server/src/test/java/org/opensearch/threadpool/ScalingThreadPoolTests.java b/server/src/test/java/org/opensearch/threadpool/ScalingThreadPoolTests.java index ba2d4b8c247bb..19271bbf30e80 100644 --- a/server/src/test/java/org/opensearch/threadpool/ScalingThreadPoolTests.java +++ b/server/src/test/java/org/opensearch/threadpool/ScalingThreadPoolTests.java @@ -154,7 +154,7 @@ private int expectedSize(final String threadPoolName, final int numberOfProcesso sizes.put(ThreadPool.Names.TRANSLOG_SYNC, n -> 4 * n); sizes.put(ThreadPool.Names.REMOTE_PURGE, ThreadPool::halfAllocatedProcessorsMaxFive); sizes.put(ThreadPool.Names.REMOTE_REFRESH_RETRY, ThreadPool::halfAllocatedProcessorsMaxTen); - sizes.put(ThreadPool.Names.REMOTE_RECOVERY, 
ThreadPool::halfAllocatedProcessorsMaxTen); + sizes.put(ThreadPool.Names.REMOTE_RECOVERY, ThreadPool::twiceAllocatedProcessors); return sizes.get(threadPoolName).apply(numberOfProcessors); } From 4f8bcff554fe3b019d4149ddcdcc634120c67dd3 Mon Sep 17 00:00:00 2001 From: Poojita Raj Date: Fri, 20 Oct 2023 15:57:17 -0700 Subject: [PATCH 09/45] Return unformatted Segment Replication metrics that take upload time into account for replication lag (#10723) * Return unformatted segrep metrics in stats Signed-off-by: Poojita Raj * Take upload time into account for replication time lag Signed-off-by: Poojita Raj * unformat segrep stats Signed-off-by: Poojita Raj * remove unused field names Signed-off-by: Poojita Raj --------- Signed-off-by: Poojita Raj --- .../main/java/org/opensearch/index/ReplicationStats.java | 8 +++----- .../main/java/org/opensearch/index/shard/IndexShard.java | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/ReplicationStats.java b/server/src/main/java/org/opensearch/index/ReplicationStats.java index 9cc6685c75f80..0ae4526365bf1 100644 --- a/server/src/main/java/org/opensearch/index/ReplicationStats.java +++ b/server/src/main/java/org/opensearch/index/ReplicationStats.java @@ -8,11 +8,9 @@ package org.opensearch.index; -import org.opensearch.common.unit.TimeValue; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.common.io.stream.Writeable; -import org.opensearch.core.common.unit.ByteSizeValue; import org.opensearch.core.xcontent.ToXContentFragment; import org.opensearch.core.xcontent.XContentBuilder; @@ -76,9 +74,9 @@ public void writeTo(StreamOutput out) throws IOException { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(Fields.SEGMENT_REPLICATION); - builder.field(Fields.MAX_BYTES_BEHIND, new 
ByteSizeValue(maxBytesBehind).toString()); - builder.field(Fields.TOTAL_BYTES_BEHIND, new ByteSizeValue(totalBytesBehind).toString()); - builder.field(Fields.MAX_REPLICATION_LAG, new TimeValue(maxReplicationLag)); + builder.field(Fields.MAX_BYTES_BEHIND, maxBytesBehind); + builder.field(Fields.TOTAL_BYTES_BEHIND, totalBytesBehind); + builder.field(Fields.MAX_REPLICATION_LAG, maxReplicationLag); builder.endObject(); return builder; } diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 1b7d1b2716979..f990a3b56e856 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -3010,7 +3010,7 @@ public ReplicationStats getReplicationStats() { long maxBytesBehind = stats.stream().mapToLong(SegmentReplicationShardStats::getBytesBehindCount).max().orElse(0L); long totalBytesBehind = stats.stream().mapToLong(SegmentReplicationShardStats::getBytesBehindCount).sum(); long maxReplicationLag = stats.stream() - .mapToLong(SegmentReplicationShardStats::getCurrentReplicationTimeMillis) + .mapToLong(SegmentReplicationShardStats::getCurrentReplicationLagMillis) .max() .orElse(0L); return new ReplicationStats(maxBytesBehind, totalBytesBehind, maxReplicationLag); From 1e9ec52dd9d5e7d4a3ffb7d37c8b7fdf4069c26d Mon Sep 17 00:00:00 2001 From: Rishikesh Pasham <62345295+Rishikesh1159@users.noreply.github.com> Date: Fri, 20 Oct 2023 16:10:31 -0700 Subject: [PATCH 10/45] [Segment Replication] Fix Flaky test SegmentReplicationRelocationIT.testPrimaryRelocation (#10701) * Add primary mode check before assserting on primary mode. Signed-off-by: Rishikesh1159 * remove unnecessary shardRouting check. Signed-off-by: Rishikesh1159 * Add test logging. Signed-off-by: Rishikesh1159 * Addressing comments on PR. 
Signed-off-by: Rishikesh1159 --------- Signed-off-by: Rishikesh1159 --- .../replication/SegmentReplicationRelocationIT.java | 2 ++ .../replication/SegmentReplicationSourceHandler.java | 9 --------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationRelocationIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationRelocationIT.java index dd832a63d1e66..dbe0b43441f54 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationRelocationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationRelocationIT.java @@ -26,6 +26,7 @@ import org.opensearch.index.shard.IndexShard; import org.opensearch.indices.IndicesService; import org.opensearch.test.OpenSearchIntegTestCase; +import org.opensearch.test.junit.annotations.TestLogging; import org.opensearch.test.transport.MockTransportService; import org.opensearch.transport.TransportService; @@ -55,6 +56,7 @@ private void createIndex(int replicaCount) { * This test verifies happy path when primary shard is relocated newly added node (target) in the cluster. 
Before * relocation and after relocation documents are indexed and documents are verified */ + @TestLogging(reason = "Getting trace logs from replication,shard and allocation package", value = "org.opensearch.indices.replication:TRACE, org.opensearch.index.shard:TRACE, org.opensearch.cluster.routing.allocation:TRACE") public void testPrimaryRelocation() throws Exception { final String oldPrimary = internalCluster().startNode(); createIndex(1); diff --git a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSourceHandler.java b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSourceHandler.java index e2c47b0fb3159..674c09311c645 100644 --- a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSourceHandler.java +++ b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSourceHandler.java @@ -12,8 +12,6 @@ import org.opensearch.OpenSearchException; import org.opensearch.action.StepListener; import org.opensearch.cluster.node.DiscoveryNode; -import org.opensearch.cluster.routing.IndexShardRoutingTable; -import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.common.logging.Loggers; import org.opensearch.common.util.CancellableThreads; import org.opensearch.common.util.concurrent.ListenableFuture; @@ -22,7 +20,6 @@ import org.opensearch.core.action.ActionListener; import org.opensearch.index.shard.IndexShard; import org.opensearch.index.store.StoreFileMetadata; -import org.opensearch.indices.recovery.DelayRecoveryException; import org.opensearch.indices.recovery.FileChunkWriter; import org.opensearch.indices.recovery.MultiChunkTransfer; import org.opensearch.indices.replication.common.CopyState; @@ -146,12 +143,6 @@ public synchronized void sendFiles(GetSegmentFilesRequest request, ActionListene ); }; cancellableThreads.checkForCancel(); - final IndexShardRoutingTable routingTable = shard.getReplicationGroup().getRoutingTable(); - ShardRouting targetShardRouting 
= routingTable.getByAllocationId(request.getTargetAllocationId()); - if (targetShardRouting == null) { - logger.debug("delaying replication of {} as it is not listed as assigned to target node {}", shard.shardId(), targetNode); - throw new DelayRecoveryException("source node does not have the shard listed in its state as allocated on the node"); - } final StepListener sendFileStep = new StepListener<>(); Set storeFiles = new HashSet<>(Arrays.asList(shard.store().directory().listAll())); From 51626d03f857ca840280d1e57cfb1bdfbba75e2d Mon Sep 17 00:00:00 2001 From: Rishikesh Pasham <62345295+Rishikesh1159@users.noreply.github.com> Date: Fri, 20 Oct 2023 17:13:43 -0700 Subject: [PATCH 11/45] [Segment Replication] Add Segment Replication backpressure rejection stats to _nodes/stats (#10656) * Initial WIP for adding segrep backpressure to node stats. Signed-off-by: Rishikesh1159 * Bind SegmentReplicarionStatsTracker in Node.java Signed-off-by: Rishikesh1159 * remove additional segrep backpressure info from node stats Signed-off-by: Rishikesh1159 * fix metric name in node stats Signed-off-by: Rishikesh1159 * Fix compile error. Signed-off-by: Rishikesh1159 * Fix compile errors. Signed-off-by: Rishikesh1159 * Address comments on PR. Signed-off-by: Rishikesh1159 * Update java docs. Signed-off-by: Rishikesh1159 * Address comments on PR and fix compile errors. Signed-off-by: Rishikesh1159 * Address comments on PR. Signed-off-by: Rishikesh1159 * Update unit test. 
Signed-off-by: Rishikesh1159 --------- Signed-off-by: Rishikesh1159 Signed-off-by: Rishikesh Pasham <62345295+Rishikesh1159@users.noreply.github.com> --- .../admin/cluster/node/stats/NodeStats.java | 24 +++++++ .../cluster/node/stats/NodesStatsRequest.java | 1 + .../node/stats/TransportNodesStatsAction.java | 1 + .../stats/TransportClusterStatsAction.java | 1 + .../SegmentReplicationPressureService.java | 3 +- .../SegmentReplicationRejectionStats.java | 67 +++++++++++++++++++ .../index/SegmentReplicationStatsTracker.java | 8 +++ .../main/java/org/opensearch/node/Node.java | 4 ++ .../java/org/opensearch/node/NodeService.java | 7 ++ .../cluster/node/stats/NodeStatsTests.java | 18 +++++ .../opensearch/cluster/DiskUsageTests.java | 6 ++ ...egmentReplicationPressureServiceTests.java | 9 ++- .../SegmentReplicationStatsTrackerTests.java | 35 ++++++++++ .../snapshots/SnapshotResiliencyTests.java | 2 + .../MockInternalClusterInfoService.java | 1 + .../opensearch/test/InternalTestCluster.java | 1 + 16 files changed, 186 insertions(+), 2 deletions(-) create mode 100644 server/src/main/java/org/opensearch/index/SegmentReplicationRejectionStats.java create mode 100644 server/src/test/java/org/opensearch/index/SegmentReplicationStatsTrackerTests.java diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java index e9bfa358103c8..3d37056956c69 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java @@ -46,6 +46,7 @@ import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.discovery.DiscoveryStats; import org.opensearch.http.HttpStats; +import org.opensearch.index.SegmentReplicationRejectionStats; import org.opensearch.index.stats.IndexingPressureStats; import org.opensearch.index.stats.ShardIndexingPressureStats; 
import org.opensearch.index.store.remote.filecache.FileCacheStats; @@ -129,6 +130,9 @@ public class NodeStats extends BaseNodeResponse implements ToXContentFragment { @Nullable private SearchBackpressureStats searchBackpressureStats; + @Nullable + private SegmentReplicationRejectionStats segmentReplicationRejectionStats; + @Nullable private ClusterManagerThrottlingStats clusterManagerThrottlingStats; @@ -211,6 +215,12 @@ public NodeStats(StreamInput in) throws IOException { } else { resourceUsageStats = null; } + // TODO: change to V_2_12_0 on main after backport to 2.x + if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + segmentReplicationRejectionStats = in.readOptionalWriteable(SegmentReplicationRejectionStats::new); + } else { + segmentReplicationRejectionStats = null; + } if (in.getVersion().onOrAfter(Version.V_2_12_0)) { repositoriesStats = in.readOptionalWriteable(RepositoriesStats::new); } else { @@ -244,6 +254,7 @@ public NodeStats( @Nullable FileCacheStats fileCacheStats, @Nullable TaskCancellationStats taskCancellationStats, @Nullable SearchPipelineStats searchPipelineStats, + @Nullable SegmentReplicationRejectionStats segmentReplicationRejectionStats, @Nullable RepositoriesStats repositoriesStats ) { super(node); @@ -271,6 +282,7 @@ public NodeStats( this.fileCacheStats = fileCacheStats; this.taskCancellationStats = taskCancellationStats; this.searchPipelineStats = searchPipelineStats; + this.segmentReplicationRejectionStats = segmentReplicationRejectionStats; this.repositoriesStats = repositoriesStats; } @@ -415,6 +427,10 @@ public SearchPipelineStats getSearchPipelineStats() { } @Nullable + public SegmentReplicationRejectionStats getSegmentReplicationRejectionStats() { + return segmentReplicationRejectionStats; + } + public RepositoriesStats getRepositoriesStats() { return repositoriesStats; } @@ -465,6 +481,10 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(Version.V_2_12_0)) { 
out.writeOptionalWriteable(resourceUsageStats); } + // TODO: change to V_2_12_0 on main after backport to 2.x + if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + out.writeOptionalWriteable(segmentReplicationRejectionStats); + } if (out.getVersion().onOrAfter(Version.V_2_12_0)) { out.writeOptionalWriteable(repositoriesStats); } @@ -561,6 +581,10 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (getResourceUsageStats() != null) { getResourceUsageStats().toXContent(builder, params); } + if (getSegmentReplicationRejectionStats() != null) { + getSegmentReplicationRejectionStats().toXContent(builder, params); + } + if (getRepositoriesStats() != null) { getRepositoriesStats().toXContent(builder, params); } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java index 88dff20354aa2..fc72668d36413 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java @@ -215,6 +215,7 @@ public enum Metric { TASK_CANCELLATION("task_cancellation"), SEARCH_PIPELINE("search_pipeline"), RESOURCE_USAGE_STATS("resource_usage_stats"), + SEGMENT_REPLICATION_BACKPRESSURE("segment_replication_backpressure"), REPOSITORIES("repositories"); private String metricName; diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java index aa02f8e580f4a..99cf42cfdc4d0 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java @@ -126,6 +126,7 @@ protected NodeStats 
nodeOperation(NodeStatsRequest nodeStatsRequest) { NodesStatsRequest.Metric.TASK_CANCELLATION.containedIn(metrics), NodesStatsRequest.Metric.SEARCH_PIPELINE.containedIn(metrics), NodesStatsRequest.Metric.RESOURCE_USAGE_STATS.containedIn(metrics), + NodesStatsRequest.Metric.SEGMENT_REPLICATION_BACKPRESSURE.containedIn(metrics), NodesStatsRequest.Metric.REPOSITORIES.containedIn(metrics) ); } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java index f51fabbfb2388..5efec8b876435 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -170,6 +170,7 @@ protected ClusterStatsNodeResponse nodeOperation(ClusterStatsNodeRequest nodeReq false, false, false, + false, false ); List shardsStats = new ArrayList<>(); diff --git a/server/src/main/java/org/opensearch/index/SegmentReplicationPressureService.java b/server/src/main/java/org/opensearch/index/SegmentReplicationPressureService.java index 4284daf9ffef4..d9d480e7b2b27 100644 --- a/server/src/main/java/org/opensearch/index/SegmentReplicationPressureService.java +++ b/server/src/main/java/org/opensearch/index/SegmentReplicationPressureService.java @@ -106,10 +106,11 @@ public SegmentReplicationPressureService( ClusterService clusterService, IndicesService indicesService, ShardStateAction shardStateAction, + SegmentReplicationStatsTracker tracker, ThreadPool threadPool ) { this.indicesService = indicesService; - this.tracker = new SegmentReplicationStatsTracker(this.indicesService); + this.tracker = tracker; this.shardStateAction = shardStateAction; this.threadPool = threadPool; diff --git a/server/src/main/java/org/opensearch/index/SegmentReplicationRejectionStats.java 
b/server/src/main/java/org/opensearch/index/SegmentReplicationRejectionStats.java new file mode 100644 index 0000000000000..9f9f150ebe2d7 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/SegmentReplicationRejectionStats.java @@ -0,0 +1,67 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index; + +import org.opensearch.Version; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.xcontent.ToXContentFragment; +import org.opensearch.core.xcontent.XContentBuilder; + +import java.io.IOException; + +/** + * Segment replication rejection stats. + * + * @opensearch.internal + */ +public class SegmentReplicationRejectionStats implements Writeable, ToXContentFragment { + + /** + * Total rejections due to segment replication backpressure + */ + private long totalRejectionCount; + + public SegmentReplicationRejectionStats(final long totalRejectionCount) { + this.totalRejectionCount = totalRejectionCount; + } + + public SegmentReplicationRejectionStats(StreamInput in) throws IOException { + // TODO: change to V_2_12_0 on main after backport to 2.x + if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + this.totalRejectionCount = in.readVLong(); + } + } + + public long getTotalRejectionCount() { + return totalRejectionCount; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject("segment_replication_backpressure"); + builder.field("total_rejected_requests", totalRejectionCount); + return builder.endObject(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + // TODO: change to V_2_12_0 on main after backport to 2.x + if 
(out.getVersion().onOrAfter(Version.V_3_0_0)) { + out.writeVLong(totalRejectionCount); + } + } + + @Override + public String toString() { + return "SegmentReplicationRejectionStats{ totalRejectedRequestCount=" + totalRejectionCount + '}'; + } + +} diff --git a/server/src/main/java/org/opensearch/index/SegmentReplicationStatsTracker.java b/server/src/main/java/org/opensearch/index/SegmentReplicationStatsTracker.java index 6d5c00c08caff..f5fc8aa1c1eea 100644 --- a/server/src/main/java/org/opensearch/index/SegmentReplicationStatsTracker.java +++ b/server/src/main/java/org/opensearch/index/SegmentReplicationStatsTracker.java @@ -33,6 +33,14 @@ public SegmentReplicationStatsTracker(IndicesService indicesService) { rejectionCount = ConcurrentCollections.newConcurrentMap(); } + public SegmentReplicationRejectionStats getTotalRejectionStats() { + return new SegmentReplicationRejectionStats(this.rejectionCount.values().stream().mapToInt(AtomicInteger::get).sum()); + } + + protected Map getRejectionCount() { + return rejectionCount; + } + public SegmentReplicationStats getStats() { Map stats = new HashMap<>(); for (IndexService indexService : indicesService) { diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index c9148f382a028..711a90d424ac3 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -136,6 +136,7 @@ import org.opensearch.index.IndexModule; import org.opensearch.index.IndexSettings; import org.opensearch.index.IndexingPressureService; +import org.opensearch.index.SegmentReplicationStatsTracker; import org.opensearch.index.analysis.AnalysisRegistry; import org.opensearch.index.engine.EngineFactory; import org.opensearch.index.recovery.RemoteStoreRestoreService; @@ -977,6 +978,7 @@ protected Node( transportService.getTaskManager() ); + final SegmentReplicationStatsTracker segmentReplicationStatsTracker = new 
SegmentReplicationStatsTracker(indicesService); RepositoriesModule repositoriesModule = new RepositoriesModule( this.environment, pluginsService.filterPlugins(RepositoryPlugin.class), @@ -1116,6 +1118,7 @@ protected Node( fileCache, taskCancellationMonitoringService, resourceUsageCollectorService, + segmentReplicationStatsTracker, repositoryService ); @@ -1246,6 +1249,7 @@ protected Node( b.bind(MetricsRegistry.class).toInstance(metricsRegistry); b.bind(RemoteClusterStateService.class).toProvider(() -> remoteClusterStateService); b.bind(PersistedStateRegistry.class).toInstance(persistedStateRegistry); + b.bind(SegmentReplicationStatsTracker.class).toInstance(segmentReplicationStatsTracker); }); injector = modules.createInjector(); diff --git a/server/src/main/java/org/opensearch/node/NodeService.java b/server/src/main/java/org/opensearch/node/NodeService.java index e2d7bc2c86ba3..49dde0b81cac7 100644 --- a/server/src/main/java/org/opensearch/node/NodeService.java +++ b/server/src/main/java/org/opensearch/node/NodeService.java @@ -48,6 +48,7 @@ import org.opensearch.discovery.Discovery; import org.opensearch.http.HttpServerTransport; import org.opensearch.index.IndexingPressureService; +import org.opensearch.index.SegmentReplicationStatsTracker; import org.opensearch.index.store.remote.filecache.FileCache; import org.opensearch.indices.IndicesService; import org.opensearch.ingest.IngestService; @@ -96,6 +97,8 @@ public class NodeService implements Closeable { private final TaskCancellationMonitoringService taskCancellationMonitoringService; private final RepositoriesService repositoriesService; + private final SegmentReplicationStatsTracker segmentReplicationStatsTracker; + NodeService( Settings settings, ThreadPool threadPool, @@ -119,6 +122,7 @@ public class NodeService implements Closeable { FileCache fileCache, TaskCancellationMonitoringService taskCancellationMonitoringService, ResourceUsageCollectorService resourceUsageCollectorService, + 
SegmentReplicationStatsTracker segmentReplicationStatsTracker, RepositoriesService repositoriesService ) { this.settings = settings; @@ -146,6 +150,7 @@ public class NodeService implements Closeable { this.repositoriesService = repositoriesService; clusterService.addStateApplier(ingestService); clusterService.addStateApplier(searchPipelineService); + this.segmentReplicationStatsTracker = segmentReplicationStatsTracker; } public NodeInfo info( @@ -226,6 +231,7 @@ public NodeStats stats( boolean taskCancellation, boolean searchPipelineStats, boolean resourceUsageStats, + boolean segmentReplicationTrackerStats, boolean repositoriesStats ) { // for indices stats we want to include previous allocated shards stats as well (it will @@ -256,6 +262,7 @@ public NodeStats stats( fileCacheStats && fileCache != null ? fileCache.fileCacheStats() : null, taskCancellation ? this.taskCancellationMonitoringService.stats() : null, searchPipelineStats ? this.searchPipelineService.stats() : null, + segmentReplicationTrackerStats ? this.segmentReplicationStatsTracker.getTotalRejectionStats() : null, repositoriesStats ? 
this.repositoriesService.getRepositoriesStats() : null ); } diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java index 3491f18da9550..ebdd012006fb2 100644 --- a/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java +++ b/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java @@ -49,6 +49,7 @@ import org.opensearch.discovery.DiscoveryStats; import org.opensearch.http.HttpStats; import org.opensearch.index.ReplicationStats; +import org.opensearch.index.SegmentReplicationRejectionStats; import org.opensearch.index.remote.RemoteSegmentStats; import org.opensearch.index.remote.RemoteTranslogTransferTracker; import org.opensearch.index.translog.RemoteTranslogStats; @@ -417,6 +418,17 @@ public void testSerialization() throws IOException { assertEquals(aResourceUsageStats.getTimestamp(), bResourceUsageStats.getTimestamp()); }); } + SegmentReplicationRejectionStats segmentReplicationRejectionStats = nodeStats.getSegmentReplicationRejectionStats(); + SegmentReplicationRejectionStats deserializedSegmentReplicationRejectionStats = deserializedNodeStats + .getSegmentReplicationRejectionStats(); + if (segmentReplicationRejectionStats == null) { + assertNull(deserializedSegmentReplicationRejectionStats); + } else { + assertEquals( + segmentReplicationRejectionStats.getTotalRejectionCount(), + deserializedSegmentReplicationRejectionStats.getTotalRejectionCount() + ); + } ScriptCacheStats scriptCacheStats = nodeStats.getScriptCacheStats(); ScriptCacheStats deserializedScriptCacheStats = deserializedNodeStats.getScriptCacheStats(); if (scriptCacheStats == null) { @@ -812,6 +824,11 @@ public static NodeStats createNodeStats(boolean remoteStoreStats) { } nodesResourceUsageStats = new NodesResourceUsageStats(resourceUsageStatsMap); } + SegmentReplicationRejectionStats 
segmentReplicationRejectionStats = null; + if (frequently()) { + segmentReplicationRejectionStats = new SegmentReplicationRejectionStats(randomNonNegativeLong()); + } + ClusterManagerThrottlingStats clusterManagerThrottlingStats = null; if (frequently()) { clusterManagerThrottlingStats = new ClusterManagerThrottlingStats(); @@ -853,6 +870,7 @@ public static NodeStats createNodeStats(boolean remoteStoreStats) { null, null, null, + segmentReplicationRejectionStats, null ); } diff --git a/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java b/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java index 6f03e87bf5824..f037b75dc16a3 100644 --- a/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java +++ b/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java @@ -192,6 +192,7 @@ public void testFillDiskUsage() { null, null, null, + null, null ), new NodeStats( @@ -220,6 +221,7 @@ public void testFillDiskUsage() { null, null, null, + null, null ), new NodeStats( @@ -248,6 +250,7 @@ public void testFillDiskUsage() { null, null, null, + null, null ) ); @@ -307,6 +310,7 @@ public void testFillDiskUsageSomeInvalidValues() { null, null, null, + null, null ), new NodeStats( @@ -335,6 +339,7 @@ public void testFillDiskUsageSomeInvalidValues() { null, null, null, + null, null ), new NodeStats( @@ -363,6 +368,7 @@ public void testFillDiskUsageSomeInvalidValues() { null, null, null, + null, null ) ); diff --git a/server/src/test/java/org/opensearch/index/SegmentReplicationPressureServiceTests.java b/server/src/test/java/org/opensearch/index/SegmentReplicationPressureServiceTests.java index 34fa13f0ba62c..478fdcb24f76a 100644 --- a/server/src/test/java/org/opensearch/index/SegmentReplicationPressureServiceTests.java +++ b/server/src/test/java/org/opensearch/index/SegmentReplicationPressureServiceTests.java @@ -278,6 +278,13 @@ private SegmentReplicationPressureService buildPressureService(Settings settings ClusterService clusterService = 
mock(ClusterService.class); when(clusterService.getClusterSettings()).thenReturn(new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)); - return new SegmentReplicationPressureService(settings, clusterService, indicesService, shardStateAction, mock(ThreadPool.class)); + return new SegmentReplicationPressureService( + settings, + clusterService, + indicesService, + shardStateAction, + new SegmentReplicationStatsTracker(indicesService), + mock(ThreadPool.class) + ); } } diff --git a/server/src/test/java/org/opensearch/index/SegmentReplicationStatsTrackerTests.java b/server/src/test/java/org/opensearch/index/SegmentReplicationStatsTrackerTests.java new file mode 100644 index 0000000000000..04423d583e8f9 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/SegmentReplicationStatsTrackerTests.java @@ -0,0 +1,35 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index; + +import org.opensearch.core.index.shard.ShardId; +import org.opensearch.indices.IndicesService; +import org.opensearch.test.OpenSearchTestCase; + +import org.mockito.Mockito; + +import static org.mockito.Mockito.mock; + +public class SegmentReplicationStatsTrackerTests extends OpenSearchTestCase { + + private IndicesService indicesService = mock(IndicesService.class); + + public void testRejectedCount() { + SegmentReplicationStatsTracker segmentReplicationStatsTracker = new SegmentReplicationStatsTracker(indicesService); + + // Verify that total rejection count is 0 on an empty rejectionCount map in statsTracker. + assertTrue(segmentReplicationStatsTracker.getRejectionCount().isEmpty()); + assertEquals(segmentReplicationStatsTracker.getTotalRejectionStats().getTotalRejectionCount(), 0L); + + // Verify that total rejection count is 1 after incrementing rejectionCount. 
+ segmentReplicationStatsTracker.incrementRejectionCount(Mockito.mock(ShardId.class)); + assertEquals(segmentReplicationStatsTracker.getTotalRejectionStats().getTotalRejectionCount(), 1L); + } + +} diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java index 710717532ceb4..b7a2baacba611 100644 --- a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java @@ -178,6 +178,7 @@ import org.opensearch.gateway.TransportNodesListGatewayStartedShards; import org.opensearch.index.IndexingPressureService; import org.opensearch.index.SegmentReplicationPressureService; +import org.opensearch.index.SegmentReplicationStatsTracker; import org.opensearch.index.analysis.AnalysisRegistry; import org.opensearch.index.remote.RemoteStorePressureService; import org.opensearch.index.remote.RemoteStoreStatsTrackerFactory; @@ -2188,6 +2189,7 @@ public void onFailure(final Exception e) { clusterService, mock(IndicesService.class), mock(ShardStateAction.class), + mock(SegmentReplicationStatsTracker.class), mock(ThreadPool.class) ), mock(RemoteStorePressureService.class), diff --git a/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java b/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java index 60a54110fd0b4..2ba4de5e54a67 100644 --- a/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java +++ b/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java @@ -122,6 +122,7 @@ List adjustNodesStats(List nodesStats) { nodeStats.getFileCacheStats(), nodeStats.getTaskCancellationStats(), nodeStats.getSearchPipelineStats(), + nodeStats.getSegmentReplicationRejectionStats(), nodeStats.getRepositoriesStats() ); }).collect(Collectors.toList()); diff --git 
a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java index 898e125b94954..63d8f069bebea 100644 --- a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java +++ b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java @@ -2722,6 +2722,7 @@ public void ensureEstimatedStats() { false, false, false, + false, false ); assertThat( From 7c5a806d5bbee77c0c4a184a500bf5522a8d8cd7 Mon Sep 17 00:00:00 2001 From: Movva Ajaykumar Date: Sat, 21 Oct 2023 13:31:09 +0530 Subject: [PATCH 12/45] Added changes for AdmissionControl Interceptor and AdmissionControlService for RateLimiting (#9286) * Changes for AdmissionControl Interceptor and AdmissionControlService for RateLimiting (#9286) Signed-off-by: Ajay Kumar Movva --- CHANGELOG.md | 1 + .../TransportReplicationAction.java | 10 +- .../common/network/NetworkModule.java | 16 +- .../common/settings/ClusterSettings.java | 8 +- .../main/java/org/opensearch/node/Node.java | 19 +- .../AdmissionControlService.java | 104 +++++++++ .../AdmissionControlSettings.java | 83 ++++++++ .../controllers/AdmissionController.java | 70 ++++++ .../CPUBasedAdmissionController.java | 55 +++++ .../controllers/package-info.java | 12 ++ .../enums/AdmissionControlMode.java | 66 ++++++ .../enums/TransportActionType.java | 45 ++++ .../admissioncontrol/enums/package-info.java | 12 ++ .../admissioncontrol/package-info.java | 12 ++ .../CPUBasedAdmissionControllerSettings.java | 110 ++++++++++ .../settings/package-info.java | 11 + .../AdmissionControlTransportHandler.java | 65 ++++++ .../AdmissionControlTransportInterceptor.java | 40 ++++ .../transport/package-info.java | 11 + .../ratelimitting/package-info.java | 12 ++ .../common/network/NetworkModuleTests.java | 199 +++++++++++++++++- .../AdmissionControlServiceTests.java | 140 ++++++++++++ .../AdmissionControlSettingsTests.java | 103 +++++++++ 
.../CPUBasedAdmissionControllerTests.java | 109 ++++++++++ .../enums/AdmissionControlModeTests.java | 29 +++ .../enums/TransportActionTypeTests.java | 27 +++ ...CPUBasedAdmissionControlSettingsTests.java | 153 ++++++++++++++ ...AdmissionControlTransportHandlerTests.java | 92 ++++++++ 28 files changed, 1595 insertions(+), 19 deletions(-) create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlService.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlSettings.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/AdmissionController.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionController.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/package-info.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/AdmissionControlMode.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/TransportActionType.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/package-info.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/package-info.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/settings/CPUBasedAdmissionControllerSettings.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/settings/package-info.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandler.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportInterceptor.java create mode 100644 
server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/package-info.java create mode 100644 server/src/main/java/org/opensearch/ratelimitting/package-info.java create mode 100644 server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlServiceTests.java create mode 100644 server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlSettingsTests.java create mode 100644 server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionControllerTests.java create mode 100644 server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/enums/AdmissionControlModeTests.java create mode 100644 server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/enums/TransportActionTypeTests.java create mode 100644 server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/settings/CPUBasedAdmissionControlSettingsTests.java create mode 100644 server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandlerTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index d7d492679c79d..374dd4ab57ee6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Remote cluster state] Upload global metadata in cluster state to remote store([#10404](https://github.com/opensearch-project/OpenSearch/pull/10404)) - [Remote cluster state] Download functionality of global metadata from remote store ([#10535](https://github.com/opensearch-project/OpenSearch/pull/10535)) - [Remote cluster state] Restore global metadata from remote store when local state is lost after quorum loss ([#10404](https://github.com/opensearch-project/OpenSearch/pull/10404)) +- [AdmissionControl] Added changes for AdmissionControl Interceptor and AdmissionControlService for RateLimiting ([#9286](https://github.com/opensearch-project/OpenSearch/pull/9286)) ### Dependencies - Bump 
`log4j-core` from 2.18.0 to 2.19.0 diff --git a/server/src/main/java/org/opensearch/action/support/replication/TransportReplicationAction.java b/server/src/main/java/org/opensearch/action/support/replication/TransportReplicationAction.java index b68bd13cfed80..ddebdc5530e70 100644 --- a/server/src/main/java/org/opensearch/action/support/replication/TransportReplicationAction.java +++ b/server/src/main/java/org/opensearch/action/support/replication/TransportReplicationAction.java @@ -134,6 +134,12 @@ public abstract class TransportReplicationAction< Setting.Property.NodeScope ); + /** + * Making primary and replica actions suffixes as constant + */ + public static final String PRIMARY_ACTION_SUFFIX = "[p]"; + public static final String REPLICA_ACTION_SUFFIX = "[r]"; + protected final ThreadPool threadPool; protected final TransportService transportService; protected final ClusterService clusterService; @@ -204,8 +210,8 @@ protected TransportReplicationAction( this.shardStateAction = shardStateAction; this.executor = executor; - this.transportPrimaryAction = actionName + "[p]"; - this.transportReplicaAction = actionName + "[r]"; + this.transportPrimaryAction = actionName + PRIMARY_ACTION_SUFFIX; + this.transportReplicaAction = actionName + REPLICA_ACTION_SUFFIX; this.initialRetryBackoffBound = REPLICATION_INITIAL_RETRY_BACKOFF_BOUND.get(settings); this.retryTimeout = REPLICATION_RETRY_TIMEOUT.get(settings); diff --git a/server/src/main/java/org/opensearch/common/network/NetworkModule.java b/server/src/main/java/org/opensearch/common/network/NetworkModule.java index 0734659d8ee72..821d48fccf48c 100644 --- a/server/src/main/java/org/opensearch/common/network/NetworkModule.java +++ b/server/src/main/java/org/opensearch/common/network/NetworkModule.java @@ -131,7 +131,7 @@ public final class NetworkModule { private final Map> transportFactories = new HashMap<>(); private final Map> transportHttpFactories = new HashMap<>(); - private final List transportIntercetors = new 
ArrayList<>(); + private final List transportInterceptors = new ArrayList<>(); /** * Creates a network module that custom networking classes can be plugged into. @@ -149,9 +149,13 @@ public NetworkModule( NetworkService networkService, HttpServerTransport.Dispatcher dispatcher, ClusterSettings clusterSettings, - Tracer tracer + Tracer tracer, + List transportInterceptors ) { this.settings = settings; + if (transportInterceptors != null) { + transportInterceptors.forEach(this::registerTransportInterceptor); + } for (NetworkPlugin plugin : plugins) { Map> httpTransportFactory = plugin.getHttpTransports( settings, @@ -180,11 +184,11 @@ public NetworkModule( for (Map.Entry> entry : transportFactory.entrySet()) { registerTransport(entry.getKey(), entry.getValue()); } - List transportInterceptors = plugin.getTransportInterceptors( + List pluginTransportInterceptors = plugin.getTransportInterceptors( namedWriteableRegistry, threadPool.getThreadContext() ); - for (TransportInterceptor interceptor : transportInterceptors) { + for (TransportInterceptor interceptor : pluginTransportInterceptors) { registerTransportInterceptor(interceptor); } } @@ -264,7 +268,7 @@ public Supplier getTransportSupplier() { * Registers a new {@link TransportInterceptor} */ private void registerTransportInterceptor(TransportInterceptor interceptor) { - this.transportIntercetors.add(Objects.requireNonNull(interceptor, "interceptor must not be null")); + this.transportInterceptors.add(Objects.requireNonNull(interceptor, "interceptor must not be null")); } /** @@ -272,7 +276,7 @@ private void registerTransportInterceptor(TransportInterceptor interceptor) { * @see #registerTransportInterceptor(TransportInterceptor) */ public TransportInterceptor getTransportInterceptor() { - return new CompositeTransportInterceptor(this.transportIntercetors); + return new CompositeTransportInterceptor(this.transportInterceptors); } static final class CompositeTransportInterceptor implements TransportInterceptor { diff 
--git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 76883c200542e..7ac7da819b215 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -134,6 +134,8 @@ import org.opensearch.persistent.PersistentTasksClusterService; import org.opensearch.persistent.decider.EnableAssignmentDecider; import org.opensearch.plugins.PluginsService; +import org.opensearch.ratelimitting.admissioncontrol.AdmissionControlSettings; +import org.opensearch.ratelimitting.admissioncontrol.settings.CPUBasedAdmissionControllerSettings; import org.opensearch.repositories.fs.FsRepository; import org.opensearch.rest.BaseRestHandler; import org.opensearch.script.ScriptService; @@ -682,7 +684,11 @@ public void apply(Settings value, Settings current, Settings previous) { RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING, RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING, IndicesService.CLUSTER_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING, - IndicesService.CLUSTER_REMOTE_INDEX_RESTRICT_ASYNC_DURABILITY_SETTING + IndicesService.CLUSTER_REMOTE_INDEX_RESTRICT_ASYNC_DURABILITY_SETTING, + AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE, + CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE, + CPUBasedAdmissionControllerSettings.INDEXING_CPU_USAGE_LIMIT, + CPUBasedAdmissionControllerSettings.SEARCH_CPU_USAGE_LIMIT ) ) ); diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index 711a90d424ac3..e80b768074fc7 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -197,6 +197,8 @@ import org.opensearch.plugins.SearchPlugin; import org.opensearch.plugins.SystemIndexPlugin; import 
org.opensearch.plugins.TelemetryPlugin; +import org.opensearch.ratelimitting.admissioncontrol.AdmissionControlService; +import org.opensearch.ratelimitting.admissioncontrol.transport.AdmissionControlTransportInterceptor; import org.opensearch.repositories.RepositoriesModule; import org.opensearch.repositories.RepositoriesService; import org.opensearch.rest.RestController; @@ -820,6 +822,7 @@ protected Node( remoteStoreStatsTrackerFactory, recoverySettings ); + final AliasValidator aliasValidator = new AliasValidator(); final ShardLimitValidator shardLimitValidator = new ShardLimitValidator(settings, clusterService, systemIndices); @@ -891,6 +894,17 @@ protected Node( final RestController restController = actionModule.getRestController(); + final AdmissionControlService admissionControlService = new AdmissionControlService( + settings, + clusterService.getClusterSettings(), + threadPool + ); + + AdmissionControlTransportInterceptor admissionControlTransportInterceptor = new AdmissionControlTransportInterceptor( + admissionControlService + ); + + List transportInterceptors = List.of(admissionControlTransportInterceptor); final NetworkModule networkModule = new NetworkModule( settings, pluginsService.filterPlugins(NetworkPlugin.class), @@ -903,8 +917,10 @@ protected Node( networkService, restController, clusterService.getClusterSettings(), - tracer + tracer, + transportInterceptors ); + Collection>> indexTemplateMetadataUpgraders = pluginsService.filterPlugins( Plugin.class ).stream().map(Plugin::getIndexTemplateMetadataUpgrader).collect(Collectors.toList()); @@ -1181,6 +1197,7 @@ protected Node( b.bind(IndexingPressureService.class).toInstance(indexingPressureService); b.bind(TaskResourceTrackingService.class).toInstance(taskResourceTrackingService); b.bind(SearchBackpressureService.class).toInstance(searchBackpressureService); + b.bind(AdmissionControlService.class).toInstance(admissionControlService); b.bind(UsageService.class).toInstance(usageService); 
b.bind(AggregationUsageService.class).toInstance(searchModule.getValuesSourceRegistry().getUsageService()); b.bind(NamedWriteableRegistry.class).toInstance(namedWriteableRegistry); diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlService.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlService.java new file mode 100644 index 0000000000000..2cc409b0e4465 --- /dev/null +++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlService.java @@ -0,0 +1,104 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.ratelimitting.admissioncontrol; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.ratelimitting.admissioncontrol.controllers.AdmissionController; +import org.opensearch.ratelimitting.admissioncontrol.controllers.CPUBasedAdmissionController; +import org.opensearch.threadpool.ThreadPool; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; + +import static org.opensearch.ratelimitting.admissioncontrol.settings.CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER; + +/** + * Admission control Service that bootstraps and manages all the Admission Controllers in OpenSearch. 
+ */ +public class AdmissionControlService { + private final ThreadPool threadPool; + public final AdmissionControlSettings admissionControlSettings; + private final ConcurrentMap ADMISSION_CONTROLLERS; + private static final Logger logger = LogManager.getLogger(AdmissionControlService.class); + private final ClusterSettings clusterSettings; + private final Settings settings; + + /** + * + * @param settings Immutable settings instance + * @param clusterSettings ClusterSettings Instance + * @param threadPool ThreadPool Instance + */ + public AdmissionControlService(Settings settings, ClusterSettings clusterSettings, ThreadPool threadPool) { + this.threadPool = threadPool; + this.admissionControlSettings = new AdmissionControlSettings(clusterSettings, settings); + this.ADMISSION_CONTROLLERS = new ConcurrentHashMap<>(); + this.clusterSettings = clusterSettings; + this.settings = settings; + this.initialise(); + } + + /** + * Initialise and Register all the admissionControllers + */ + private void initialise() { + // Initialise different type of admission controllers + registerAdmissionController(CPU_BASED_ADMISSION_CONTROLLER); + } + + /** + * Handler to trigger registered admissionController + */ + public void applyTransportAdmissionControl(String action) { + this.ADMISSION_CONTROLLERS.forEach((name, admissionController) -> { admissionController.apply(action); }); + } + + /** + * + * @param admissionControllerName admissionControllerName to register into the service. 
+ */ + public void registerAdmissionController(String admissionControllerName) { + AdmissionController admissionController = this.controllerFactory(admissionControllerName); + this.ADMISSION_CONTROLLERS.put(admissionControllerName, admissionController); + } + + /** + * @return AdmissionController Instance + */ + private AdmissionController controllerFactory(String admissionControllerName) { + switch (admissionControllerName) { + case CPU_BASED_ADMISSION_CONTROLLER: + return new CPUBasedAdmissionController(admissionControllerName, this.settings, this.clusterSettings); + default: + throw new IllegalArgumentException("Not Supported AdmissionController : " + admissionControllerName); + } + } + + /** + * + * @return list of the registered admissionControllers + */ + public List getAdmissionControllers() { + return new ArrayList<>(this.ADMISSION_CONTROLLERS.values()); + } + + /** + * + * @param controllerName name of the admissionController + * @return instance of the AdmissionController Instance + */ + public AdmissionController getAdmissionController(String controllerName) { + return this.ADMISSION_CONTROLLERS.getOrDefault(controllerName, null); + } +} diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlSettings.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlSettings.java new file mode 100644 index 0000000000000..b557190ab54ac --- /dev/null +++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlSettings.java @@ -0,0 +1,83 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.ratelimitting.admissioncontrol; + +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Settings; +import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode; + +/** + * Settings related to admission control. + * @opensearch.internal + */ +public final class AdmissionControlSettings { + + /** + * Default parameters for the AdmissionControlSettings + */ + public static class Defaults { + public static final String MODE = "disabled"; + } + + /** + * Feature level setting to operate in shadow-mode or in enforced-mode. If enforced field is set + * rejection will be performed, otherwise only rejection metrics will be populated. + */ + public static final Setting ADMISSION_CONTROL_TRANSPORT_LAYER_MODE = new Setting<>( + "admission_control.transport.mode", + Defaults.MODE, + AdmissionControlMode::fromName, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + private volatile AdmissionControlMode transportLayeradmissionControlMode; + + /** + * @param clusterSettings clusterSettings Instance + * @param settings settings instance + */ + public AdmissionControlSettings(ClusterSettings clusterSettings, Settings settings) { + this.transportLayeradmissionControlMode = ADMISSION_CONTROL_TRANSPORT_LAYER_MODE.get(settings); + clusterSettings.addSettingsUpdateConsumer(ADMISSION_CONTROL_TRANSPORT_LAYER_MODE, this::setAdmissionControlTransportLayerMode); + } + + /** + * + * @param admissionControlMode update the mode of admission control feature + */ + private void setAdmissionControlTransportLayerMode(AdmissionControlMode admissionControlMode) { + this.transportLayeradmissionControlMode = admissionControlMode; + } + + /** + * + * @return return the default mode of the admissionControl + */ + public AdmissionControlMode getAdmissionControlTransportLayerMode() { + return this.transportLayeradmissionControlMode; + } + + /** + * + * 
@return true based on the admission control feature is enforced else false + */ + public Boolean isTransportLayerAdmissionControlEnforced() { + return this.transportLayeradmissionControlMode == AdmissionControlMode.ENFORCED; + } + + /** + * + * @return true based on the admission control feature is enabled else false + */ + public Boolean isTransportLayerAdmissionControlEnabled() { + return this.transportLayeradmissionControlMode != AdmissionControlMode.DISABLED; + } +} diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/AdmissionController.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/AdmissionController.java new file mode 100644 index 0000000000000..00564a9967f31 --- /dev/null +++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/AdmissionController.java @@ -0,0 +1,70 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.ratelimitting.admissioncontrol.controllers; + +import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode; + +import java.util.concurrent.atomic.AtomicLong; + +/** + * Abstract class for Admission Controller in OpenSearch, which aims to provide resource based request admission control. 
+ * It provides methods for any tracking-object that can be incremented (such as memory size), + * and admission control can be applied if configured limit has been reached + */ +public abstract class AdmissionController { + + private final AtomicLong rejectionCount; + private final String admissionControllerName; + + /** + * + * @param rejectionCount initialised rejectionCount value for AdmissionController + * @param admissionControllerName name of the admissionController + */ + public AdmissionController(AtomicLong rejectionCount, String admissionControllerName) { + this.rejectionCount = rejectionCount; + this.admissionControllerName = admissionControllerName; + } + + /** + * Return the current state of the admission controller + * @return true if admissionController is enabled for the transport layer else false + */ + public boolean isEnabledForTransportLayer(AdmissionControlMode admissionControlMode) { + return admissionControlMode != AdmissionControlMode.DISABLED; + } + + /** + * Increment the tracking-objects and apply the admission control if threshold is breached. + * Mostly applicable while applying admission controller + */ + public abstract void apply(String action); + + /** + * @return name of the admission-controller + */ + public String getName() { + return this.admissionControllerName; + } + + /** + * Adds the rejection count for the controller. Primarily used when copying controller states. + * @param count To add the value of the tracking resource object as the provided count + */ + public void addRejectionCount(long count) { + this.rejectionCount.addAndGet(count); + } + + /** + * @return current value of the rejection count metric tracked by the admission-controller. 
+ */ + public long getRejectionCount() { + return this.rejectionCount.get(); + } +} diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionController.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionController.java new file mode 100644 index 0000000000000..3a8956b2cce87 --- /dev/null +++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionController.java @@ -0,0 +1,55 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.ratelimitting.admissioncontrol.controllers; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.ratelimitting.admissioncontrol.settings.CPUBasedAdmissionControllerSettings; + +import java.util.concurrent.atomic.AtomicLong; + +/** + * Class for CPU Based Admission Controller in OpenSearch, which aims to provide CPU utilisation admission control. 
+ * It provides methods to apply admission control if configured limit has been reached
+ */
+public class CPUBasedAdmissionController extends AdmissionController {
+    private static final Logger LOGGER = LogManager.getLogger(CPUBasedAdmissionController.class);
+    public CPUBasedAdmissionControllerSettings settings;
+
+    /**
+     * Creates the controller with a zero rejection count and its own CPUBasedAdmissionControllerSettings instance.
+     * @param admissionControllerName name of this admission controller
+     */
+    public CPUBasedAdmissionController(String admissionControllerName, Settings settings, ClusterSettings clusterSettings) {
+        super(new AtomicLong(0), admissionControllerName);
+        this.settings = new CPUBasedAdmissionControllerSettings(clusterSettings, settings);
+    }
+
+    /**
+     * Applies this controller to the given action, but only when its transport layer mode is not disabled.
+     * @param action is the transport action
+     */
+    @Override
+    public void apply(String action) {
+        // TODO Will extend this logic further currently just incrementing rejectionCount
+        if (this.isEnabledForTransportLayer(this.settings.getTransportLayerAdmissionControllerMode())) {
+            this.applyForTransportLayer(action);
+        }
+    }
+
+    private void applyForTransportLayer(String actionName) {
+        // placeholder: only bumps the rejection counter (capped at 10) so tests can observe that the controller was triggered
+        // TODO will update rejection logic further in next PR's
+        if (this.getRejectionCount() < 10) {
+            this.addRejectionCount(1);
+        }
+    }
+}
diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/package-info.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/package-info.java
new file mode 100644
index 0000000000000..23746cc61a203
--- /dev/null
+++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/controllers/package-info.java
@@ -0,0 +1,12 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open
source license. + */ + +/** + * This package contains classes related to the different admission controllers + */ +package org.opensearch.ratelimitting.admissioncontrol.controllers; diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/AdmissionControlMode.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/AdmissionControlMode.java new file mode 100644 index 0000000000000..2ae2436ba84e7 --- /dev/null +++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/AdmissionControlMode.java @@ -0,0 +1,66 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.ratelimitting.admissioncontrol.enums; + +import java.util.Locale; + +/** + * Defines the AdmissionControlMode + */ +public enum AdmissionControlMode { + /** + * AdmissionController is completely disabled. + */ + DISABLED("disabled"), + + /** + * AdmissionController only monitors the rejection criteria for the requests. + */ + MONITOR("monitor_only"), + + /** + * AdmissionController monitors and rejects tasks that exceed resource usage thresholds. 
+ */ + ENFORCED("enforced"); + + private final String mode; + + /** + * @param mode update mode of the admission controller + */ + AdmissionControlMode(String mode) { + this.mode = mode; + } + + /** + * + * @return mode of the admission controller + */ + public String getMode() { + return this.mode; + } + + /** + * + * @param name is the mode of the current + * @return Enum of AdmissionControlMode based on the mode + */ + public static AdmissionControlMode fromName(String name) { + switch (name.toLowerCase(Locale.ROOT)) { + case "disabled": + return DISABLED; + case "monitor_only": + return MONITOR; + case "enforced": + return ENFORCED; + default: + throw new IllegalArgumentException("Invalid AdmissionControlMode: " + name); + } + } +} diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/TransportActionType.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/TransportActionType.java new file mode 100644 index 0000000000000..f2fdca0cfe49b --- /dev/null +++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/TransportActionType.java @@ -0,0 +1,45 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.ratelimitting.admissioncontrol.enums; + +import java.util.Locale; + +/** + * Enums that defines the type of the transport requests + */ +public enum TransportActionType { + INDEXING("indexing"), + SEARCH("search"); + + private final String type; + + TransportActionType(String uriType) { + this.type = uriType; + } + + /** + * + * @return type of the request + */ + public String getType() { + return type; + } + + public static TransportActionType fromName(String name) { + name = name.toLowerCase(Locale.ROOT); + switch (name) { + case "indexing": + return INDEXING; + case "search": + return SEARCH; + default: + throw new IllegalArgumentException("Not Supported TransportAction Type: " + name); + } + } +} diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/package-info.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/package-info.java new file mode 100644 index 0000000000000..98b08ebd0a7bf --- /dev/null +++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/enums/package-info.java @@ -0,0 +1,12 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +/** + * This package contains enums related to the different admission controller feature + */ +package org.opensearch.ratelimitting.admissioncontrol.enums; diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/package-info.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/package-info.java new file mode 100644 index 0000000000000..b3dc229f86fb6 --- /dev/null +++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/package-info.java @@ -0,0 +1,12 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * This package contains base classes needed for the admissionController Feature + */ +package org.opensearch.ratelimitting.admissioncontrol; diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/settings/CPUBasedAdmissionControllerSettings.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/settings/CPUBasedAdmissionControllerSettings.java new file mode 100644 index 0000000000000..141e9b68db145 --- /dev/null +++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/settings/CPUBasedAdmissionControllerSettings.java @@ -0,0 +1,110 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */
+
+package org.opensearch.ratelimitting.admissioncontrol.settings;
+
+import org.opensearch.common.settings.ClusterSettings;
+import org.opensearch.common.settings.Setting;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.ratelimitting.admissioncontrol.AdmissionControlSettings;
+import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode;
+
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Settings related to cpu based admission controller.
+ * @opensearch.internal
+ */
+public class CPUBasedAdmissionControllerSettings {
+    public static final String CPU_BASED_ADMISSION_CONTROLLER = "global_cpu_usage";
+
+    /**
+     * Default parameters for the CPUBasedAdmissionControllerSettings
+     */
+    public static class Defaults {
+        public static final long CPU_USAGE = 95;
+        public static List<String> TRANSPORT_LAYER_DEFAULT_URI_TYPE = Arrays.asList("indexing", "search");
+    }
+
+    // Written by dynamic settings update consumers and read while handling requests, hence volatile (as in AdmissionControlSettings).
+    private volatile AdmissionControlMode transportLayerMode;
+    private volatile Long searchCPULimit;
+    private volatile Long indexingCPULimit;
+    private final List<String> transportActionsList;
+    /**
+     * Controller level override of the transport layer mode (disabled, monitor_only or enforced).
+     * Falls back to {@link AdmissionControlSettings#ADMISSION_CONTROL_TRANSPORT_LAYER_MODE} when it is not set explicitly.
+     */
+    public static final Setting<AdmissionControlMode> CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE = new Setting<>(
+        "admission_control.transport.cpu_usage.mode_override",
+        AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE,
+        AdmissionControlMode::fromName,
+        Setting.Property.Dynamic,
+        Setting.Property.NodeScope
+    );
+
+    /**
+     * CPU usage limit applied to search requests; defaults to {@link Defaults#CPU_USAGE}.
+     */
+    public static final Setting<Long> SEARCH_CPU_USAGE_LIMIT = Setting.longSetting(
+        "admission_control.search.cpu_usage.limit",
+        Defaults.CPU_USAGE,
+        Setting.Property.Dynamic,
+        Setting.Property.NodeScope
+    );
+
+    /**
+     * CPU usage limit applied to indexing requests; defaults to {@link Defaults#CPU_USAGE}.
+     */
+    public static final Setting<Long> INDEXING_CPU_USAGE_LIMIT = Setting.longSetting(
+        "admission_control.indexing.cpu_usage.limit",
+        Defaults.CPU_USAGE,
+        Setting.Property.Dynamic,
+        Setting.Property.NodeScope
+    );
+
+    // Reads the initial values from the node settings and subscribes every field to updates of its dynamic setting.
+    public CPUBasedAdmissionControllerSettings(ClusterSettings clusterSettings, Settings settings) {
+        this.transportLayerMode = CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE.get(settings);
+        clusterSettings.addSettingsUpdateConsumer(CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE, this::setTransportLayerMode);
+        this.searchCPULimit = SEARCH_CPU_USAGE_LIMIT.get(settings);
+        this.indexingCPULimit = INDEXING_CPU_USAGE_LIMIT.get(settings);
+        this.transportActionsList = Defaults.TRANSPORT_LAYER_DEFAULT_URI_TYPE;
+        clusterSettings.addSettingsUpdateConsumer(INDEXING_CPU_USAGE_LIMIT, this::setIndexingCPULimit);
+        clusterSettings.addSettingsUpdateConsumer(SEARCH_CPU_USAGE_LIMIT, this::setSearchCPULimit);
+    }
+
+    private void setTransportLayerMode(AdmissionControlMode admissionControlMode) {
+        this.transportLayerMode = admissionControlMode;
+    }
+
+    public AdmissionControlMode getTransportLayerAdmissionControllerMode() {
+        return transportLayerMode;
+    }
+
+    public Long getSearchCPULimit() {
+        return searchCPULimit;
+    }
+
+    public Long getIndexingCPULimit() {
+        return indexingCPULimit;
+    }
+
+    public void setIndexingCPULimit(Long indexingCPULimit) {
+        this.indexingCPULimit = indexingCPULimit;
+    }
+
+    public void setSearchCPULimit(Long searchCPULimit) {
+        this.searchCPULimit = searchCPULimit;
+    }
+
+    public List<String> getTransportActionsList() {
+        return transportActionsList;
+    }
+}
diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/settings/package-info.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/settings/package-info.java
new file mode 100644
index 0000000000000..a024ccc756745
--- /dev/null
+++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/settings/package-info.java
@@ -0,0 +1,11 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+/**
+ * This package contains settings related classes for the different admission controllers
+ */
+package org.opensearch.ratelimitting.admissioncontrol.settings;
diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandler.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandler.java
new file mode 100644
index 0000000000000..7d0f5fbc17a51
--- /dev/null
+++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandler.java
@@ -0,0 +1,65 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.ratelimitting.admissioncontrol.transport;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.opensearch.core.concurrency.OpenSearchRejectedExecutionException;
+import org.opensearch.ratelimitting.admissioncontrol.AdmissionControlService;
+import org.opensearch.tasks.Task;
+import org.opensearch.transport.TransportChannel;
+import org.opensearch.transport.TransportRequest;
+import org.opensearch.transport.TransportRequestHandler;
+
+/**
+ * AdmissionControl Handler to intercept Transport Requests.
+ * @param <T> Transport Request
+ */
+public class AdmissionControlTransportHandler<T extends TransportRequest> implements TransportRequestHandler<T> {
+
+    private final String action;
+    private final TransportRequestHandler<T> actualHandler;
+    protected final Logger log = LogManager.getLogger(this.getClass());
+    AdmissionControlService admissionControlService;
+    boolean forceExecution;
+
+    public AdmissionControlTransportHandler(
+        String action,
+        TransportRequestHandler<T> actualHandler,
+        AdmissionControlService admissionControlService,
+        boolean forceExecution
+    ) {
+        this.action = action;
+        this.actualHandler = actualHandler;
+        this.admissionControlService = admissionControlService;
+        this.forceExecution = forceExecution;
+    }
+
+    /**
+     * Applies admission control to the request and, unless it is rejected, hands it over to the wrapped handler.
+     * @param request Transport Request that landed on the node
+     * @param channel Transport channel allows to send a response to a request
+     * @param task Current task that is executing
+     * @throws Exception if admission control fails unexpectedly or the wrapped handler throws; rejections are sent on the channel
+     */
+    @Override
+    public void messageReceived(T request, TransportChannel channel, Task task) throws Exception {
+        // intercept all the transport requests here and apply admission control
+        try {
+            // TODO Need to evaluate if we need to apply admission control or not if force Execution is true will update in next PR.
+            this.admissionControlService.applyTransportAdmissionControl(this.action);
+        } catch (final OpenSearchRejectedExecutionException openSearchRejectedExecutionException) {
+            log.warn(openSearchRejectedExecutionException.getMessage());
+            channel.sendResponse(openSearchRejectedExecutionException);
+            // the rejection is the response for this request: stop here so it is neither executed nor answered a second time
+            return;
+        }
+        actualHandler.messageReceived(request, channel, task);
+    }
+}
diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportInterceptor.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportInterceptor.java
new file mode 100644
index 0000000000000..01cfcbd780006
--- /dev/null
+++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportInterceptor.java
@@ -0,0 +1,40 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.ratelimitting.admissioncontrol.transport;
+
+import org.opensearch.ratelimitting.admissioncontrol.AdmissionControlService;
+import org.opensearch.transport.TransportInterceptor;
+import org.opensearch.transport.TransportRequest;
+import org.opensearch.transport.TransportRequestHandler;
+
+/**
+ * This class allows throttling to intercept requests on both the sender and the receiver side.
+ */ +public class AdmissionControlTransportInterceptor implements TransportInterceptor { + + AdmissionControlService admissionControlService; + + public AdmissionControlTransportInterceptor(AdmissionControlService admissionControlService) { + this.admissionControlService = admissionControlService; + } + + /** + * + * @return admissionController handler to intercept transport requests + */ + @Override + public TransportRequestHandler interceptHandler( + String action, + String executor, + boolean forceExecution, + TransportRequestHandler actualHandler + ) { + return new AdmissionControlTransportHandler<>(action, actualHandler, this.admissionControlService, forceExecution); + } +} diff --git a/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/package-info.java b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/package-info.java new file mode 100644 index 0000000000000..f97f31bc7b1db --- /dev/null +++ b/server/src/main/java/org/opensearch/ratelimitting/admissioncontrol/transport/package-info.java @@ -0,0 +1,11 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +/** + * This package contains transport related classes for the admissionController Feature + */ +package org.opensearch.ratelimitting.admissioncontrol.transport; diff --git a/server/src/main/java/org/opensearch/ratelimitting/package-info.java b/server/src/main/java/org/opensearch/ratelimitting/package-info.java new file mode 100644 index 0000000000000..c04358e14284f --- /dev/null +++ b/server/src/main/java/org/opensearch/ratelimitting/package-info.java @@ -0,0 +1,12 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +/** + * Base OpenSearch Throttling package + */ +package org.opensearch.ratelimitting; diff --git a/server/src/test/java/org/opensearch/common/network/NetworkModuleTests.java b/server/src/test/java/org/opensearch/common/network/NetworkModuleTests.java index 0ca118fe422a5..ab51cafb039c2 100644 --- a/server/src/test/java/org/opensearch/common/network/NetworkModuleTests.java +++ b/server/src/test/java/org/opensearch/common/network/NetworkModuleTests.java @@ -57,6 +57,7 @@ import org.opensearch.transport.TransportRequest; import org.opensearch.transport.TransportRequestHandler; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; @@ -124,7 +125,7 @@ public Map> getTransports( return Collections.singletonMap("custom", custom); } }; - NetworkModule module = newNetworkModule(settings, plugin); + NetworkModule module = newNetworkModule(settings, null, plugin); assertSame(custom, module.getTransportSupplier()); } @@ -135,7 +136,7 @@ public void testRegisterHttpTransport() { .build(); Supplier custom = FakeHttpTransport::new; - NetworkModule module = newNetworkModule(settings, new NetworkPlugin() { + NetworkModule module = newNetworkModule(settings, null, new NetworkPlugin() { @Override public Map> getHttpTransports( Settings settings, @@ -155,7 +156,7 @@ public Map> getHttpTransports( assertSame(custom, module.getHttpServerTransportSupplier()); settings = Settings.builder().put(NetworkModule.TRANSPORT_TYPE_KEY, "local").build(); - NetworkModule newModule = newNetworkModule(settings); + NetworkModule newModule = newNetworkModule(settings, null); expectThrows(IllegalStateException.class, () -> newModule.getHttpServerTransportSupplier()); } @@ -169,7 +170,7 @@ public void testOverrideDefault() { Supplier customTransport = () -> null; // content doesn't matter we check reference equality Supplier custom = FakeHttpTransport::new; Supplier def = FakeHttpTransport::new; - NetworkModule module = 
newNetworkModule(settings, new NetworkPlugin() { + NetworkModule module = newNetworkModule(settings, null, new NetworkPlugin() { @Override public Map> getTransports( Settings settings, @@ -214,7 +215,7 @@ public void testDefaultKeys() { Supplier custom = FakeHttpTransport::new; Supplier def = FakeHttpTransport::new; Supplier customTransport = () -> null; - NetworkModule module = newNetworkModule(settings, new NetworkPlugin() { + NetworkModule module = newNetworkModule(settings, null, new NetworkPlugin() { @Override public Map> getTransports( Settings settings, @@ -273,7 +274,7 @@ public TransportRequestHandler interceptHandler( return actualHandler; } }; - NetworkModule module = newNetworkModule(settings, new NetworkPlugin() { + NetworkModule module = newNetworkModule(settings, null, new NetworkPlugin() { @Override public List getTransportInterceptors( NamedWriteableRegistry namedWriteableRegistry, @@ -295,7 +296,7 @@ public List getTransportInterceptors( assertSame(((NetworkModule.CompositeTransportInterceptor) transportInterceptor).transportInterceptors.get(0), interceptor); NullPointerException nullPointerException = expectThrows(NullPointerException.class, () -> { - newNetworkModule(settings, new NetworkPlugin() { + newNetworkModule(settings, null, new NetworkPlugin() { @Override public List getTransportInterceptors( NamedWriteableRegistry namedWriteableRegistry, @@ -309,7 +310,186 @@ public List getTransportInterceptors( assertEquals("interceptor must not be null", nullPointerException.getMessage()); } - private NetworkModule newNetworkModule(Settings settings, NetworkPlugin... 
plugins) { + public void testRegisterCoreInterceptor() { + Settings settings = Settings.builder().put(NetworkModule.TRANSPORT_TYPE_KEY, "local").build(); + AtomicInteger called = new AtomicInteger(0); + + TransportInterceptor interceptor = new TransportInterceptor() { + @Override + public TransportRequestHandler interceptHandler( + String action, + String executor, + boolean forceExecution, + TransportRequestHandler actualHandler + ) { + called.incrementAndGet(); + if ("foo/bar/boom".equals(action)) { + assertTrue(forceExecution); + } else { + assertFalse(forceExecution); + } + return actualHandler; + } + }; + + List coreTransportInterceptors = new ArrayList<>(); + coreTransportInterceptors.add(interceptor); + + NetworkModule module = newNetworkModule(settings, coreTransportInterceptors); + + TransportInterceptor transportInterceptor = module.getTransportInterceptor(); + assertEquals(0, called.get()); + transportInterceptor.interceptHandler("foo/bar/boom", null, true, null); + assertEquals(1, called.get()); + transportInterceptor.interceptHandler("foo/baz/boom", null, false, null); + assertEquals(2, called.get()); + assertTrue(transportInterceptor instanceof NetworkModule.CompositeTransportInterceptor); + assertEquals(((NetworkModule.CompositeTransportInterceptor) transportInterceptor).transportInterceptors.size(), 1); + assertSame(((NetworkModule.CompositeTransportInterceptor) transportInterceptor).transportInterceptors.get(0), interceptor); + } + + public void testInterceptorOrder() { + Settings settings = Settings.builder().put(NetworkModule.TRANSPORT_TYPE_KEY, "local").build(); + AtomicInteger called = new AtomicInteger(0); + AtomicInteger called1 = new AtomicInteger(0); + + TransportInterceptor interceptor = new TransportInterceptor() { + @Override + public TransportRequestHandler interceptHandler( + String action, + String executor, + boolean forceExecution, + TransportRequestHandler actualHandler + ) { + called.incrementAndGet(); + if 
("foo/bar/boom".equals(action)) { + assertTrue(forceExecution); + } else { + assertFalse(forceExecution); + } + return actualHandler; + } + }; + + TransportInterceptor interceptor1 = new TransportInterceptor() { + @Override + public TransportRequestHandler interceptHandler( + String action, + String executor, + boolean forceExecution, + TransportRequestHandler actualHandler + ) { + called1.incrementAndGet(); + if ("foo/bar/boom".equals(action)) { + assertTrue(forceExecution); + } else { + assertFalse(forceExecution); + } + return actualHandler; + } + }; + + List coreTransportInterceptors = new ArrayList<>(); + coreTransportInterceptors.add(interceptor1); + + NetworkModule module = newNetworkModule(settings, coreTransportInterceptors, new NetworkPlugin() { + @Override + public List getTransportInterceptors( + NamedWriteableRegistry namedWriteableRegistry, + ThreadContext threadContext + ) { + assertNotNull(threadContext); + return Collections.singletonList(interceptor); + } + }); + + TransportInterceptor transportInterceptor = module.getTransportInterceptor(); + assertEquals(((NetworkModule.CompositeTransportInterceptor) transportInterceptor).transportInterceptors.size(), 2); + + assertEquals(0, called.get()); + assertEquals(0, called1.get()); + transportInterceptor.interceptHandler("foo/bar/boom", null, true, null); + assertEquals(1, called.get()); + assertEquals(1, called1.get()); + transportInterceptor.interceptHandler("foo/baz/boom", null, false, null); + assertEquals(2, called.get()); + assertEquals(2, called1.get()); + } + + public void testInterceptorOrderException() { + Settings settings = Settings.builder().put(NetworkModule.TRANSPORT_TYPE_KEY, "local").build(); + AtomicInteger called = new AtomicInteger(0); + AtomicInteger called1 = new AtomicInteger(0); + + TransportInterceptor interceptor = new TransportInterceptor() { + @Override + public TransportRequestHandler interceptHandler( + String action, + String executor, + boolean forceExecution, + 
TransportRequestHandler actualHandler + ) { + called.incrementAndGet(); + if ("foo/bar/boom".equals(action)) { + assertTrue(forceExecution); + } else { + assertFalse(forceExecution); + } + return actualHandler; + } + }; + + TransportInterceptor interceptor1 = new TransportInterceptor() { + @Override + public TransportRequestHandler interceptHandler( + String action, + String executor, + boolean forceExecution, + TransportRequestHandler actualHandler + ) { + called1.incrementAndGet(); + throw new RuntimeException("Handler Invoke Failed"); + } + }; + + List coreTransportInterceptors = new ArrayList<>(); + coreTransportInterceptors.add(interceptor1); + + NetworkModule module = newNetworkModule(settings, coreTransportInterceptors, new NetworkPlugin() { + @Override + public List getTransportInterceptors( + NamedWriteableRegistry namedWriteableRegistry, + ThreadContext threadContext + ) { + assertNotNull(threadContext); + return Collections.singletonList(interceptor); + } + }); + + TransportInterceptor transportInterceptor = module.getTransportInterceptor(); + assertEquals(((NetworkModule.CompositeTransportInterceptor) transportInterceptor).transportInterceptors.size(), 2); + + assertEquals(0, called.get()); + assertEquals(0, called1.get()); + try { + transportInterceptor.interceptHandler("foo/bar/boom", null, true, null); + } catch (Exception e) { + assertEquals(0, called.get()); + assertEquals(1, called1.get()); + } + try { + transportInterceptor.interceptHandler("foo/baz/boom", null, false, null); + } catch (Exception e) { + assertEquals(0, called.get()); + assertEquals(2, called1.get()); + } + } + + private NetworkModule newNetworkModule( + Settings settings, + List coreTransportInterceptors, + NetworkPlugin... plugins + ) { return new NetworkModule( settings, Arrays.asList(plugins), @@ -322,7 +502,8 @@ private NetworkModule newNetworkModule(Settings settings, NetworkPlugin... 
plugi null, new NullDispatcher(), new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), - NoopTracer.INSTANCE + NoopTracer.INSTANCE, + coreTransportInterceptors ); } } diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlServiceTests.java b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlServiceTests.java new file mode 100644 index 0000000000000..bac4eaf3fd677 --- /dev/null +++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlServiceTests.java @@ -0,0 +1,140 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.ratelimitting.admissioncontrol; + +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.ratelimitting.admissioncontrol.controllers.AdmissionController; +import org.opensearch.ratelimitting.admissioncontrol.controllers.CPUBasedAdmissionController; +import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode; +import org.opensearch.ratelimitting.admissioncontrol.settings.CPUBasedAdmissionControllerSettings; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; + +import java.util.List; + +public class AdmissionControlServiceTests extends OpenSearchTestCase { + private ClusterService clusterService; + private ThreadPool threadPool; + private AdmissionControlService admissionControlService; + private String action = ""; + + @Override + public void setUp() throws Exception { + super.setUp(); + threadPool = new TestThreadPool("admission_controller_settings_test"); + clusterService = new ClusterService( + 
Settings.EMPTY, + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), + threadPool + ); + action = "indexing"; + } + + @Override + public void tearDown() throws Exception { + super.tearDown(); + threadPool.shutdownNow(); + } + + public void testWhenAdmissionControllerRegistered() { + admissionControlService = new AdmissionControlService(Settings.EMPTY, clusterService.getClusterSettings(), threadPool); + assertEquals(admissionControlService.getAdmissionControllers().size(), 1); + } + + public void testRegisterInvalidAdmissionController() { + String test = "TEST"; + admissionControlService = new AdmissionControlService(Settings.EMPTY, clusterService.getClusterSettings(), threadPool); + assertEquals(admissionControlService.getAdmissionControllers().size(), 1); + IllegalArgumentException ex = expectThrows( + IllegalArgumentException.class, + () -> admissionControlService.registerAdmissionController(test) + ); + assertEquals(ex.getMessage(), "Not Supported AdmissionController : " + test); + } + + public void testAdmissionControllerSettings() { + admissionControlService = new AdmissionControlService(Settings.EMPTY, clusterService.getClusterSettings(), threadPool); + AdmissionControlSettings admissionControlSettings = admissionControlService.admissionControlSettings; + List admissionControllerList = admissionControlService.getAdmissionControllers(); + assertEquals(admissionControllerList.size(), 1); + CPUBasedAdmissionController cpuBasedAdmissionController = (CPUBasedAdmissionController) admissionControlService + .getAdmissionController(CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER); + assertEquals( + admissionControlSettings.isTransportLayerAdmissionControlEnabled(), + cpuBasedAdmissionController.isEnabledForTransportLayer( + cpuBasedAdmissionController.settings.getTransportLayerAdmissionControllerMode() + ) + ); + + Settings settings = Settings.builder() + 
.put(AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE.getKey(), AdmissionControlMode.DISABLED.getMode()) + .build(); + clusterService.getClusterSettings().applySettings(settings); + assertEquals( + admissionControlSettings.isTransportLayerAdmissionControlEnabled(), + cpuBasedAdmissionController.isEnabledForTransportLayer( + cpuBasedAdmissionController.settings.getTransportLayerAdmissionControllerMode() + ) + ); + assertFalse(admissionControlSettings.isTransportLayerAdmissionControlEnabled()); + + Settings newSettings = Settings.builder() + .put(settings) + .put( + CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE.getKey(), + AdmissionControlMode.ENFORCED.getMode() + ) + .build(); + clusterService.getClusterSettings().applySettings(newSettings); + assertFalse(admissionControlSettings.isTransportLayerAdmissionControlEnabled()); + assertTrue( + cpuBasedAdmissionController.isEnabledForTransportLayer( + cpuBasedAdmissionController.settings.getTransportLayerAdmissionControllerMode() + ) + ); + } + + public void testApplyAdmissionControllerDisabled() { + this.action = "indices:data/write/bulk[s][p]"; + admissionControlService = new AdmissionControlService(Settings.EMPTY, clusterService.getClusterSettings(), threadPool); + admissionControlService.applyTransportAdmissionControl(this.action); + List admissionControllerList = admissionControlService.getAdmissionControllers(); + admissionControllerList.forEach(admissionController -> { assertEquals(admissionController.getRejectionCount(), 0); }); + } + + public void testApplyAdmissionControllerEnabled() { + this.action = "indices:data/write/bulk[s][p]"; + admissionControlService = new AdmissionControlService(Settings.EMPTY, clusterService.getClusterSettings(), threadPool); + admissionControlService.applyTransportAdmissionControl(this.action); + assertEquals( + admissionControlService.getAdmissionController(CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER) + 
.getRejectionCount(), + 0 + ); + + Settings settings = Settings.builder() + .put( + CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE.getKey(), + AdmissionControlMode.MONITOR.getMode() + ) + .build(); + clusterService.getClusterSettings().applySettings(settings); + admissionControlService.applyTransportAdmissionControl(this.action); + List admissionControllerList = admissionControlService.getAdmissionControllers(); + assertEquals(admissionControllerList.size(), 1); + assertEquals( + admissionControlService.getAdmissionController(CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER) + .getRejectionCount(), + 1 + ); + } +} diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlSettingsTests.java b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlSettingsTests.java new file mode 100644 index 0000000000000..c11ee1cc608f6 --- /dev/null +++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlSettingsTests.java @@ -0,0 +1,103 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */
+
+package org.opensearch.ratelimitting.admissioncontrol;
+
+import org.opensearch.cluster.service.ClusterService;
+import org.opensearch.common.settings.ClusterSettings;
+import org.opensearch.common.settings.Setting;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode;
+import org.opensearch.test.OpenSearchTestCase;
+import org.opensearch.threadpool.TestThreadPool;
+import org.opensearch.threadpool.ThreadPool;
+
+import java.util.List;
+import java.util.Set;
+
+public class AdmissionControlSettingsTests extends OpenSearchTestCase { // defaults, configured values and dynamic updates
+    private ClusterService clusterService;
+    private ThreadPool threadPool;
+
+    @Override
+    public void setUp() throws Exception { // new thread pool and built-in ClusterSettings for every test
+        super.setUp();
+        threadPool = new TestThreadPool("admission_controller_settings_test");
+        clusterService = new ClusterService(
+            Settings.EMPTY,
+            new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS),
+            threadPool
+        );
+    }
+
+    @Override
+    public void tearDown() throws Exception { // stops the pool created in setUp
+        super.tearDown();
+        threadPool.shutdownNow();
+    }
+
+    public void testSettingsExists() { // the transport-layer mode setting must be a built-in cluster setting
+        Set<Setting<?>> settings = ClusterSettings.BUILT_IN_CLUSTER_SETTINGS;
+        assertTrue(
+            "All the admission controller settings should be supported built in settings",
+            settings.containsAll(List.of(AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE))
+        );
+    }
+
+    public void testDefaultSettings() { // empty settings: neither enabled nor enforced, mode equals Defaults.MODE
+        AdmissionControlSettings admissionControlSettings = new AdmissionControlSettings(
+            clusterService.getClusterSettings(),
+            Settings.EMPTY
+        );
+
+        assertFalse(admissionControlSettings.isTransportLayerAdmissionControlEnabled());
+        assertFalse(admissionControlSettings.isTransportLayerAdmissionControlEnforced());
+        assertEquals(AdmissionControlSettings.Defaults.MODE, admissionControlSettings.getAdmissionControlTransportLayerMode().getMode());
+    }
+
+    public void testGetConfiguredSettings() { // ENFORCED at construction: enabled and enforced
+        Settings settings = Settings.builder()
+            .put(AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE.getKey(), AdmissionControlMode.ENFORCED.getMode())
+            .build();
+
+        AdmissionControlSettings admissionControlSettings = new AdmissionControlSettings(clusterService.getClusterSettings(), settings);
+
+        assertTrue(admissionControlSettings.isTransportLayerAdmissionControlEnabled());
+        assertTrue(admissionControlSettings.isTransportLayerAdmissionControlEnforced());
+    }
+
+    public void testUpdateAfterGetDefaultSettings() { // dynamic update to MONITOR: enabled but not enforced
+        AdmissionControlSettings admissionControlSettings = new AdmissionControlSettings(
+            clusterService.getClusterSettings(),
+            Settings.EMPTY
+        );
+        Settings settings = Settings.builder()
+            .put(AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE.getKey(), AdmissionControlMode.MONITOR.getMode())
+            .build();
+        clusterService.getClusterSettings().applySettings(settings);
+        assertTrue(admissionControlSettings.isTransportLayerAdmissionControlEnabled());
+        assertFalse(admissionControlSettings.isTransportLayerAdmissionControlEnforced());
+    }
+
+    public void testUpdateAfterGetConfiguredSettings() { // MONITOR at construction, then dynamic update to ENFORCED
+        Settings settings = Settings.builder()
+            .put(AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE.getKey(), AdmissionControlMode.MONITOR.getMode())
+            .build();
+
+        AdmissionControlSettings admissionControlSettings = new AdmissionControlSettings(clusterService.getClusterSettings(), settings);
+
+        Settings newSettings = Settings.builder()
+            .put(AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE.getKey(), AdmissionControlMode.ENFORCED.getMode())
+            .build();
+
+        clusterService.getClusterSettings().applySettings(newSettings);
+
+        assertTrue(admissionControlSettings.isTransportLayerAdmissionControlEnabled());
+        assertTrue(admissionControlSettings.isTransportLayerAdmissionControlEnforced());
+    }
+}
diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionControllerTests.java
b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionControllerTests.java
new file mode 100644
index 0000000000000..af6ec0749e709
--- /dev/null
+++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CPUBasedAdmissionControllerTests.java
@@ -0,0 +1,109 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.ratelimitting.admissioncontrol.controllers;
+
+import org.opensearch.cluster.service.ClusterService;
+import org.opensearch.common.settings.ClusterSettings;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode;
+import org.opensearch.ratelimitting.admissioncontrol.settings.CPUBasedAdmissionControllerSettings;
+import org.opensearch.test.OpenSearchTestCase;
+import org.opensearch.threadpool.TestThreadPool;
+import org.opensearch.threadpool.ThreadPool;
+
+public class CPUBasedAdmissionControllerTests extends OpenSearchTestCase { // mode handling and rejection counting
+    private ClusterService clusterService;
+    private ThreadPool threadPool;
+    private CPUBasedAdmissionController admissionController; // built inside each test
+
+    private String action = "TEST_ACTION"; // overwritten by the apply() tests with a bulk shard action name
+
+    @Override
+    public void setUp() throws Exception { // new thread pool and built-in ClusterSettings for every test
+        super.setUp();
+        threadPool = new TestThreadPool("admission_controller_settings_test");
+        clusterService = new ClusterService(
+            Settings.EMPTY,
+            new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS),
+            threadPool
+        );
+    }
+
+    @Override
+    public void tearDown() throws Exception { // stops the pool created in setUp
+        super.tearDown();
+        threadPool.shutdownNow();
+    }
+
+    public void testCheckDefaultParameters() { // defaults: DISABLED mode, zero rejections, transport layer off
+        admissionController = new CPUBasedAdmissionController(
+            CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER,
+            Settings.EMPTY,
+            clusterService.getClusterSettings()
+        );
+        assertEquals(CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER, admissionController.getName());
+        assertEquals(0, admissionController.getRejectionCount());
+        assertEquals(AdmissionControlMode.DISABLED, admissionController.settings.getTransportLayerAdmissionControllerMode());
+        assertFalse(
+            admissionController.isEnabledForTransportLayer(admissionController.settings.getTransportLayerAdmissionControllerMode())
+        );
+    }
+
+    public void testCheckUpdateSettings() { // dynamic update to ENFORCED turns the transport layer on
+        admissionController = new CPUBasedAdmissionController(
+            CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER,
+            Settings.EMPTY,
+            clusterService.getClusterSettings()
+        );
+        Settings settings = Settings.builder()
+            .put(
+                CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE.getKey(),
+                AdmissionControlMode.ENFORCED.getMode()
+            )
+            .build();
+        clusterService.getClusterSettings().applySettings(settings);
+
+        assertEquals(CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER, admissionController.getName());
+        assertEquals(0, admissionController.getRejectionCount());
+        assertEquals(AdmissionControlMode.ENFORCED, admissionController.settings.getTransportLayerAdmissionControllerMode());
+        assertTrue(admissionController.isEnabledForTransportLayer(admissionController.settings.getTransportLayerAdmissionControllerMode()));
+    }
+
+    public void testApplyControllerWithDefaultSettings() { // a DISABLED controller must not count a rejection
+        admissionController = new CPUBasedAdmissionController(
+            CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER,
+            Settings.EMPTY,
+            clusterService.getClusterSettings()
+        );
+        assertEquals(0, admissionController.getRejectionCount());
+        assertEquals(AdmissionControlMode.DISABLED, admissionController.settings.getTransportLayerAdmissionControllerMode());
+        action = "indices:data/write/bulk[s][p]";
+        admissionController.apply(action);
+        assertEquals(0, admissionController.getRejectionCount());
+    }
+
+    public void testApplyControllerWhenSettingsEnabled() { // ENFORCED controller: one rejection after a single apply()
+        Settings settings = Settings.builder()
+            .put(
+                CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE.getKey(),
+                AdmissionControlMode.ENFORCED.getMode()
+            )
+            .build();
+        admissionController = new CPUBasedAdmissionController(
+            CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER,
+            settings,
+            clusterService.getClusterSettings()
+        );
+        assertTrue(admissionController.isEnabledForTransportLayer(admissionController.settings.getTransportLayerAdmissionControllerMode()));
+        assertEquals(0, admissionController.getRejectionCount());
+        action = "indices:data/write/bulk[s][p]";
+        admissionController.apply(action);
+        assertEquals(1, admissionController.getRejectionCount());
+    }
+}
diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/enums/AdmissionControlModeTests.java b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/enums/AdmissionControlModeTests.java
new file mode 100644
index 0000000000000..98c0f3c7cf24c
--- /dev/null
+++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/enums/AdmissionControlModeTests.java
@@ -0,0 +1,29 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.ratelimitting.admissioncontrol.enums;
+
+import org.opensearch.test.OpenSearchTestCase;
+
+public class AdmissionControlModeTests extends OpenSearchTestCase { // name mapping of AdmissionControlMode
+
+    public void testValidActionType() { // every mode maps to its name string and back
+        assertEquals("disabled", AdmissionControlMode.DISABLED.getMode());
+        assertEquals("enforced", AdmissionControlMode.ENFORCED.getMode());
+        assertEquals("monitor_only", AdmissionControlMode.MONITOR.getMode());
+        assertEquals(AdmissionControlMode.DISABLED, AdmissionControlMode.fromName("disabled"));
+        assertEquals(AdmissionControlMode.ENFORCED, AdmissionControlMode.fromName("enforced"));
+        assertEquals(AdmissionControlMode.MONITOR, AdmissionControlMode.fromName("monitor_only"));
+    }
+
+    public void testInValidActionType() { // unknown name: IllegalArgumentException whose message carries the name
+        String name = "TEST";
+        IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> AdmissionControlMode.fromName(name));
+        assertEquals("Invalid AdmissionControlMode: " + name, ex.getMessage());
+    }
+}
diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/enums/TransportActionTypeTests.java b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/enums/TransportActionTypeTests.java
new file mode 100644
index 0000000000000..02f582c26f54e
--- /dev/null
+++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/enums/TransportActionTypeTests.java
@@ -0,0 +1,27 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.ratelimitting.admissioncontrol.enums;
+
+import org.opensearch.test.OpenSearchTestCase;
+
+public class TransportActionTypeTests extends OpenSearchTestCase { // name mapping of TransportActionType
+
+    public void testValidActionType() { // every action type maps to its name string and back
+        assertEquals("search", TransportActionType.SEARCH.getType());
+        assertEquals("indexing", TransportActionType.INDEXING.getType());
+        assertEquals(TransportActionType.SEARCH, TransportActionType.fromName("search"));
+        assertEquals(TransportActionType.INDEXING, TransportActionType.fromName("indexing"));
+    }
+
+    public void testInValidActionType() { // unknown name: IllegalArgumentException whose message carries the name
+        String name = "test";
+        IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> TransportActionType.fromName(name));
+        assertEquals("Not Supported TransportAction Type: " + name, ex.getMessage());
+    }
+}
diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/settings/CPUBasedAdmissionControlSettingsTests.java b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/settings/CPUBasedAdmissionControlSettingsTests.java
new file mode 100644
index 0000000000000..43103926a69a2
--- /dev/null
+++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/settings/CPUBasedAdmissionControlSettingsTests.java
@@ -0,0 +1,153 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.ratelimitting.admissioncontrol.settings;
+
+import org.opensearch.cluster.service.ClusterService;
+import org.opensearch.common.settings.ClusterSettings;
+import org.opensearch.common.settings.Setting;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode;
+import org.opensearch.test.OpenSearchTestCase;
+import org.opensearch.threadpool.TestThreadPool;
+import org.opensearch.threadpool.ThreadPool;
+
+import java.util.Arrays;
+import java.util.Set;
+
+public class CPUBasedAdmissionControlSettingsTests extends OpenSearchTestCase { // defaults, configured values, dynamic updates
+    private ClusterService clusterService;
+    private ThreadPool threadPool;
+
+    @Override
+    public void setUp() throws Exception { // new thread pool and built-in ClusterSettings for every test
+        super.setUp();
+        threadPool = new TestThreadPool("admission_controller_settings_test");
+        clusterService = new ClusterService(
+            Settings.EMPTY,
+            new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS),
+            threadPool
+        );
+    }
+
+    @Override
+    public void tearDown() throws Exception { // stops the pool created in setUp
+        super.tearDown();
+        threadPool.shutdownNow();
+    }
+
+    public void testSettingsExists() { // the mode and both CPU limits must be built-in cluster settings
+        Set<Setting<?>> settings = ClusterSettings.BUILT_IN_CLUSTER_SETTINGS;
+        assertTrue(
+            "All the cpu based admission controller settings should be supported built in settings",
+            settings.containsAll(
+                Arrays.asList(
+                    CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE,
+                    CPUBasedAdmissionControllerSettings.SEARCH_CPU_USAGE_LIMIT,
+                    CPUBasedAdmissionControllerSettings.INDEXING_CPU_USAGE_LIMIT
+                )
+            )
+        );
+    }
+
+    public void testDefaultSettings() { // defaults: DISABLED mode, 95 for both limits, actions [indexing, search]
+        CPUBasedAdmissionControllerSettings cpuBasedAdmissionControllerSettings = new CPUBasedAdmissionControllerSettings(
+            clusterService.getClusterSettings(),
+            Settings.EMPTY
+        );
+        long percent = 95;
+        assertEquals(AdmissionControlMode.DISABLED, cpuBasedAdmissionControllerSettings.getTransportLayerAdmissionControllerMode());
+        assertEquals(percent, cpuBasedAdmissionControllerSettings.getIndexingCPULimit().longValue());
+        assertEquals(percent, cpuBasedAdmissionControllerSettings.getSearchCPULimit().longValue());
+        assertEquals(Arrays.asList("indexing", "search"), cpuBasedAdmissionControllerSettings.getTransportActionsList());
+    }
+
+    public void testGetConfiguredSettings() { // constructor settings change only the keys that are set
+        long percent = 95;
+        long indexingPercent = 85;
+        Settings settings = Settings.builder()
+            .put(
+                CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE.getKey(),
+                AdmissionControlMode.ENFORCED.getMode()
+            )
+            .put(CPUBasedAdmissionControllerSettings.INDEXING_CPU_USAGE_LIMIT.getKey(), indexingPercent)
+            .build();
+
+        CPUBasedAdmissionControllerSettings cpuBasedAdmissionControllerSettings = new CPUBasedAdmissionControllerSettings(
+            clusterService.getClusterSettings(),
+            settings
+        );
+        assertEquals(AdmissionControlMode.ENFORCED, cpuBasedAdmissionControllerSettings.getTransportLayerAdmissionControllerMode());
+        assertEquals(percent, cpuBasedAdmissionControllerSettings.getSearchCPULimit().longValue());
+        assertEquals(indexingPercent, cpuBasedAdmissionControllerSettings.getIndexingCPULimit().longValue());
+    }
+
+    public void testUpdateAfterGetDefaultSettings() { // dynamic update of mode and search limit; indexing keeps 95
+        long percent = 95;
+        long searchPercent = 80;
+        CPUBasedAdmissionControllerSettings cpuBasedAdmissionControllerSettings = new CPUBasedAdmissionControllerSettings(
+            clusterService.getClusterSettings(),
+            Settings.EMPTY
+        );
+        Settings settings = Settings.builder()
+            .put(
+                CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE.getKey(),
+                AdmissionControlMode.ENFORCED.getMode()
+            )
+            .put(CPUBasedAdmissionControllerSettings.SEARCH_CPU_USAGE_LIMIT.getKey(), searchPercent)
+            .build();
+        clusterService.getClusterSettings().applySettings(settings);
+        assertEquals(AdmissionControlMode.ENFORCED, cpuBasedAdmissionControllerSettings.getTransportLayerAdmissionControllerMode());
+        assertEquals(searchPercent, cpuBasedAdmissionControllerSettings.getSearchCPULimit().longValue());
+        assertEquals(percent, cpuBasedAdmissionControllerSettings.getIndexingCPULimit().longValue());
+    }
+
+    public void testUpdateAfterGetConfiguredSettings() { // two successive dynamic updates on top of constructor settings
+        long percent = 95;
+        long indexingPercent = 85;
+        long searchPercent = 80;
+        Settings settings = Settings.builder()
+            .put(
+                CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE.getKey(),
+                AdmissionControlMode.ENFORCED.getMode()
+            )
+            .put(CPUBasedAdmissionControllerSettings.SEARCH_CPU_USAGE_LIMIT.getKey(), searchPercent)
+            .build();
+
+        CPUBasedAdmissionControllerSettings cpuBasedAdmissionControllerSettings = new CPUBasedAdmissionControllerSettings(
+            clusterService.getClusterSettings(),
+            settings
+        );
+        assertEquals(AdmissionControlMode.ENFORCED, cpuBasedAdmissionControllerSettings.getTransportLayerAdmissionControllerMode());
+        assertEquals(searchPercent, cpuBasedAdmissionControllerSettings.getSearchCPULimit().longValue());
+        assertEquals(percent, cpuBasedAdmissionControllerSettings.getIndexingCPULimit().longValue());
+
+        Settings updatedSettings = Settings.builder()
+            .put(
+                CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE.getKey(),
+                AdmissionControlMode.MONITOR.getMode()
+            )
+            .put(CPUBasedAdmissionControllerSettings.INDEXING_CPU_USAGE_LIMIT.getKey(), indexingPercent)
+            .build();
+        clusterService.getClusterSettings().applySettings(updatedSettings);
+        assertEquals(AdmissionControlMode.MONITOR, cpuBasedAdmissionControllerSettings.getTransportLayerAdmissionControllerMode());
+        assertEquals(searchPercent, cpuBasedAdmissionControllerSettings.getSearchCPULimit().longValue());
+        assertEquals(indexingPercent, cpuBasedAdmissionControllerSettings.getIndexingCPULimit().longValue());
+
+        searchPercent = 70;
+
+        updatedSettings = Settings.builder()
+            .put(updatedSettings)
+            .put(CPUBasedAdmissionControllerSettings.SEARCH_CPU_USAGE_LIMIT.getKey(), searchPercent)
+            .build();
+        clusterService.getClusterSettings().applySettings(updatedSettings);
+
+        assertEquals(searchPercent, cpuBasedAdmissionControllerSettings.getSearchCPULimit().longValue());
+        assertEquals(indexingPercent, cpuBasedAdmissionControllerSettings.getIndexingCPULimit().longValue());
+    }
+}
diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandlerTests.java b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandlerTests.java
new file mode 100644
index 0000000000000..03d4819a94045
--- /dev/null
+++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/transport/AdmissionControlTransportHandlerTests.java
@@ -0,0 +1,92 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */ + +package org.opensearch.ratelimitting.admissioncontrol.transport; + +import org.opensearch.core.concurrency.OpenSearchRejectedExecutionException; +import org.opensearch.ratelimitting.admissioncontrol.AdmissionControlService; +import org.opensearch.tasks.Task; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.transport.TransportChannel; +import org.opensearch.transport.TransportRequest; +import org.opensearch.transport.TransportRequestHandler; + +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; + +public class AdmissionControlTransportHandlerTests extends OpenSearchTestCase { + AdmissionControlTransportHandler admissionControlTransportHandler; + + public void testHandlerInvoked() throws Exception { + String action = "TEST"; + InterceptingRequestHandler handler = new InterceptingRequestHandler<>(action); + admissionControlTransportHandler = new AdmissionControlTransportHandler( + action, + handler, + mock(AdmissionControlService.class), + false + ); + admissionControlTransportHandler.messageReceived(mock(TransportRequest.class), mock(TransportChannel.class), mock(Task.class)); + assertEquals(1, handler.count); + } + + public void testHandlerInvokedRejectedException() throws Exception { + String action = "TEST"; + AdmissionControlService admissionControlService = mock(AdmissionControlService.class); + doThrow(new OpenSearchRejectedExecutionException()).when(admissionControlService).applyTransportAdmissionControl(action); + InterceptingRequestHandler handler = new InterceptingRequestHandler<>(action); + admissionControlTransportHandler = new AdmissionControlTransportHandler( + action, + handler, + admissionControlService, + false + ); + try { + admissionControlTransportHandler.messageReceived(mock(TransportRequest.class), mock(TransportChannel.class), mock(Task.class)); + } catch (OpenSearchRejectedExecutionException exception) { + assertEquals(0, handler.count); + 
handler.messageReceived(mock(TransportRequest.class), mock(TransportChannel.class), mock(Task.class)); + } + assertEquals(1, handler.count); + } + + public void testHandlerInvokedRandomException() throws Exception { + String action = "TEST"; + AdmissionControlService admissionControlService = mock(AdmissionControlService.class); + doThrow(new NullPointerException()).when(admissionControlService).applyTransportAdmissionControl(action); + InterceptingRequestHandler handler = new InterceptingRequestHandler<>(action); + admissionControlTransportHandler = new AdmissionControlTransportHandler( + action, + handler, + admissionControlService, + false + ); + try { + admissionControlTransportHandler.messageReceived(mock(TransportRequest.class), mock(TransportChannel.class), mock(Task.class)); + } catch (Exception exception) { + assertEquals(0, handler.count); + handler.messageReceived(mock(TransportRequest.class), mock(TransportChannel.class), mock(Task.class)); + } + assertEquals(1, handler.count); + } + + private class InterceptingRequestHandler implements TransportRequestHandler { + private final String action; + public int count; + + public InterceptingRequestHandler(String action) { + this.action = action; + this.count = 0; + } + + @Override + public void messageReceived(T request, TransportChannel channel, Task task) throws Exception { + this.count = this.count + 1; + } + } +} From 14d4a6389bd5a7612c14e2e6fcb5a39822af9ee0 Mon Sep 17 00:00:00 2001 From: Ashish Date: Sat, 21 Oct 2023 18:42:52 +0530 Subject: [PATCH 13/45] [Remote Store] Use time elapsed since last successful local refresh for refresh lag (#10803) * [Remote Store] Use time elapsed since last successful local refresh for time lag Signed-off-by: Ashish Singh * Incorporate PR review comments Signed-off-by: Ashish Singh --------- Signed-off-by: Ashish Singh --- ...emoteStoreBackpressureAndResiliencyIT.java | 6 ++- .../remote/RemoteSegmentTransferTracker.java | 53 ++++++++++++------- 
.../remote/RemoteStorePressureService.java | 1 - .../RemoteSegmentTransferTrackerTests.java | 39 +++++++++----- .../RemoteStorePressureServiceTests.java | 32 ++++++++--- 5 files changed, 86 insertions(+), 45 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBackpressureAndResiliencyIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBackpressureAndResiliencyIT.java index 98586b60dcc69..f19c9db7874db 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBackpressureAndResiliencyIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBackpressureAndResiliencyIT.java @@ -56,7 +56,7 @@ public void testWritesRejectedDueToBytesLagBreach() throws Exception { public void testWritesRejectedDueToTimeLagBreach() throws Exception { // Initially indexing happens with doc size of 1KB, then all remote store interactions start failing. Now, the // indexing happens with doc size of 1 byte leading to time lag limit getting exceeded and leading to rejections. 
- validateBackpressure(ByteSizeUnit.KB.toIntBytes(1), 20, ByteSizeUnit.BYTES.toIntBytes(1), 15, "time_lag"); + validateBackpressure(ByteSizeUnit.KB.toIntBytes(1), 20, ByteSizeUnit.BYTES.toIntBytes(1), 3, "time_lag"); } private void validateBackpressure( @@ -133,11 +133,13 @@ private RemoteSegmentTransferTracker.Stats stats() { return matches.get(0).getSegmentStats(); } - private void indexDocAndRefresh(BytesReference source, int iterations) { + private void indexDocAndRefresh(BytesReference source, int iterations) throws InterruptedException { for (int i = 0; i < iterations; i++) { client().prepareIndex(INDEX_NAME).setSource(source, MediaTypeRegistry.JSON).get(); refresh(INDEX_NAME); } + Thread.sleep(250); + client().prepareIndex(INDEX_NAME).setSource(source, MediaTypeRegistry.JSON).get(); } /** diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteSegmentTransferTracker.java b/server/src/main/java/org/opensearch/index/remote/RemoteSegmentTransferTracker.java index 2a703f17aa953..fb65d9ef83be2 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteSegmentTransferTracker.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteSegmentTransferTracker.java @@ -27,6 +27,7 @@ import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; @@ -66,6 +67,12 @@ public class RemoteSegmentTransferTracker extends RemoteTransferTracker { */ private volatile long remoteRefreshTimeMs; + /** + * This is the time of first local refresh after the last successful remote refresh. When the remote store is in + * sync with local refresh, this will be reset to -1. + */ + private volatile long remoteRefreshStartTimeMs = -1; + /** * The refresh time(clock) of most recent remote refresh. 
*/ @@ -76,11 +83,6 @@ public class RemoteSegmentTransferTracker extends RemoteTransferTracker { */ private volatile long refreshSeqNoLag; - /** - * Keeps the time (ms) lag computed so that we do not compute it for every request. - */ - private volatile long timeMsLag; - /** * Keeps track of the total bytes of segment files which were uploaded to remote store during last successful remote refresh */ @@ -132,14 +134,19 @@ public RemoteSegmentTransferTracker( logger = Loggers.getLogger(getClass(), shardId); // Both the local refresh time and remote refresh time are set with current time to give consistent view of time lag when it arises. long currentClockTimeMs = System.currentTimeMillis(); - long currentTimeMs = System.nanoTime() / 1_000_000L; + long currentTimeMs = currentTimeMsUsingSystemNanos(); localRefreshTimeMs = currentTimeMs; remoteRefreshTimeMs = currentTimeMs; + remoteRefreshStartTimeMs = currentTimeMs; localRefreshClockTimeMs = currentClockTimeMs; remoteRefreshClockTimeMs = currentClockTimeMs; this.directoryFileTransferTracker = directoryFileTransferTracker; } + public static long currentTimeMsUsingSystemNanos() { + return TimeUnit.NANOSECONDS.toMillis(System.nanoTime()); + } + @Override public void incrementTotalUploadsFailed() { super.incrementTotalUploadsFailed(); @@ -180,19 +187,22 @@ public long getLocalRefreshClockTimeMs() { */ public void updateLocalRefreshTimeAndSeqNo() { updateLocalRefreshClockTimeMs(System.currentTimeMillis()); - updateLocalRefreshTimeMs(System.nanoTime() / 1_000_000L); + updateLocalRefreshTimeMs(currentTimeMsUsingSystemNanos()); updateLocalRefreshSeqNo(getLocalRefreshSeqNo() + 1); } // Visible for testing - void updateLocalRefreshTimeMs(long localRefreshTimeMs) { + synchronized void updateLocalRefreshTimeMs(long localRefreshTimeMs) { assert localRefreshTimeMs >= this.localRefreshTimeMs : "newLocalRefreshTimeMs=" + localRefreshTimeMs + " < " + "currentLocalRefreshTimeMs=" + this.localRefreshTimeMs; + boolean 
isRemoteInSyncBeforeLocalRefresh = this.localRefreshTimeMs == this.remoteRefreshTimeMs; this.localRefreshTimeMs = localRefreshTimeMs; - computeTimeMsLag(); + if (isRemoteInSyncBeforeLocalRefresh) { + this.remoteRefreshStartTimeMs = localRefreshTimeMs; + } } private void updateLocalRefreshClockTimeMs(long localRefreshClockTimeMs) { @@ -221,14 +231,18 @@ long getRemoteRefreshClockTimeMs() { return remoteRefreshClockTimeMs; } - public void updateRemoteRefreshTimeMs(long remoteRefreshTimeMs) { - assert remoteRefreshTimeMs >= this.remoteRefreshTimeMs : "newRemoteRefreshTimeMs=" - + remoteRefreshTimeMs + public synchronized void updateRemoteRefreshTimeMs(long refreshTimeMs) { + assert refreshTimeMs >= this.remoteRefreshTimeMs : "newRemoteRefreshTimeMs=" + + refreshTimeMs + " < " + "currentRemoteRefreshTimeMs=" + this.remoteRefreshTimeMs; - this.remoteRefreshTimeMs = remoteRefreshTimeMs; - computeTimeMsLag(); + this.remoteRefreshTimeMs = refreshTimeMs; + // When multiple refreshes have failed, there is a possibility that retry is ongoing while another refresh gets + // triggered. After the segments have been uploaded and before the below code runs, the updateLocalRefreshTimeAndSeqNo + // method is triggered, which will update the local localRefreshTimeMs. Now, the lag would basically become the + // time since the last refresh happened locally. + this.remoteRefreshStartTimeMs = refreshTimeMs == this.localRefreshTimeMs ? 
-1 : this.localRefreshTimeMs; } public void updateRemoteRefreshClockTimeMs(long remoteRefreshClockTimeMs) { @@ -243,12 +257,11 @@ public long getRefreshSeqNoLag() { return refreshSeqNoLag; } - private void computeTimeMsLag() { - timeMsLag = localRefreshTimeMs - remoteRefreshTimeMs; - } - public long getTimeMsLag() { - return timeMsLag; + if (remoteRefreshTimeMs == localRefreshTimeMs) { + return 0; + } + return currentTimeMsUsingSystemNanos() - remoteRefreshStartTimeMs; } public long getBytesLag() { @@ -354,7 +367,7 @@ public RemoteSegmentTransferTracker.Stats stats() { shardId, localRefreshClockTimeMs, remoteRefreshClockTimeMs, - timeMsLag, + getTimeMsLag(), localRefreshSeqNo, remoteRefreshSeqNo, uploadBytesStarted.get(), diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteStorePressureService.java b/server/src/main/java/org/opensearch/index/remote/RemoteStorePressureService.java index 2920b33921869..33cd40f802d43 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteStorePressureService.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteStorePressureService.java @@ -180,7 +180,6 @@ public boolean validate(RemoteSegmentTransferTracker pressureTracker, ShardId sh return true; } if (pressureTracker.isUploadTimeMovingAverageReady() == false) { - logger.trace("upload time moving average is not ready"); return true; } long timeLag = pressureTracker.getTimeMsLag(); diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteSegmentTransferTrackerTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteSegmentTransferTrackerTests.java index 0bf00f9e48137..c87cdfcc8f1a1 100644 --- a/server/src/test/java/org/opensearch/index/remote/RemoteSegmentTransferTrackerTests.java +++ b/server/src/test/java/org/opensearch/index/remote/RemoteSegmentTransferTrackerTests.java @@ -23,6 +23,8 @@ import java.util.HashMap; import java.util.Map; +import static 
org.opensearch.index.remote.RemoteSegmentTransferTracker.currentTimeMsUsingSystemNanos; + public class RemoteSegmentTransferTrackerTests extends OpenSearchTestCase { private RemoteStoreStatsTrackerFactory remoteStoreStatsTrackerFactory; private ClusterService clusterService; @@ -92,7 +94,7 @@ public void testUpdateLocalRefreshTimeMs() { directoryFileTransferTracker, remoteStoreStatsTrackerFactory.getMovingAverageWindowSize() ); - long refreshTimeMs = System.nanoTime() / 1_000_000L + randomIntBetween(10, 100); + long refreshTimeMs = currentTimeMsUsingSystemNanos() + randomIntBetween(10, 100); transferTracker.updateLocalRefreshTimeMs(refreshTimeMs); assertEquals(refreshTimeMs, transferTracker.getLocalRefreshTimeMs()); } @@ -103,7 +105,7 @@ public void testUpdateRemoteRefreshTimeMs() { directoryFileTransferTracker, remoteStoreStatsTrackerFactory.getMovingAverageWindowSize() ); - long refreshTimeMs = System.nanoTime() / 1_000_000 + randomIntBetween(10, 100); + long refreshTimeMs = currentTimeMsUsingSystemNanos() + randomIntBetween(10, 100); transferTracker.updateRemoteRefreshTimeMs(refreshTimeMs); assertEquals(refreshTimeMs, transferTracker.getRemoteRefreshTimeMs()); } @@ -133,20 +135,29 @@ public void testComputeSeqNoLagOnUpdate() { assertEquals(localRefreshSeqNo - remoteRefreshSeqNo, transferTracker.getRefreshSeqNoLag()); } - public void testComputeTimeLagOnUpdate() { + public void testComputeTimeLagOnUpdate() throws InterruptedException { transferTracker = new RemoteSegmentTransferTracker( shardId, directoryFileTransferTracker, remoteStoreStatsTrackerFactory.getMovingAverageWindowSize() ); - long currentLocalRefreshTimeMs = transferTracker.getLocalRefreshTimeMs(); - long currentTimeMs = System.nanoTime() / 1_000_000L; - long localRefreshTimeMs = currentTimeMs + randomIntBetween(100, 500); - long remoteRefreshTimeMs = currentTimeMs + randomIntBetween(50, 99); - transferTracker.updateLocalRefreshTimeMs(localRefreshTimeMs); - assertEquals(localRefreshTimeMs - 
currentLocalRefreshTimeMs, transferTracker.getTimeMsLag()); - transferTracker.updateRemoteRefreshTimeMs(remoteRefreshTimeMs); - assertEquals(localRefreshTimeMs - remoteRefreshTimeMs, transferTracker.getTimeMsLag()); + + // No lag if there is a remote upload corresponding to a local refresh + assertEquals(0, transferTracker.getTimeMsLag()); + + // Set a local refresh time that is higher than remote refresh time + Thread.sleep(1); + transferTracker.updateLocalRefreshTimeMs(currentTimeMsUsingSystemNanos()); + + // Sleep for 100ms and then the lag should be within 100ms +/- 20ms + Thread.sleep(100); + assertTrue(Math.abs(transferTracker.getTimeMsLag() - 100) <= 20); + + transferTracker.updateRemoteRefreshTimeMs(transferTracker.getLocalRefreshTimeMs()); + transferTracker.updateLocalRefreshTimeMs(currentTimeMsUsingSystemNanos()); + long random = randomIntBetween(50, 200); + Thread.sleep(random); + assertTrue(Math.abs(transferTracker.getTimeMsLag() - random) <= 20); } public void testAddUploadBytesStarted() { @@ -519,7 +530,7 @@ public void testStatsObjectCreation() { transferTracker = constructTracker(); RemoteSegmentTransferTracker.Stats transferTrackerStats = transferTracker.stats(); assertEquals(transferTracker.getShardId(), transferTrackerStats.shardId); - assertEquals(transferTracker.getTimeMsLag(), (int) transferTrackerStats.refreshTimeLagMs); + assertTrue(Math.abs(transferTracker.getTimeMsLag() - transferTrackerStats.refreshTimeLagMs) <= 20); assertEquals(transferTracker.getLocalRefreshSeqNo(), (int) transferTrackerStats.localRefreshNumber); assertEquals(transferTracker.getRemoteRefreshSeqNo(), (int) transferTrackerStats.remoteRefreshNumber); assertEquals(transferTracker.getBytesLag(), (int) transferTrackerStats.bytesLag); @@ -591,9 +602,9 @@ private RemoteSegmentTransferTracker constructTracker() { ); transferTracker.incrementTotalUploadsStarted(); transferTracker.incrementTotalUploadsFailed(); - transferTracker.updateUploadTimeMovingAverage(System.nanoTime() / 
1_000_000L + randomIntBetween(10, 100)); + transferTracker.updateUploadTimeMovingAverage(currentTimeMsUsingSystemNanos() + randomIntBetween(10, 100)); transferTracker.updateUploadBytesMovingAverage(99); - transferTracker.updateRemoteRefreshTimeMs(System.nanoTime() / 1_000_000L + randomIntBetween(10, 100)); + transferTracker.updateRemoteRefreshTimeMs(currentTimeMsUsingSystemNanos() + randomIntBetween(10, 100)); transferTracker.incrementRejectionCount(); transferTracker.getDirectoryFileTransferTracker().addTransferredBytesStarted(10); transferTracker.getDirectoryFileTransferTracker().addTransferredBytesSucceeded(10, System.currentTimeMillis()); diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteStorePressureServiceTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteStorePressureServiceTests.java index de610083f3327..cb77174e612fd 100644 --- a/server/src/test/java/org/opensearch/index/remote/RemoteStorePressureServiceTests.java +++ b/server/src/test/java/org/opensearch/index/remote/RemoteStorePressureServiceTests.java @@ -21,8 +21,11 @@ import java.util.HashMap; import java.util.Map; import java.util.concurrent.atomic.AtomicLong; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.util.stream.IntStream; +import static org.opensearch.index.remote.RemoteSegmentTransferTracker.currentTimeMsUsingSystemNanos; import static org.opensearch.index.remote.RemoteStoreTestsHelper.createIndexShard; public class RemoteStorePressureServiceTests extends OpenSearchTestCase { @@ -68,7 +71,7 @@ public void testIsSegmentsUploadBackpressureEnabled() { assertTrue(pressureService.isSegmentsUploadBackpressureEnabled()); } - public void testValidateSegmentUploadLag() { + public void testValidateSegmentUploadLag() throws InterruptedException { // Create the pressure tracker IndexShard indexShard = createIndexShard(shardId, true); remoteStoreStatsTrackerFactory = new RemoteStoreStatsTrackerFactory(clusterService, Settings.EMPTY); @@ 
-86,14 +89,27 @@ public void testValidateSegmentUploadLag() { sum.addAndGet(i); }); double avg = (double) sum.get() / 20; - long currentMs = System.nanoTime() / 1_000_000; - pressureTracker.updateLocalRefreshTimeMs((long) (currentMs + 12 * avg)); - pressureTracker.updateRemoteRefreshTimeMs(currentMs); - Exception e = assertThrows(OpenSearchRejectedExecutionException.class, () -> pressureService.validateSegmentsUploadLag(shardId)); - assertTrue(e.getMessage().contains("due to remote segments lagging behind local segments")); - assertTrue(e.getMessage().contains("time_lag:114 ms dynamic_time_lag_threshold:95.0 ms")); - pressureTracker.updateRemoteRefreshTimeMs((long) (currentMs + 2 * avg)); + // We run this to ensure that the local and remote refresh time are not same anymore + while (pressureTracker.getLocalRefreshTimeMs() == currentTimeMsUsingSystemNanos()) { + Thread.sleep(10); + } + long localRefreshTimeMs = currentTimeMsUsingSystemNanos(); + pressureTracker.updateLocalRefreshTimeMs(localRefreshTimeMs); + + while (currentTimeMsUsingSystemNanos() - localRefreshTimeMs <= 20 * avg) { + Thread.sleep((long) (4 * avg)); + } + Exception e = assertThrows(OpenSearchRejectedExecutionException.class, () -> pressureService.validateSegmentsUploadLag(shardId)); + String regex = "^rejected execution on primary shard:\\[index]\\[0] due to remote segments lagging behind " + + "local segments.time_lag:[0-9]{2,3} ms dynamic_time_lag_threshold:95\\.0 ms$"; + Pattern pattern = Pattern.compile(regex); + Matcher matcher = pattern.matcher(e.getMessage()); + assertTrue(matcher.matches()); + + pressureTracker.updateRemoteRefreshTimeMs(pressureTracker.getLocalRefreshTimeMs()); + pressureTracker.updateLocalRefreshTimeMs(currentTimeMsUsingSystemNanos()); + Thread.sleep((long) (2 * avg)); pressureService.validateSegmentsUploadLag(shardId); // 2. 
bytes lag more than dynamic threshold From 911afc4002b3a9faa86e109871332af0caaa73d0 Mon Sep 17 00:00:00 2001 From: Dhwanil Patel Date: Sun, 22 Oct 2023 11:31:31 +0530 Subject: [PATCH 14/45] Remove remote store attributes from DiscoveryNode toString() (#10810) Signed-off-by: Dhwanil Patel --- .../cluster/node/DiscoveryNode.java | 8 +++++++- .../cluster/node/DiscoveryNodeTests.java | 19 +++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java b/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java index 4e49b25eb5789..0c58aabf95207 100644 --- a/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java +++ b/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java @@ -553,7 +553,13 @@ public String toString() { sb.append('}'); } if (!attributes.isEmpty()) { - sb.append(attributes); + sb.append( + attributes.entrySet() + .stream() + .filter(entry -> !entry.getKey().startsWith(REMOTE_STORE_NODE_ATTRIBUTE_KEY_PREFIX)) // filter remote_store attributes + // from logging to reduce noise. 
+ .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)) + ); } return sb.toString(); } diff --git a/server/src/test/java/org/opensearch/cluster/node/DiscoveryNodeTests.java b/server/src/test/java/org/opensearch/cluster/node/DiscoveryNodeTests.java index 8b61e8f6d724d..c8a6fc76ce820 100644 --- a/server/src/test/java/org/opensearch/cluster/node/DiscoveryNodeTests.java +++ b/server/src/test/java/org/opensearch/cluster/node/DiscoveryNodeTests.java @@ -38,13 +38,16 @@ import org.opensearch.common.settings.Settings; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.transport.TransportAddress; +import org.opensearch.node.remotestore.RemoteStoreNodeAttribute; import org.opensearch.test.NodeRoles; import org.opensearch.test.OpenSearchTestCase; import java.net.InetAddress; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.Locale; +import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -81,6 +84,22 @@ public void testRolesAreSorted() { } + public void testRemoteStoreRedactionInToString() { + final Set roles = new HashSet<>(randomSubsetOf(DiscoveryNodeRole.BUILT_IN_ROLES)); + Map attributes = new HashMap<>(); + attributes.put(RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, "test-repo"); + attributes.put(RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, "test-repo"); + final DiscoveryNode node = new DiscoveryNode( + "name", + "id", + new TransportAddress(TransportAddress.META_ADDRESS, 9200), + attributes, + roles, + Version.CURRENT + ); + assertFalse(node.toString().contains(RemoteStoreNodeAttribute.REMOTE_STORE_NODE_ATTRIBUTE_KEY_PREFIX)); + } + public void testDiscoveryNodeIsCreatedWithHostFromInetAddress() throws Exception { InetAddress inetAddress = randomBoolean() ? 
InetAddress.getByName("192.0.2.1") From 79c6e322f584e72b48793658f6a3456d1eea1fc2 Mon Sep 17 00:00:00 2001 From: Varun Bansal Date: Sun, 22 Oct 2023 11:54:10 +0530 Subject: [PATCH 15/45] Adds util to compare Metadata for remote state (#10661) * Adds util to compare Metadata for remote state durng restore flow when identifying if 2 clusterUUIDs have same cluster state Signed-off-by: bansvaru --- .../opensearch/cluster/metadata/Metadata.java | 17 +- .../remote/RemoteClusterStateService.java | 12 +- .../cluster/metadata/MetadataTests.java | 33 ++++ .../RemoteClusterStateServiceTests.java | 163 ++++++++++++------ 4 files changed, 168 insertions(+), 57 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java b/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java index 626903877b0c6..70c1d059a1b9e 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java @@ -922,19 +922,26 @@ public static boolean isGlobalStateEquals(Metadata metadata1, Metadata metadata2 if (!metadata1.coordinationMetadata.equals(metadata2.coordinationMetadata)) { return false; } - if (!metadata1.persistentSettings.equals(metadata2.persistentSettings)) { + if (!metadata1.hashesOfConsistentSettings.equals(metadata2.hashesOfConsistentSettings)) { return false; } - if (!metadata1.hashesOfConsistentSettings.equals(metadata2.hashesOfConsistentSettings)) { + if (!metadata1.clusterUUID.equals(metadata2.clusterUUID)) { return false; } - if (!metadata1.templates.equals(metadata2.templates())) { + if (metadata1.clusterUUIDCommitted != metadata2.clusterUUIDCommitted) { return false; } - if (!metadata1.clusterUUID.equals(metadata2.clusterUUID)) { + return isGlobalResourcesMetadataEquals(metadata1, metadata2); + } + + /** + * Compares Metadata entities persisted in Remote Store. 
+ */ + public static boolean isGlobalResourcesMetadataEquals(Metadata metadata1, Metadata metadata2) { + if (!metadata1.persistentSettings.equals(metadata2.persistentSettings)) { return false; } - if (metadata1.clusterUUIDCommitted != metadata2.clusterUUIDCommitted) { + if (!metadata1.templates.equals(metadata2.templates())) { return false; } // Check if any persistent metadata needs to be saved diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index 96ce2fc779ea0..13fbda77d8a2d 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -689,10 +689,11 @@ private Map getIndexMetadataMap( * @return {@link IndexMetadata} */ private IndexMetadata getIndexMetadata(String clusterName, String clusterUUID, UploadedIndexMetadata uploadedIndexMetadata) { + BlobContainer blobContainer = indexMetadataContainer(clusterName, clusterUUID, uploadedIndexMetadata.getIndexUUID()); try { String[] splitPath = uploadedIndexMetadata.getUploadedFilename().split("/"); return INDEX_METADATA_FORMAT.read( - indexMetadataContainer(clusterName, clusterUUID, uploadedIndexMetadata.getIndexUUID()), + blobContainer, splitPath[splitPath.length - 1], blobStoreRepository.getNamedXContentRegistry() ); @@ -882,7 +883,8 @@ private Map trimClusterUUIDs( } } else { ClusterMetadataManifest previousManifest = trimmedUUIDs.get(currentManifest.getPreviousClusterUUID()); - if (isMetadataEqual(currentManifest, previousManifest, clusterName)) { + if (isMetadataEqual(currentManifest, previousManifest, clusterName) + && isGlobalMetadataEqual(currentManifest, previousManifest, clusterName)) { trimmedUUIDs.remove(clusterUUID); } } @@ -912,6 +914,12 @@ private boolean isMetadataEqual(ClusterMetadataManifest first, ClusterMetadataMa return true; } + private boolean 
isGlobalMetadataEqual(ClusterMetadataManifest first, ClusterMetadataManifest second, String clusterName) { + Metadata secondGlobalMetadata = getGlobalMetadata(clusterName, second.getClusterUUID(), second); + Metadata firstGlobalMetadata = getGlobalMetadata(clusterName, first.getClusterUUID(), first); + return Metadata.isGlobalResourcesMetadataEquals(firstGlobalMetadata, secondGlobalMetadata); + } + private boolean isInvalidClusterUUID(ClusterMetadataManifest manifest) { return !manifest.isClusterUUIDCommitted(); } diff --git a/server/src/test/java/org/opensearch/cluster/metadata/MetadataTests.java b/server/src/test/java/org/opensearch/cluster/metadata/MetadataTests.java index 40eefa6cdbf03..618fcb923bc60 100644 --- a/server/src/test/java/org/opensearch/cluster/metadata/MetadataTests.java +++ b/server/src/test/java/org/opensearch/cluster/metadata/MetadataTests.java @@ -627,6 +627,39 @@ public void testGlobalStateEqualsCoordinationMetadata() { assertFalse(Metadata.isGlobalStateEquals(metadata1, metadata2)); } + public void testGlobalResourcesStateEqualsCoordinationMetadata() { + CoordinationMetadata coordinationMetadata1 = new CoordinationMetadata( + randomNonNegativeLong(), + randomVotingConfig(), + randomVotingConfig(), + randomVotingConfigExclusions() + ); + Metadata metadata1 = Metadata.builder() + .coordinationMetadata(coordinationMetadata1) + .clusterUUID(randomAlphaOfLength(10)) + .clusterUUIDCommitted(false) + .hashesOfConsistentSettings(Map.of("a", "b")) + .persistentSettings(Settings.builder().put(Metadata.SETTING_READ_ONLY_SETTING.getKey(), true).build()) + .build(); + CoordinationMetadata coordinationMetadata2 = new CoordinationMetadata( + randomNonNegativeLong(), + randomVotingConfig(), + randomVotingConfig(), + randomVotingConfigExclusions() + ); + Metadata metadata2 = Metadata.builder() + .coordinationMetadata(coordinationMetadata2) + .clusterUUIDCommitted(true) + .clusterUUID(randomAlphaOfLength(11)) + .hashesOfConsistentSettings(Map.of("b", "a")) + 
.persistentSettings(Settings.builder().put(Metadata.SETTING_READ_ONLY_SETTING.getKey(), true).build()) + .build(); + + assertTrue(Metadata.isGlobalStateEquals(metadata1, metadata1)); + assertFalse(Metadata.isGlobalStateEquals(metadata1, metadata2)); + assertTrue(Metadata.isGlobalResourcesMetadataEquals(metadata1, metadata2)); + } + public void testSerializationWithIndexGraveyard() throws IOException { final IndexGraveyard graveyard = IndexGraveyardTests.createRandom(); final Metadata originalMeta = Metadata.builder().indexGraveyard(graveyard).build(); diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index 5202f31c514ed..55094400581b4 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -9,6 +9,7 @@ package org.opensearch.gateway.remote; import org.opensearch.Version; +import org.opensearch.cluster.ClusterModule; import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.coordination.CoordinationMetadata; @@ -27,6 +28,7 @@ import org.opensearch.common.blobstore.transfer.RemoteTransferContainer; import org.opensearch.common.compress.DeflateCompressor; import org.opensearch.common.lucene.store.ByteArrayIndexInput; +import org.opensearch.common.network.NetworkModule; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.core.ParseField; @@ -37,6 +39,7 @@ import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata; import org.opensearch.index.remote.RemoteStoreUtils; +import org.opensearch.indices.IndicesModule; import org.opensearch.repositories.FilterRepository; import 
org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.RepositoryMissingException; @@ -65,11 +68,14 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Function; import java.util.function.Supplier; +import java.util.stream.Stream; import org.mockito.ArgumentCaptor; import org.mockito.ArgumentMatchers; +import static java.util.stream.Collectors.toList; import static org.opensearch.gateway.remote.RemoteClusterStateService.DELIMITER; import static org.opensearch.gateway.remote.RemoteClusterStateService.FORMAT_PARAMS; import static org.opensearch.gateway.remote.RemoteClusterStateService.INDEX_METADATA_CURRENT_CODEC_VERSION; @@ -126,11 +132,19 @@ public void setup() { .put(RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true) .build(); + NamedXContentRegistry xContentRegistry = new NamedXContentRegistry( + Stream.of( + NetworkModule.getNamedXContents().stream(), + IndicesModule.getNamedXContents().stream(), + ClusterModule.getNamedXWriteables().stream() + ).flatMap(Function.identity()).collect(toList()) + ); + blobStoreRepository = mock(BlobStoreRepository.class); blobStore = mock(BlobStore.class); when(blobStoreRepository.blobStore()).thenReturn(blobStore); when(repositoriesService.repository("remote_store_repository")).thenReturn(blobStoreRepository); - when(blobStoreRepository.getNamedXContentRegistry()).thenReturn(new NamedXContentRegistry(new ArrayList<>())); + when(blobStoreRepository.getNamedXContentRegistry()).thenReturn(xContentRegistry); remoteClusterStateService = new RemoteClusterStateService( "test-node-id", repositoriesServiceSupplier, @@ -887,7 +901,7 @@ public void testGetValidPreviousClusterUUIDWithMultipleChains() throws IOExcepti "cluster-uuid3", "cluster-uuid1" ); - mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers); + 
mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, randomBoolean()); remoteClusterStateService.start(); String previousClusterUUID = remoteClusterStateService.getLastKnownUUIDFromRemote("test-cluster"); @@ -1040,6 +1054,11 @@ public void testSingleConcurrentExecutionOfStaleManifestCleanup() throws Excepti } private void mockObjectsForGettingPreviousClusterUUID(Map clusterUUIDsPointers) throws IOException { + mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, false); + } + + private void mockObjectsForGettingPreviousClusterUUID(Map clusterUUIDsPointers, boolean differGlobalMetadata) + throws IOException { final BlobPath blobPath = mock(BlobPath.class); when((blobStoreRepository.basePath())).thenReturn(blobPath); when(blobPath.add(anyString())).thenReturn(blobPath); @@ -1061,7 +1080,8 @@ private void mockObjectsForGettingPreviousClusterUUID(Map cluste "cluster-uuid1", clusterUUIDsPointers.get("cluster-uuid1"), randomAlphaOfLength(10), - uploadedIndexMetadataList1 + uploadedIndexMetadataList1, + "test-metadata1" ); Settings indexSettings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT).build(); IndexMetadata indexMetadata1 = IndexMetadata.builder("index1") @@ -1074,8 +1094,12 @@ private void mockObjectsForGettingPreviousClusterUUID(Map cluste .numberOfShards(1) .numberOfReplicas(1) .build(); + Metadata metadata1 = Metadata.builder() + .persistentSettings(Settings.builder().put(Metadata.SETTING_READ_ONLY_SETTING.getKey(), true).build()) + .build(); Map indexMetadataMap1 = Map.of("index-uuid1", indexMetadata1, "index-uuid2", indexMetadata2); - mockBlobContainer(blobContainer1, clusterManifest1, indexMetadataMap1); + mockBlobContainerForGlobalMetadata(blobContainer1, clusterManifest1, metadata1); + mockBlobContainer(blobContainer1, clusterManifest1, indexMetadataMap1, ClusterMetadataManifest.CODEC_V1); List uploadedIndexMetadataList2 = List.of( new UploadedIndexMetadata("index1", "index-uuid1", "key1"), @@ -1085,7 
+1109,8 @@ private void mockObjectsForGettingPreviousClusterUUID(Map cluste "cluster-uuid2", clusterUUIDsPointers.get("cluster-uuid2"), randomAlphaOfLength(10), - uploadedIndexMetadataList2 + uploadedIndexMetadataList2, + "test-metadata2" ); IndexMetadata indexMetadata3 = IndexMetadata.builder("index1") .settings(indexSettings) @@ -1097,37 +1122,59 @@ private void mockObjectsForGettingPreviousClusterUUID(Map cluste .numberOfShards(1) .numberOfReplicas(1) .build(); + Metadata metadata2 = Metadata.builder() + .persistentSettings(Settings.builder().put(Metadata.SETTING_READ_ONLY_SETTING.getKey(), true).build()) + .build(); Map indexMetadataMap2 = Map.of("index-uuid1", indexMetadata3, "index-uuid2", indexMetadata4); - mockBlobContainer(blobContainer2, clusterManifest2, indexMetadataMap2); + mockBlobContainerForGlobalMetadata(blobContainer2, clusterManifest2, metadata2); + mockBlobContainer(blobContainer2, clusterManifest2, indexMetadataMap2, ClusterMetadataManifest.CODEC_V1); + + // differGlobalMetadata controls which one of IndexMetadata or Metadata object would be different + // when comparing cluster-uuid3 and cluster-uuid1 state. + // If set to true, only Metadata will differ b/w cluster uuid1 and cluster uuid3. + // If set to false, only IndexMetadata would be different + // Adding difference in EXACTLY one of these randomly will help us test if our uuid trimming logic compares both + // IndexMetadata and Metadata when deciding if the remote state b/w two different cluster uuids is same. + List uploadedIndexMetadataList3 = differGlobalMetadata + ? new ArrayList<>(uploadedIndexMetadataList1) + : List.of(new UploadedIndexMetadata("index1", "index-uuid1", "key1")); + IndexMetadata indexMetadata5 = IndexMetadata.builder("index1") + .settings(indexSettings) + .numberOfShards(1) + .numberOfReplicas(1) + .build(); + Map indexMetadataMap3 = differGlobalMetadata + ? 
new HashMap<>(indexMetadataMap1) + : Map.of("index-uuid1", indexMetadata5); + Metadata metadata3 = Metadata.builder() + .persistentSettings(Settings.builder().put(Metadata.SETTING_READ_ONLY_SETTING.getKey(), !differGlobalMetadata).build()) + .build(); - List uploadedIndexMetadataList3 = List.of(new UploadedIndexMetadata("index1", "index-uuid1", "key1")); final ClusterMetadataManifest clusterManifest3 = generateClusterMetadataManifest( "cluster-uuid3", clusterUUIDsPointers.get("cluster-uuid3"), randomAlphaOfLength(10), - uploadedIndexMetadataList3 + uploadedIndexMetadataList3, + "test-metadata3" ); - IndexMetadata indexMetadata5 = IndexMetadata.builder("index1") - .settings(indexSettings) - .numberOfShards(1) - .numberOfReplicas(1) - .build(); - Map indexMetadataMap3 = Map.of("index-uuid1", indexMetadata5); - mockBlobContainer(blobContainer3, clusterManifest3, indexMetadataMap3); - - when(blobStore.blobContainer(ArgumentMatchers.any())).thenReturn( - uuidBlobContainer, - blobContainer1, - blobContainer1, - blobContainer3, - blobContainer3, - blobContainer2, - blobContainer2, - blobContainer1, - blobContainer2, - blobContainer1, - blobContainer2 + mockBlobContainerForGlobalMetadata(blobContainer3, clusterManifest3, metadata3); + mockBlobContainer(blobContainer3, clusterManifest3, indexMetadataMap3, ClusterMetadataManifest.CODEC_V1); + + ArrayList mockBlobContainerOrderedList = new ArrayList<>( + List.of(blobContainer1, blobContainer1, blobContainer3, blobContainer3, blobContainer2, blobContainer2) ); + + if (differGlobalMetadata) { + mockBlobContainerOrderedList.addAll( + List.of(blobContainer3, blobContainer1, blobContainer3, blobContainer1, blobContainer1, blobContainer3) + ); + } + mockBlobContainerOrderedList.addAll( + List.of(blobContainer2, blobContainer1, blobContainer2, blobContainer1, blobContainer1, blobContainer2) + ); + BlobContainer[] mockBlobContainerOrderedArray = new BlobContainer[mockBlobContainerOrderedList.size()]; + 
mockBlobContainerOrderedList.toArray(mockBlobContainerOrderedArray); + when(blobStore.blobContainer(ArgumentMatchers.any())).thenReturn(uuidBlobContainer, mockBlobContainerOrderedArray); when(blobStoreRepository.getCompressor()).thenReturn(new DeflateCompressor()); } @@ -1135,7 +1182,8 @@ private ClusterMetadataManifest generateClusterMetadataManifest( String clusterUUID, String previousClusterUUID, String stateUUID, - List uploadedIndexMetadata + List uploadedIndexMetadata, + String globalMetadataFileName ) { return ClusterMetadataManifest.builder() .indices(uploadedIndexMetadata) @@ -1148,7 +1196,8 @@ private ClusterMetadataManifest generateClusterMetadataManifest( .previousClusterUUID(previousClusterUUID) .committed(true) .clusterUUIDCommitted(true) - .globalMetadataFileName("test-global-metadata") + .globalMetadataFileName(globalMetadataFileName) + .codecVersion(ClusterMetadataManifest.CODEC_V1) .build(); } @@ -1181,17 +1230,29 @@ private void mockBlobContainer( ClusterMetadataManifest clusterMetadataManifest, Map indexMetadataMap ) throws IOException { - BlobMetadata blobMetadata = new PlainBlobMetadata("manifestFileName", 1); + mockBlobContainer(blobContainer, clusterMetadataManifest, indexMetadataMap, ClusterMetadataManifest.CODEC_V0); + } + + private void mockBlobContainer( + BlobContainer blobContainer, + ClusterMetadataManifest clusterMetadataManifest, + Map indexMetadataMap, + int codecVersion + ) throws IOException { + String manifestFileName = codecVersion >= ClusterMetadataManifest.CODEC_V1 + ? 
"manifest__manifestFileName__abcd__abcd__abcd__1" + : "manifestFileName"; + BlobMetadata blobMetadata = new PlainBlobMetadata(manifestFileName, 1); when(blobContainer.listBlobsByPrefixInSortedOrder("manifest" + DELIMITER, 1, BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC)) .thenReturn(Arrays.asList(blobMetadata)); BytesReference bytes = RemoteClusterStateService.CLUSTER_METADATA_MANIFEST_FORMAT.serialize( clusterMetadataManifest, - "manifestFileName", + manifestFileName, blobStoreRepository.getCompressor(), FORMAT_PARAMS ); - when(blobContainer.readBlob("manifestFileName")).thenReturn(new ByteArrayInputStream(bytes.streamInput().readAllBytes())); + when(blobContainer.readBlob(manifestFileName)).thenReturn(new ByteArrayInputStream(bytes.streamInput().readAllBytes())); clusterMetadataManifest.getIndices().forEach(uploadedIndexMetadata -> { try { @@ -1200,15 +1261,15 @@ private void mockBlobContainer( return; } String fileName = uploadedIndexMetadata.getUploadedFilename(); - BytesReference bytesIndexMetadata = RemoteClusterStateService.INDEX_METADATA_FORMAT.serialize( - indexMetadata, - fileName, - blobStoreRepository.getCompressor(), - FORMAT_PARAMS - ); - when(blobContainer.readBlob(fileName + ".dat")).thenReturn( - new ByteArrayInputStream(bytesIndexMetadata.streamInput().readAllBytes()) - ); + when(blobContainer.readBlob(fileName + ".dat")).thenAnswer((invocationOnMock) -> { + BytesReference bytesIndexMetadata = RemoteClusterStateService.INDEX_METADATA_FORMAT.serialize( + indexMetadata, + fileName, + blobStoreRepository.getCompressor(), + FORMAT_PARAMS + ); + return new ByteArrayInputStream(bytesIndexMetadata.streamInput().readAllBytes()); + }); } catch (IOException e) { throw new RuntimeException(e); } @@ -1238,15 +1299,17 @@ private void mockBlobContainerForGlobalMetadata( ); when(blobContainer.readBlob(mockManifestFileName)).thenReturn(new ByteArrayInputStream(bytes.streamInput().readAllBytes())); - BytesReference bytesGlobalMetadata = 
RemoteClusterStateService.GLOBAL_METADATA_FORMAT.serialize( - metadata, - "global-metadata-file", - blobStoreRepository.getCompressor(), - FORMAT_PARAMS - ); String[] splitPath = clusterMetadataManifest.getGlobalMetadataFileName().split("/"); - when(blobContainer.readBlob(RemoteClusterStateService.GLOBAL_METADATA_FORMAT.blobName(splitPath[splitPath.length - 1]))).thenReturn( - new ByteArrayInputStream(bytesGlobalMetadata.streamInput().readAllBytes()) + when(blobContainer.readBlob(RemoteClusterStateService.GLOBAL_METADATA_FORMAT.blobName(splitPath[splitPath.length - 1]))).thenAnswer( + (invocationOnMock) -> { + BytesReference bytesGlobalMetadata = RemoteClusterStateService.GLOBAL_METADATA_FORMAT.serialize( + metadata, + "global-metadata-file", + blobStoreRepository.getCompressor(), + FORMAT_PARAMS + ); + return new ByteArrayInputStream(bytesGlobalMetadata.streamInput().readAllBytes()); + } ); } From e618b9bd27af47c4b1a04f4eaabc323a39573c40 Mon Sep 17 00:00:00 2001 From: Dhwanil Patel Date: Sun, 22 Oct 2023 13:28:50 +0530 Subject: [PATCH 16/45] Fix custom metadata not getting stored for remote store not supporting async write (#10812) Signed-off-by: Dhwanil Patel --- .../RemoteStoreClusterStateRestoreIT.java | 28 +++++++++++-------- .../remote/RemoteClusterStateService.java | 8 +++++- .../blobstore/ChecksumBlobStoreFormat.java | 25 +++++++++++++++-- 3 files changed, 47 insertions(+), 14 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java index c2cb7cc60f152..f0863966fa222 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java @@ -25,6 +25,7 @@ import java.io.IOException; import java.nio.file.Files; +import java.nio.file.Path; 
import java.util.Arrays; import java.util.List; import java.util.Map; @@ -233,8 +234,7 @@ public void testFullClusterRestoreGlobalMetadata() throws Exception { String prevClusterUUID = clusterService().state().metadata().clusterUUID(); // Create global metadata - register a custom repo - // TODO - uncomment after all customs is also uploaded for all repos - https://github.com/opensearch-project/OpenSearch/issues/10691 - // registerCustomRepository(); + Path repoPath = registerCustomRepository(); // Create global metadata - persistent settings updatePersistentSettings(Settings.builder().put(SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey(), 34).build()); @@ -263,30 +263,36 @@ public void testFullClusterRestoreGlobalMetadata() throws Exception { verifyRedIndicesAndTriggerRestore(indexStats, INDEX_NAME, false); // validate global metadata restored - verifyRestoredRepositories(); + verifyRestoredRepositories(repoPath); verifyRestoredIndexTemplate(); } - private void registerCustomRepository() { + private Path registerCustomRepository() { + Path path = randomRepoPath(); assertAcked( client().admin() .cluster() .preparePutRepository("custom-repo") .setType("fs") - .setSettings(Settings.builder().put("location", randomRepoPath()).put("compress", false)) + .setSettings(Settings.builder().put("location", path).put("compress", false)) .get() ); + return path; } - private void verifyRestoredRepositories() { + private void verifyRestoredRepositories(Path repoPath) { RepositoriesMetadata repositoriesMetadata = clusterService().state().metadata().custom(RepositoriesMetadata.TYPE); - assertEquals(2, repositoriesMetadata.repositories().size()); // includes remote store repo as well + assertEquals(3, repositoriesMetadata.repositories().size()); // includes remote store repo as well assertTrue(SYSTEM_REPOSITORY_SETTING.get(repositoriesMetadata.repository(REPOSITORY_NAME).settings())); 
assertTrue(SYSTEM_REPOSITORY_SETTING.get(repositoriesMetadata.repository(REPOSITORY_2_NAME).settings())); - // TODO - uncomment after all customs is also uploaded for all repos - https://github.com/opensearch-project/OpenSearch/issues/10691 - // assertEquals("fs", repositoriesMetadata.repository("custom-repo").type()); - // assertEquals(Settings.builder().put("location", randomRepoPath()).put("compress", false).build(), - // repositoriesMetadata.repository("custom-repo").settings()); + assertEquals("fs", repositoriesMetadata.repository("custom-repo").type()); + assertEquals( + Settings.builder().put("location", repoPath).put("compress", false).build(), + repositoriesMetadata.repository("custom-repo").settings() + ); + + // repo cleanup post verification + clusterAdmin().prepareDeleteRepository("custom-repo").get(); } private void addClusterLevelReadOnlyBlock() throws InterruptedException, ExecutionException { diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index 13fbda77d8a2d..dfe3659c1efd1 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -568,7 +568,13 @@ private ClusterMetadataManifest uploadManifest( private void writeMetadataManifest(String clusterName, String clusterUUID, ClusterMetadataManifest uploadManifest, String fileName) throws IOException { final BlobContainer metadataManifestContainer = manifestContainer(clusterName, clusterUUID); - CLUSTER_METADATA_MANIFEST_FORMAT.write(uploadManifest, metadataManifestContainer, fileName, blobStoreRepository.getCompressor()); + CLUSTER_METADATA_MANIFEST_FORMAT.write( + uploadManifest, + metadataManifestContainer, + fileName, + blobStoreRepository.getCompressor(), + FORMAT_PARAMS + ); } private String fetchPreviousClusterUUID(String clusterName, String 
clusterUUID) { diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java b/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java index 17cb68f798094..e280141c12bc1 100644 --- a/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java +++ b/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java @@ -170,8 +170,29 @@ public T deserialize(String blobName, NamedXContentRegistry namedXContentRegistr * @param compressor whether to use compression */ public void write(final T obj, final BlobContainer blobContainer, final String name, final Compressor compressor) throws IOException { + write(obj, blobContainer, name, compressor, SNAPSHOT_ONLY_FORMAT_PARAMS); + } + + /** + * Writes blob with resolving the blob name using {@link #blobName} method. + *

+ * The blob will optionally be compressed. + * + * @param obj object to be serialized + * @param blobContainer blob container + * @param name blob name + * @param compressor whether to use compression + * @param params ToXContent params + */ + public void write( + final T obj, + final BlobContainer blobContainer, + final String name, + final Compressor compressor, + final ToXContent.Params params + ) throws IOException { final String blobName = blobName(name); - final BytesReference bytes = serialize(obj, blobName, compressor, SNAPSHOT_ONLY_FORMAT_PARAMS); + final BytesReference bytes = serialize(obj, blobName, compressor, params); blobContainer.writeBlob(blobName, bytes.streamInput(), bytes.length(), false); } @@ -195,7 +216,7 @@ public void writeAsync( final ToXContent.Params params ) throws IOException { if (blobContainer instanceof AsyncMultiStreamBlobContainer == false) { - write(obj, blobContainer, name, compressor); + write(obj, blobContainer, name, compressor, params); listener.onResponse(null); return; } From a09047a4a9870eca642577094c9793256cbe71d1 Mon Sep 17 00:00:00 2001 From: Shivansh Arora <31575408+shiv0408@users.noreply.github.com> Date: Sun, 22 Oct 2023 13:41:50 +0530 Subject: [PATCH 17/45] Removed unnecessary catch statement (#10783) * Removed unnecessary catch statement related to repo missing exception in remote state flow Signed-off-by: Shivansh Arora --- .../main/java/org/opensearch/gateway/GatewayMetaState.java | 7 ------- 1 file changed, 7 deletions(-) diff --git a/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java b/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java index 9eb7fb0ca04d0..5d417ce78fe5c 100644 --- a/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java +++ b/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java @@ -67,7 +67,6 @@ import org.opensearch.index.recovery.RemoteStoreRestoreService.RemoteRestoreResult; import org.opensearch.node.Node; import 
org.opensearch.plugins.MetadataUpgrader; -import org.opensearch.repositories.RepositoryMissingException; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportService; @@ -712,12 +711,6 @@ assert verifyManifestAndClusterState(lastAcceptedManifest, lastAcceptedState) == assert verifyManifestAndClusterState(manifest, clusterState) == true : "Manifest and ClusterState are not in sync"; lastAcceptedManifest = manifest; lastAcceptedState = clusterState; - } catch (RepositoryMissingException e) { - // TODO This logic needs to be modified once PR for repo registration during bootstrap is pushed - // https://github.com/opensearch-project/OpenSearch/pull/9105/ - // After the above PR is pushed, we can remove this silent failure and throw the exception instead. - logger.error("Remote repository is not yet registered"); - lastAcceptedState = clusterState; } catch (Exception e) { handleExceptionOnWrite(e); } From 9b7a9d0026aa379537804fd24b95619abe88e1c0 Mon Sep 17 00:00:00 2001 From: Varun Bansal Date: Sun, 22 Oct 2023 16:16:10 +0530 Subject: [PATCH 18/45] Override local disk state if we are able to restore from remote (#10748) * Override local disk state if we are able to restore from remote Signed-off-by: bansvaru --- .../RemoteStoreClusterStateRestoreIT.java | 68 +++++++++++++ .../opensearch/gateway/GatewayMetaState.java | 7 +- .../recovery/RemoteStoreRestoreService.java | 95 +++---------------- 3 files changed, 89 insertions(+), 81 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java index f0863966fa222..29786158bc73c 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java @@ -21,6 +21,7 @@ import 
org.opensearch.gateway.remote.ClusterMetadataManifest; import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata; import org.opensearch.gateway.remote.RemoteClusterStateService; +import org.opensearch.test.InternalTestCluster; import org.opensearch.test.OpenSearchIntegTestCase; import java.io.IOException; @@ -32,6 +33,7 @@ import java.util.Objects; import java.util.concurrent.ExecutionException; +import static org.opensearch.cluster.coordination.ClusterBootstrapService.INITIAL_CLUSTER_MANAGER_NODES_SETTING; import static org.opensearch.cluster.metadata.IndexMetadata.INDEX_READ_ONLY_SETTING; import static org.opensearch.cluster.metadata.Metadata.CLUSTER_READ_ONLY_BLOCK; import static org.opensearch.cluster.metadata.Metadata.SETTING_READ_ONLY_SETTING; @@ -94,6 +96,72 @@ public void testFullClusterRestore() throws Exception { verifyRedIndicesAndTriggerRestore(indexStats, INDEX_NAME, true); } + /** + * This test scenario covers the case where right after remote state restore and persisting it to disk via LucenePersistedState, full cluster restarts. + * This is a special case for remote state as at this point cluster uuid in the restored state is still ClusterState.UNKNOWN_UUID as we persist it to disk. + * After restart the local disk state will be read but should be again overridden with remote state. + * + * 1. Form a cluster and index a few docs + * 2. Replace all nodes to remove all local disk state + * 3. Start cluster manager node without correct seeding to ensure local disk state is written with cluster uuid ClusterState.UNKNOWN_UUID but with remote restored Metadata + * 4. Restart the cluster manager node with correct seeding. + * 5. After restart the cluster manager picks up the local disk state which has the same Metadata as remote but cluster uuid is still ClusterState.UNKNOWN_UUID + * 6. The cluster manager will try to restore from remote again. + * 7. 
Metadata loaded from local disk state will be overridden with remote Metadata and no conflict should arise. + * 8. Add data nodes to recover index data + * 9. Verify Metadata and index data is restored. + */ + public void testFullClusterStateRestore() throws Exception { + int shardCount = randomIntBetween(1, 2); + int replicaCount = 1; + int dataNodeCount = shardCount * (replicaCount + 1); + int clusterManagerNodeCount = 1; + + // index some data to generate files in remote directory + Map indexStats = initialTestSetup(shardCount, replicaCount, dataNodeCount, 1); + String prevClusterUUID = clusterService().state().metadata().clusterUUID(); + + // stop all nodes + internalCluster().stopAllNodes(); + + // start a cluster manager node with no cluster manager seeding. + // This should fail with IllegalStateException as cluster manager fails to form without any initial seed + assertThrows( + IllegalStateException.class, + () -> internalCluster().startClusterManagerOnlyNodes( + clusterManagerNodeCount, + Settings.builder() + .putList(INITIAL_CLUSTER_MANAGER_NODES_SETTING.getKey()) // disable seeding during bootstrapping + .build() + ) + ); + + // verify cluster manager not elected + String newClusterUUID = clusterService().state().metadata().clusterUUID(); + assert Objects.equals(newClusterUUID, ClusterState.UNKNOWN_UUID) + : "Disabling Cluster manager seeding failed. cluster uuid is not unknown"; + + // restart cluster manager with correct seed + internalCluster().fullRestart(new InternalTestCluster.RestartCallback() { + @Override + public Settings onNodeStopped(String nodeName) { + return Settings.builder() + .putList(INITIAL_CLUSTER_MANAGER_NODES_SETTING.getKey(), nodeName) // Seed with correct Cluster Manager node + .build(); + } + }); + + // validate new cluster state formed + newClusterUUID = clusterService().state().metadata().clusterUUID(); + assert !Objects.equals(newClusterUUID, ClusterState.UNKNOWN_UUID) : "cluster restart not successful. 
cluster uuid is still unknown"; + assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same"; + validateMetadata(List.of(INDEX_NAME)); + + // start data nodes to trigger index data recovery + internalCluster().startDataOnlyNodes(dataNodeCount); + verifyRestoredData(indexStats, INDEX_NAME); + } + public void testFullClusterRestoreMultipleIndices() throws Exception { int shardCount = randomIntBetween(1, 2); int replicaCount = 1; diff --git a/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java b/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java index 5d417ce78fe5c..f855449c708d2 100644 --- a/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java +++ b/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java @@ -174,7 +174,9 @@ public void start( if (ClusterState.UNKNOWN_UUID.equals(lastKnownClusterUUID) == false) { // Load state from remote final RemoteRestoreResult remoteRestoreResult = remoteStoreRestoreService.restore( - clusterState, + // Remote Metadata should always override local disk Metadata + // if local disk Metadata's cluster uuid is UNKNOWN_UUID + ClusterState.builder(clusterState).metadata(Metadata.EMPTY_METADATA).build(), lastKnownClusterUUID, false, new String[] {} @@ -549,6 +551,9 @@ static class LucenePersistedState implements PersistedState { // out by this version of OpenSearch. TODO TBD should we avoid indexing when possible? final PersistedClusterStateService.Writer writer = persistedClusterStateService.createWriter(); try { + // During remote state restore, there will be non empty metadata getting persisted with cluster UUID as + // ClusterState.UNKNOWN_UUID. The valid UUID will be generated and persisted along with the first cluster state getting + // published. 
writer.writeFullStateAndCommit(currentTerm, lastAcceptedState); } catch (Exception e) { try { diff --git a/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java b/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java index 6692d521b8f65..9541d13421e27 100644 --- a/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java +++ b/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java @@ -40,12 +40,10 @@ import java.util.ArrayList; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Optional; -import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; @@ -146,6 +144,11 @@ public RemoteRestoreResult restore( || restoreClusterUUID.isBlank()) == false; if (metadataFromRemoteStore) { try { + // Restore with current cluster UUID will fail as same indices would be present in the cluster which we are trying to + // restore + if (currentState.metadata().clusterUUID().equals(restoreClusterUUID)) { + throw new IllegalArgumentException("clusterUUID to restore from should be different from current cluster UUID"); + } remoteMetadata = remoteClusterStateService.getLatestMetadata(currentState.getClusterName().value(), restoreClusterUUID); remoteMetadata.getIndices().values().forEach(indexMetadata -> { indexMetadataMap.put(indexMetadata.getIndex().getName(), new Tuple<>(true, indexMetadata)); @@ -158,12 +161,21 @@ public RemoteRestoreResult restore( IndexMetadata indexMetadata = currentState.metadata().index(indexName); if (indexMetadata == null) { logger.warn("Index restore is not supported for non-existent index. 
Skipping: {}", indexName); + } else if (indexMetadata.getSettings().getAsBoolean(SETTING_REMOTE_STORE_ENABLED, false) == false) { + logger.warn("Remote store is not enabled for index: {}", indexName); + } else if (restoreAllShards && IndexMetadata.State.CLOSE.equals(indexMetadata.getState()) == false) { + throw new IllegalStateException( + String.format( + Locale.ROOT, + "cannot restore index [%s] because an open index with same name/uuid already exists in the cluster.", + indexName + ) + " Close the existing index." + ); } else { indexMetadataMap.put(indexName, new Tuple<>(false, indexMetadata)); } } } - validate(currentState, indexMetadataMap, restoreClusterUUID, restoreAllShards); return executeRestore(currentState, indexMetadataMap, restoreAllShards, remoteMetadata); } @@ -272,83 +284,6 @@ private void restoreGlobalMetadata(Metadata.Builder mdBuilder, Metadata remoteMe repositoriesMetadata.ifPresent(metadata -> mdBuilder.putCustom(RepositoriesMetadata.TYPE, metadata)); } - /** - * Performs various validations needed before executing restore - * @param currentState current cluster state - * @param indexMetadataMap map of index metadata to restore - * @param restoreClusterUUID cluster UUID used to restore IndexMetadata - * @param restoreAllShards indicates if all shards of the index needs to be restored. 
This flat is ignored if remoteClusterUUID is provided - */ - private void validate( - ClusterState currentState, - Map> indexMetadataMap, - @Nullable String restoreClusterUUID, - boolean restoreAllShards - ) throws IllegalStateException, IllegalArgumentException { - String errorMsg = "cannot restore index [%s] because an open index with same name/uuid already exists in the cluster."; - - // Restore with current cluster UUID will fail as same indices would be present in the cluster which we are trying to - // restore - if (currentState.metadata().clusterUUID().equals(restoreClusterUUID)) { - throw new IllegalArgumentException("clusterUUID to restore from should be different from current cluster UUID"); - } - for (Map.Entry> indexMetadataEntry : indexMetadataMap.entrySet()) { - String indexName = indexMetadataEntry.getKey(); - IndexMetadata indexMetadata = indexMetadataEntry.getValue().v2(); - String indexUUID = indexMetadata.getIndexUUID(); - boolean metadataFromRemoteStore = indexMetadataEntry.getValue().v1(); - if (indexMetadata.getSettings().getAsBoolean(SETTING_REMOTE_STORE_ENABLED, false)) { - if (metadataFromRemoteStore) { - Set graveyardIndexNames = new HashSet<>(); - Set graveyardIndexUUID = new HashSet<>(); - Set liveClusterIndexUUIDs = currentState.metadata() - .indices() - .values() - .stream() - .map(IndexMetadata::getIndexUUID) - .collect(Collectors.toSet()); - - currentState.metadata().indexGraveyard().getTombstones().forEach(tombstone -> { - graveyardIndexNames.add(tombstone.getIndex().getName()); - graveyardIndexUUID.add(tombstone.getIndex().getUUID()); - }); - - // Since updates to graveyard are synced to remote we should neven land in a situation where remote contain index - // metadata for graveyard index. 
- assert graveyardIndexNames.contains(indexName) == false : String.format( - Locale.ROOT, - "Index name [%s] exists in graveyard!", - indexName - ); - assert graveyardIndexUUID.contains(indexUUID) == false : String.format( - Locale.ROOT, - "Index UUID [%s] exists in graveyard!", - indexUUID - ); - - // Any indices being restored from remote cluster state should not already be part of the cluster as this causes - // conflict - boolean sameNameIndexExists = currentState.metadata().hasIndex(indexName); - boolean sameUUIDIndexExists = liveClusterIndexUUIDs.contains(indexUUID); - if (sameNameIndexExists || sameUUIDIndexExists) { - String finalErrorMsg = String.format(Locale.ROOT, errorMsg, indexName); - logger.info(finalErrorMsg); - throw new IllegalStateException(finalErrorMsg); - } - - boolean isHidden = IndexMetadata.INDEX_HIDDEN_SETTING.get(indexMetadata.getSettings()); - createIndexService.validateIndexName(indexName, currentState); - createIndexService.validateDotIndex(indexName, isHidden); - shardLimitValidator.validateShardLimit(indexName, indexMetadata.getSettings(), currentState); - } else if (restoreAllShards && IndexMetadata.State.CLOSE.equals(indexMetadata.getState()) == false) { - throw new IllegalStateException(String.format(Locale.ROOT, errorMsg, indexName) + " Close the existing index."); - } - } else { - logger.warn("Remote store is not enabled for index: {}", indexName); - } - } - } - /** * Result of a remote restore operation. 
*/ From 5e7c0689534f94b5f09f85eb0a64b42f79899241 Mon Sep 17 00:00:00 2001 From: Varun Bansal Date: Mon, 23 Oct 2023 11:24:13 +0530 Subject: [PATCH 19/45] fix integ test: check for red indices and then manually restore to green (#10838) Signed-off-by: bansvaru --- .../remotestore/RemoteStoreClusterStateRestoreIT.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java index 29786158bc73c..c429b587a1399 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java @@ -111,7 +111,7 @@ public void testFullClusterRestore() throws Exception { * 8. Add data nodes to recover index data * 9. Verify Metadata and index data is restored. */ - public void testFullClusterStateRestore() throws Exception { + public void testFullClusterRestoreDoesntFailWithConflictingLocalState() throws Exception { int shardCount = randomIntBetween(1, 2); int replicaCount = 1; int dataNodeCount = shardCount * (replicaCount + 1); @@ -159,7 +159,7 @@ public Settings onNodeStopped(String nodeName) { // start data nodes to trigger index data recovery internalCluster().startDataOnlyNodes(dataNodeCount); - verifyRestoredData(indexStats, INDEX_NAME); + verifyRedIndicesAndTriggerRestore(indexStats, INDEX_NAME, true); } public void testFullClusterRestoreMultipleIndices() throws Exception { From ef4b327ab48a6a674b657049e440418fe427cb0c Mon Sep 17 00:00:00 2001 From: Rishikesh Pasham <62345295+Rishikesh1159@users.noreply.github.com> Date: Sun, 22 Oct 2023 22:54:43 -0700 Subject: [PATCH 20/45] Update version check made in PR #10656. 
(#10835) Signed-off-by: Rishikesh1159 --- .../action/admin/cluster/node/stats/NodeStats.java | 7 +++---- .../opensearch/index/SegmentReplicationRejectionStats.java | 6 ++---- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java index 3d37056956c69..874713b51d627 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java @@ -215,8 +215,7 @@ public NodeStats(StreamInput in) throws IOException { } else { resourceUsageStats = null; } - // TODO: change to V_2_12_0 on main after backport to 2.x - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + if (in.getVersion().onOrAfter(Version.V_2_12_0)) { segmentReplicationRejectionStats = in.readOptionalWriteable(SegmentReplicationRejectionStats::new); } else { segmentReplicationRejectionStats = null; @@ -431,6 +430,7 @@ public SegmentReplicationRejectionStats getSegmentReplicationRejectionStats() { return segmentReplicationRejectionStats; } + @Nullable public RepositoriesStats getRepositoriesStats() { return repositoriesStats; } @@ -481,8 +481,7 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(Version.V_2_12_0)) { out.writeOptionalWriteable(resourceUsageStats); } - // TODO: change to V_2_12_0 on main after backport to 2.x - if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + if (out.getVersion().onOrAfter(Version.V_2_12_0)) { out.writeOptionalWriteable(segmentReplicationRejectionStats); } if (out.getVersion().onOrAfter(Version.V_2_12_0)) { diff --git a/server/src/main/java/org/opensearch/index/SegmentReplicationRejectionStats.java b/server/src/main/java/org/opensearch/index/SegmentReplicationRejectionStats.java index 9f9f150ebe2d7..492f253bbcb7c 100644 --- 
a/server/src/main/java/org/opensearch/index/SegmentReplicationRejectionStats.java +++ b/server/src/main/java/org/opensearch/index/SegmentReplicationRejectionStats.java @@ -34,8 +34,7 @@ public SegmentReplicationRejectionStats(final long totalRejectionCount) { } public SegmentReplicationRejectionStats(StreamInput in) throws IOException { - // TODO: change to V_2_12_0 on main after backport to 2.x - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + if (in.getVersion().onOrAfter(Version.V_2_12_0)) { this.totalRejectionCount = in.readVLong(); } } @@ -53,8 +52,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws @Override public void writeTo(StreamOutput out) throws IOException { - // TODO: change to V_2_12_0 on main after backport to 2.x - if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + if (out.getVersion().onOrAfter(Version.V_2_12_0)) { out.writeVLong(totalRejectionCount); } } From b5ef0788699152fb2b1ae158b518c212ba784184 Mon Sep 17 00:00:00 2001 From: Dhwanil Patel Date: Mon, 23 Oct 2023 13:56:33 +0530 Subject: [PATCH 21/45] Fix remote cluster restore for data stream (#10777) * Fix remote cluster restore for data stream, also added integ test Signed-off-by: Dhwanil Patel --- .../datastream/DataStreamTestCase.java | 3 ++- .../RemoteStoreClusterStateRestoreIT.java | 9 +++++++ .../remote/RemoteClusterStateService.java | 5 +++- .../RemoteClusterStateServiceTests.java | 6 ++--- .../opensearch/test/InternalTestCluster.java | 12 ++++++++++ .../test/OpenSearchIntegTestCase.java | 24 +++++++++++++++++++ 6 files changed, 54 insertions(+), 5 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/datastream/DataStreamTestCase.java b/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/datastream/DataStreamTestCase.java index 50ff76c6b62f3..82ab5b0118c0e 100644 --- a/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/datastream/DataStreamTestCase.java +++ 
b/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/datastream/DataStreamTestCase.java @@ -37,6 +37,7 @@ public AcknowledgedResponse createDataStream(String name) throws Exception { CreateDataStreamAction.Request request = new CreateDataStreamAction.Request(name); AcknowledgedResponse response = client().admin().indices().createDataStream(request).get(); assertThat(response.isAcknowledged(), is(true)); + performRemoteStoreTestAction(); return response; } @@ -67,6 +68,7 @@ public RolloverResponse rolloverDataStream(String name) throws Exception { RolloverResponse response = client().admin().indices().rolloverIndex(request).get(); assertThat(response.isAcknowledged(), is(true)); assertThat(response.isRolledOver(), is(true)); + performRemoteStoreTestAction(); return response; } @@ -109,5 +111,4 @@ public AcknowledgedResponse deleteIndexTemplate(String name) throws Exception { assertThat(response.isAcknowledged(), is(true)); return response; } - } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java index c429b587a1399..e9afd6d36bb87 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java @@ -9,6 +9,7 @@ package org.opensearch.remotestore; import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; +import org.opensearch.action.admin.indices.datastream.DataStreamRolloverIT; import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest; import org.opensearch.action.admin.indices.template.put.PutIndexTemplateRequest; import org.opensearch.cluster.ClusterState; @@ -291,6 +292,14 @@ private void validateCurrentMetadata() throws Exception { }); } + public void 
testDataStreamPostRemoteStateRestore() throws Exception { + new DataStreamRolloverIT() { + protected boolean triggerRemoteStateRestore() { + return true; + } + }.testDataStreamRollover(); + } + public void testFullClusterRestoreGlobalMetadata() throws Exception { int shardCount = randomIntBetween(1, 2); int replicaCount = 1; diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index dfe3659c1efd1..b505b3e2d4c0c 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -732,7 +732,10 @@ public Metadata getLatestMetadata(String clusterName, String clusterUUID) { // Fetch Index Metadata Map indices = getIndexMetadataMap(clusterName, clusterUUID, clusterMetadataManifest.get()); - return Metadata.builder(globalMetadata).indices(indices).build(); + Map indexMetadataMap = new HashMap<>(); + indices.values().forEach(indexMetadata -> { indexMetadataMap.put(indexMetadata.getIndex().getName(), indexMetadata); }); + + return Metadata.builder(globalMetadata).indices(indexMetadataMap).build(); } private Metadata getGlobalMetadata(String clusterName, String clusterUUID, ClusterMetadataManifest clusterMetadataManifest) { diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index 55094400581b4..827735f6bcc0b 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -826,9 +826,9 @@ public void testReadLatestIndexMetadataSuccess() throws IOException { ).getIndices(); assertEquals(indexMetadataMap.size(), 1); - 
assertEquals(indexMetadataMap.get(index.getUUID()).getIndex().getName(), index.getName()); - assertEquals(indexMetadataMap.get(index.getUUID()).getNumberOfShards(), indexMetadata.getNumberOfShards()); - assertEquals(indexMetadataMap.get(index.getUUID()).getNumberOfReplicas(), indexMetadata.getNumberOfReplicas()); + assertEquals(indexMetadataMap.get(index.getName()).getIndex().getName(), index.getName()); + assertEquals(indexMetadataMap.get(index.getName()).getNumberOfShards(), indexMetadata.getNumberOfShards()); + assertEquals(indexMetadataMap.get(index.getName()).getNumberOfReplicas(), indexMetadata.getNumberOfReplicas()); } public void testMarkLastStateAsCommittedSuccess() throws IOException { diff --git a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java index 63d8f069bebea..952cd6c085966 100644 --- a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java +++ b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java @@ -1871,6 +1871,18 @@ public void stopAllNodes() { } } + /** + * Replaces all nodes by stopping all current nodes and starting new nodes. + * Used for remote store test cases, where remote state is restored.
+ */ + public void resetCluster() { + int totalClusterManagerNodes = numClusterManagerNodes(); + int totalDataNodes = numDataNodes(); + stopAllNodes(); + startClusterManagerOnlyNodes(totalClusterManagerNodes); + startDataOnlyNodes(totalDataNodes); + } + private synchronized void startAndPublishNodesAndClients(List nodeAndClients) { if (nodeAndClients.size() > 0) { final int newClusterManagers = (int) nodeAndClients.stream() diff --git a/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java b/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java index c16cc1d2a5fba..ad27d9834f159 100644 --- a/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java +++ b/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java @@ -789,6 +789,30 @@ protected Settings featureFlagSettings() { return featureSettings.build(); } + /** + * Represent if it needs to trigger remote state restore or not. + * For tests with remote store enabled domain, it will be overridden to true. + * + * @return if needs to perform remote state restore or not + */ + protected boolean triggerRemoteStateRestore() { + return false; + } + + /** + * For tests with remote cluster state, it will reset the cluster and cluster state will be + * restored from remote. + */ + protected void performRemoteStoreTestAction() { + if (triggerRemoteStateRestore()) { + String clusterUUIDBefore = clusterService().state().metadata().clusterUUID(); + internalCluster().resetCluster(); + String clusterUUIDAfter = clusterService().state().metadata().clusterUUID(); + // assertion that UUID is changed post restore. + assertFalse(clusterUUIDBefore.equals(clusterUUIDAfter)); + } + } + /** * Creates one or more indices and asserts that the indices are acknowledged. If one of the indices * already exists this method will fail and wipe all the indices created so far. 
From 7453daa2b7ce698fa7bf3647002eb12394a5dae1 Mon Sep 17 00:00:00 2001 From: Ashish Date: Mon, 23 Oct 2023 14:10:14 +0530 Subject: [PATCH 22/45] [Remote Store] Sync segments in refresh listener on refresh after commit (#10830) * [Remote Store] Sync segments in refresh listener on refresh after commit Signed-off-by: Ashish Singh * Add Integration Tests Signed-off-by: Ashish Singh * Add comments and java doc Signed-off-by: Ashish Singh --------- Signed-off-by: Ashish Singh --- .../remotestore/RemoteStoreStatsIT.java | 76 ++++++++++++++++- .../remote/RemoteTranslogTransferTracker.java | 57 +++++++++++++ .../opensearch/index/shard/IndexShard.java | 8 ++ .../shard/RemoteStoreRefreshListener.java | 84 +++++++++++++++---- .../index/translog/RemoteFsTranslog.java | 8 +- 5 files changed, 215 insertions(+), 18 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreStatsIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreStatsIT.java index 8ae25c6758195..5e91176ed0473 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreStatsIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreStatsIT.java @@ -15,6 +15,8 @@ import org.opensearch.action.admin.cluster.remotestore.stats.RemoteStoreStatsResponse; import org.opensearch.action.support.PlainActionFuture; import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.coordination.FollowersChecker; +import org.opensearch.cluster.coordination.LeaderChecker; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.cluster.routing.ShardRoutingState; @@ -23,15 +25,20 @@ import org.opensearch.index.IndexSettings; import org.opensearch.index.remote.RemoteSegmentTransferTracker; import org.opensearch.index.remote.RemoteTranslogTransferTracker; +import org.opensearch.plugins.Plugin; import 
org.opensearch.test.InternalTestCluster; import org.opensearch.test.OpenSearchIntegTestCase; -import org.junit.Before; +import org.opensearch.test.disruption.NetworkDisruption; +import org.opensearch.test.transport.MockTransportService; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; import java.util.List; import java.util.Locale; +import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -44,12 +51,17 @@ public class RemoteStoreStatsIT extends RemoteStoreBaseIntegTestCase { private static final String INDEX_NAME = "remote-store-test-idx-1"; - @Before + @Override + protected Collection> nodePlugins() { + return Arrays.asList(MockTransportService.TestPlugin.class); + } + public void setup() { internalCluster().startNodes(3); } public void testStatsResponseFromAllNodes() { + setup(); // Step 1 - We create cluster, create an index, and then index documents into. We also do multiple refreshes/flushes // during this time frame. This ensures that the segment upload has started. @@ -118,6 +130,7 @@ public void testStatsResponseFromAllNodes() { } public void testStatsResponseAllShards() { + setup(); // Step 1 - We create cluster, create an index, and then index documents into. We also do multiple refreshes/flushes // during this time frame. This ensures that the segment upload has started. @@ -175,6 +188,7 @@ public void testStatsResponseAllShards() { } public void testStatsResponseFromLocalNode() { + setup(); // Step 1 - We create cluster, create an index, and then index documents into. We also do multiple refreshes/flushes // during this time frame. This ensures that the segment upload has started. 
@@ -236,6 +250,7 @@ public void testStatsResponseFromLocalNode() { } public void testDownloadStatsCorrectnessSinglePrimarySingleReplica() throws Exception { + setup(); // Scenario: // - Create index with single primary and single replica shard // - Disable Refresh Interval for the index @@ -325,6 +340,7 @@ public void testDownloadStatsCorrectnessSinglePrimarySingleReplica() throws Exce } public void testDownloadStatsCorrectnessSinglePrimaryMultipleReplicaShards() throws Exception { + setup(); // Scenario: // - Create index with single primary and N-1 replica shards (N = no of data nodes) // - Disable Refresh Interval for the index @@ -416,6 +432,7 @@ public void testDownloadStatsCorrectnessSinglePrimaryMultipleReplicaShards() thr } public void testStatsOnShardRelocation() { + setup(); // Scenario: // - Create index with single primary and single replica shard // - Index documents @@ -471,6 +488,7 @@ public void testStatsOnShardRelocation() { } public void testStatsOnShardUnassigned() throws IOException { + setup(); // Scenario: // - Create index with single primary and two replica shard // - Index documents @@ -497,6 +515,7 @@ public void testStatsOnShardUnassigned() throws IOException { } public void testStatsOnRemoteStoreRestore() throws IOException { + setup(); // Creating an index with primary shard count == total nodes in cluster and 0 replicas int dataNodeCount = client().admin().cluster().prepareHealth().get().getNumberOfDataNodes(); createIndex(INDEX_NAME, remoteStoreIndexSettings(0, dataNodeCount)); @@ -544,6 +563,7 @@ public void testStatsOnRemoteStoreRestore() throws IOException { } public void testNonZeroPrimaryStatsOnNewlyCreatedIndexWithZeroDocs() throws Exception { + setup(); // Create an index with one primary and one replica shard createIndex(INDEX_NAME, remoteStoreIndexSettings(1, 1)); ensureGreen(INDEX_NAME); @@ -581,6 +601,58 @@ public void testNonZeroPrimaryStatsOnNewlyCreatedIndexWithZeroDocs() throws Exce }, 5, TimeUnit.SECONDS); } + public 
void testStatsCorrectnessOnFailover() { + Settings clusterSettings = Settings.builder() + .put(LeaderChecker.LEADER_CHECK_TIMEOUT_SETTING.getKey(), "100ms") + .put(LeaderChecker.LEADER_CHECK_INTERVAL_SETTING.getKey(), "500ms") + .put(LeaderChecker.LEADER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) + .put(FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING.getKey(), "100ms") + .put(FollowersChecker.FOLLOWER_CHECK_INTERVAL_SETTING.getKey(), "500ms") + .put(FollowersChecker.FOLLOWER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) + .put(nodeSettings(0)) + .build(); + String clusterManagerNode = internalCluster().startClusterManagerOnlyNode(clusterSettings); + internalCluster().startDataOnlyNodes(2, clusterSettings); + + // Create an index with one primary and one replica shard + createIndex(INDEX_NAME, remoteStoreIndexSettings(1, 1)); + ensureGreen(INDEX_NAME); + + // Index some docs and refresh + indexDocs(); + refresh(INDEX_NAME); + + String primaryNode = primaryNodeName(INDEX_NAME); + String replicaNode = replicaNodeName(INDEX_NAME); + + // Start network disruption - primary node will be isolated + Set nodesInOneSide = Stream.of(clusterManagerNode, replicaNode).collect(Collectors.toCollection(HashSet::new)); + Set nodesInOtherSide = Stream.of(primaryNode).collect(Collectors.toCollection(HashSet::new)); + NetworkDisruption networkDisruption = new NetworkDisruption( + new NetworkDisruption.TwoPartitions(nodesInOneSide, nodesInOtherSide), + NetworkDisruption.DISCONNECT + ); + internalCluster().setDisruptionScheme(networkDisruption); + logger.info("--> network disruption is started"); + networkDisruption.startDisrupting(); + ensureStableCluster(2, clusterManagerNode); + + RemoteStoreStatsResponse response = client(clusterManagerNode).admin().cluster().prepareRemoteStoreStats(INDEX_NAME, "0").get(); + final String indexShardId = String.format(Locale.ROOT, "[%s][%s]", INDEX_NAME, "0"); + List matches = Arrays.stream(response.getRemoteStoreStats()) + .filter(stat -> 
indexShardId.equals(stat.getSegmentStats().shardId.toString())) + .collect(Collectors.toList()); + assertEquals(1, matches.size()); + RemoteSegmentTransferTracker.Stats segmentStats = matches.get(0).getSegmentStats(); + assertEquals(0, segmentStats.refreshTimeLagMs); + + networkDisruption.stopDisrupting(); + internalCluster().clearDisruptionScheme(); + ensureStableCluster(3, clusterManagerNode); + ensureGreen(INDEX_NAME); + logger.info("Test completed"); + } + private void indexDocs() { for (int i = 0; i < randomIntBetween(5, 10); i++) { if (randomBoolean()) { diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteTranslogTransferTracker.java b/server/src/main/java/org/opensearch/index/remote/RemoteTranslogTransferTracker.java index 1a9896540212e..4214a87049350 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteTranslogTransferTracker.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteTranslogTransferTracker.java @@ -232,6 +232,63 @@ public RemoteTranslogTransferTracker.Stats stats() { ); } + @Override + public String toString() { + return "RemoteTranslogTransferStats{" + + "lastSuccessfulUploadTimestamp=" + + lastSuccessfulUploadTimestamp.get() + + "," + + "totalUploadsStarted=" + + totalUploadsStarted.get() + + "," + + "totalUploadsSucceeded=" + + totalUploadsSucceeded.get() + + "," + + "totalUploadsFailed=" + + totalUploadsFailed.get() + + "," + + "uploadBytesStarted=" + + uploadBytesStarted.get() + + "," + + "uploadBytesFailed=" + + uploadBytesFailed.get() + + "," + + "totalUploadTimeInMillis=" + + totalUploadTimeInMillis.get() + + "," + + "uploadBytesMovingAverage=" + + uploadBytesMovingAverageReference.get().getAverage() + + "," + + "uploadBytesPerSecMovingAverage=" + + uploadBytesPerSecMovingAverageReference.get().getAverage() + + "," + + "uploadTimeMovingAverage=" + + uploadTimeMsMovingAverageReference.get().getAverage() + + "," + + "lastSuccessfulDownloadTimestamp=" + + lastSuccessfulDownloadTimestamp.get() + + 
"," + + "totalDownloadsSucceeded=" + + totalDownloadsSucceeded.get() + + "," + + "downloadBytesSucceeded=" + + downloadBytesSucceeded.get() + + "," + + "totalDownloadTimeInMillis=" + + totalDownloadTimeInMillis.get() + + "," + + "downloadBytesMovingAverage=" + + downloadBytesMovingAverageReference.get().getAverage() + + "," + + "downloadBytesPerSecMovingAverage=" + + downloadBytesPerSecMovingAverageReference.get().getAverage() + + "," + + "downloadTimeMovingAverage=" + + downloadTimeMsMovingAverageReference.get().getAverage() + + "," + + "}"; + } + /** * Represents the tracker's state as seen in the stats API. * diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index f990a3b56e856..fb4e9056153aa 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -4774,6 +4774,8 @@ public void syncSegmentsFromRemoteSegmentStore(boolean overrideLocal) throws IOE * @throws IOException if exception occurs while reading segments from remote store. 
*/ public void syncSegmentsFromRemoteSegmentStore(boolean overrideLocal, final Runnable onFileSync) throws IOException { + boolean syncSegmentSuccess = false; + long startTimeMs = System.currentTimeMillis(); assert indexSettings.isRemoteStoreEnabled(); logger.trace("Downloading segments from remote segment store"); RemoteSegmentStoreDirectory remoteDirectory = getRemoteDirectory(); @@ -4823,9 +4825,15 @@ public void syncSegmentsFromRemoteSegmentStore(boolean overrideLocal, final Runn : "There should not be any segments file in the dir"; store.commitSegmentInfos(infosSnapshot, processedLocalCheckpoint, processedLocalCheckpoint); } + syncSegmentSuccess = true; } catch (IOException e) { throw new IndexShardRecoveryException(shardId, "Exception while copying segment files from remote segment store", e); } finally { + logger.trace( + "syncSegmentsFromRemoteSegmentStore success={} elapsedTime={}", + syncSegmentSuccess, + (System.currentTimeMillis() - startTimeMs) + ); store.decRef(); remoteStore.decRef(); } diff --git a/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java b/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java index c650edc31da8d..3e97b07abfb5d 100644 --- a/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java +++ b/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java @@ -123,14 +123,13 @@ public void beforeRefresh() throws IOException {} @Override protected void runAfterRefreshExactlyOnce(boolean didRefresh) { - if (shouldSync(didRefresh)) { + // We have 2 separate methods to check if sync needs to be done or not. This is required since we use the return boolean + // from isReadyForUpload to schedule refresh retries as the index shard or the primary mode are not in complete + // ready state. 
+ if (shouldSync(didRefresh) && isReadyForUpload()) { segmentTracker.updateLocalRefreshTimeAndSeqNo(); try { - if (this.primaryTerm != indexShard.getOperationPrimaryTerm()) { - logger.debug("primaryTerm update from={} to={}", primaryTerm, indexShard.getOperationPrimaryTerm()); - this.primaryTerm = indexShard.getOperationPrimaryTerm(); - this.remoteDirectory.init(); - } + initializeRemoteDirectoryOnTermUpdate(); try (GatedCloseable segmentInfosGatedCloseable = indexShard.getSegmentInfosSnapshot()) { Collection localSegmentsPostRefresh = segmentInfosGatedCloseable.get().files(true); updateLocalSizeMapAndTracker(localSegmentsPostRefresh); @@ -160,20 +159,20 @@ protected boolean performAfterRefreshWithPermit(boolean didRefresh) { } private boolean shouldSync(boolean didRefresh) { - // The third condition exists for uploading the zero state segments where the refresh has not changed the reader reference, but it - // is important to upload the zero state segments so that the restore does not break. return this.primaryTerm != indexShard.getOperationPrimaryTerm() + // If the readers change, didRefresh is always true. || didRefresh - || remoteDirectory.getSegmentsUploadedToRemoteStore().isEmpty(); + // The third condition exists for uploading the zero state segments where the refresh has not changed the reader + // reference, but it is important to upload the zero state segments so that the restore does not break. + || remoteDirectory.getSegmentsUploadedToRemoteStore().isEmpty() + // When the shouldSync is called the first time, then 1st condition on primary term is true. But after that + // we update the primary term and the same condition would not evaluate to true again in syncSegments. + // Below check ensures that if there is commit, then that gets picked up by both 1st and 2nd shouldSync call. 
+ || isRefreshAfterCommitSafe(); } private boolean syncSegments() { - if (indexShard.getReplicationTracker().isPrimaryMode() == false || indexShard.state() == IndexShardState.CLOSED) { - logger.debug( - "Skipped syncing segments with primaryMode={} indexShardState={}", - indexShard.getReplicationTracker().isPrimaryMode(), - indexShard.state() - ); + if (isReadyForUpload() == false) { // Following check is required to enable retry and make sure that we do not lose this refresh event // When primary shard is restored from remote store, the recovery happens first followed by changing // primaryMode to true. Due to this, the refresh that is triggered post replay of translog will not go through @@ -323,6 +322,19 @@ private boolean isRefreshAfterCommit() throws IOException { && !remoteDirectory.containsFile(lastCommittedLocalSegmentFileName, getChecksumOfLocalFile(lastCommittedLocalSegmentFileName))); } + /** + * Returns if the current refresh has happened after a commit. + * @return true if this refresh has happened on account of a commit. If otherwise or exception, returns false. + */ + private boolean isRefreshAfterCommitSafe() { + try { + return isRefreshAfterCommit(); + } catch (Exception e) { + logger.info("Exception occurred in isRefreshAfterCommitSafe", e); + } + return false; + } + void uploadMetadata(Collection localSegmentsPostRefresh, SegmentInfos segmentInfos, ReplicationCheckpoint replicationCheckpoint) throws IOException { final long maxSeqNo = ((InternalEngine) indexShard.getEngine()).currentOngoingRefreshCheckpoint(); @@ -439,6 +451,48 @@ private void updateFinalStatusInSegmentTracker(boolean uploadStatus, long bytesB } } + /** + * On primary term update, we (re)initialise the remote segment directory to reflect the latest metadata file that + * has been uploaded to remote store successfully. This method also updates the segment tracker about the latest + * uploaded segment files onto remote store. 
+ */ + private void initializeRemoteDirectoryOnTermUpdate() throws IOException { + if (this.primaryTerm != indexShard.getOperationPrimaryTerm()) { + logger.trace("primaryTerm update from={} to={}", primaryTerm, indexShard.getOperationPrimaryTerm()); + this.primaryTerm = indexShard.getOperationPrimaryTerm(); + RemoteSegmentMetadata uploadedMetadata = this.remoteDirectory.init(); + + // During failover, the uploaded metadata would have names of files that have been uploaded to remote store. + // Here we update the tracker with latest remote uploaded files. + if (uploadedMetadata != null) { + segmentTracker.setLatestUploadedFiles(uploadedMetadata.getMetadata().keySet()); + } + } + } + + /** + * This checks for readiness of the index shard and primary mode. This has been separated from shouldSync since we use the + * returned value of this method for scheduling retries in syncSegments method. + * @return true iff primaryMode is true and index shard is not in closed state. + */ + private boolean isReadyForUpload() { + boolean isReady = indexShard.getReplicationTracker().isPrimaryMode() && indexShard.state() != IndexShardState.CLOSED; + if (isReady == false) { + StringBuilder sb = new StringBuilder("Skipped syncing segments with"); + if (indexShard.getReplicationTracker() != null) { + sb.append(" primaryMode=").append(indexShard.getReplicationTracker().isPrimaryMode()); + } + if (indexShard.state() != null) { + sb.append(" indexShardState=").append(indexShard.state()); + } + if (indexShard.getEngineOrNull() != null) { + sb.append(" engineType=").append(indexShard.getEngine().getClass().getSimpleName()); + } + logger.trace(sb.toString()); + } + return isReady; + } + /** * Creates an {@link UploadListener} containing the stats population logic which would be triggered before and after segment upload events */ diff --git a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java index
2dd9b1a545d4a..db85a37b556fc 100644 --- a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java +++ b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java @@ -161,6 +161,7 @@ public static void download(Repository repository, ShardId shardId, ThreadPool t remoteTranslogTransferTracker ); RemoteFsTranslog.download(translogTransferManager, location, logger); + logger.trace(remoteTranslogTransferTracker.toString()); } static void download(TranslogTransferManager translogTransferManager, Path location, Logger logger) throws IOException { @@ -173,15 +174,20 @@ static void download(TranslogTransferManager translogTransferManager, Path locat */ IOException ex = null; for (int i = 0; i <= DOWNLOAD_RETRIES; i++) { + boolean success = false; + long startTimeMs = System.currentTimeMillis(); try { downloadOnce(translogTransferManager, location, logger); + success = true; return; } catch (FileNotFoundException | NoSuchFileException e) { // continue till download retries ex = e; + } finally { + logger.trace("downloadOnce success={} timeElapsed={}", success, (System.currentTimeMillis() - startTimeMs)); } } - logger.debug("Exhausted all download retries during translog/checkpoint file download"); + logger.info("Exhausted all download retries during translog/checkpoint file download"); throw ex; } From 218a2ef7c6f670f99cc20d22f8866d7597718640 Mon Sep 17 00:00:00 2001 From: Gaurav Bafna <85113518+gbbafna@users.noreply.github.com> Date: Mon, 23 Oct 2023 14:46:26 +0530 Subject: [PATCH 23/45] [Remote Store] Add extra buffer before deleting older generations of translog (#10817) --------- Signed-off-by: Gaurav Bafna --- .../common/settings/IndexScopedSettings.java | 1 + .../org/opensearch/index/IndexSettings.java | 19 ++ .../index/translog/RemoteFsTranslog.java | 2 +- .../index/translog/RemoteFsTranslogTests.java | 181 +++++++++++++----- 4 files changed, 154 insertions(+), 49 deletions(-) diff --git 
a/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java index 83bf8c82ee3dd..62e8faf33e1fa 100644 --- a/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java @@ -221,6 +221,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings { // Settings for remote translog IndexSettings.INDEX_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING, + IndexSettings.INDEX_REMOTE_TRANSLOG_KEEP_EXTRA_GEN_SETTING, // Settings for remote store enablement IndexMetadata.INDEX_REMOTE_STORE_ENABLED_SETTING, diff --git a/server/src/main/java/org/opensearch/index/IndexSettings.java b/server/src/main/java/org/opensearch/index/IndexSettings.java index 99d2b5a74c406..00e765d73f77f 100644 --- a/server/src/main/java/org/opensearch/index/IndexSettings.java +++ b/server/src/main/java/org/opensearch/index/IndexSettings.java @@ -668,6 +668,14 @@ public static IndexMergePolicy fromString(String text) { Property.IndexScope ); + public static final Setting INDEX_REMOTE_TRANSLOG_KEEP_EXTRA_GEN_SETTING = Setting.intSetting( + "index.remote_store.translog.keep_extra_gen", + 100, + 0, + Property.Dynamic, + Property.IndexScope + ); + private final Index index; private final Version version; private final Logger logger; @@ -680,6 +688,7 @@ public static IndexMergePolicy fromString(String text) { private final String remoteStoreTranslogRepository; private final String remoteStoreRepository; private final boolean isRemoteSnapshot; + private int remoteTranslogKeepExtraGen; private Version extendedCompatibilitySnapshotVersion; // volatile fields are updated via #updateIndexMetadata(IndexMetadata) under lock @@ -850,6 +859,7 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti remoteStoreTranslogRepository = 
settings.get(IndexMetadata.SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY); remoteTranslogUploadBufferInterval = INDEX_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING.get(settings); remoteStoreRepository = settings.get(IndexMetadata.SETTING_REMOTE_SEGMENT_STORE_REPOSITORY); + this.remoteTranslogKeepExtraGen = INDEX_REMOTE_TRANSLOG_KEEP_EXTRA_GEN_SETTING.get(settings); isRemoteSnapshot = IndexModule.Type.REMOTE_SNAPSHOT.match(this.settings); if (isRemoteSnapshot && FeatureFlags.isEnabled(SEARCHABLE_SNAPSHOT_EXTENDED_COMPATIBILITY)) { @@ -1021,6 +1031,7 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti INDEX_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING, this::setRemoteTranslogUploadBufferInterval ); + scopedSettings.addSettingsUpdateConsumer(INDEX_REMOTE_TRANSLOG_KEEP_EXTRA_GEN_SETTING, this::setRemoteTranslogKeepExtraGen); } private void setSearchIdleAfter(TimeValue searchIdleAfter) { @@ -1300,6 +1311,10 @@ public TimeValue getRemoteTranslogUploadBufferInterval() { return remoteTranslogUploadBufferInterval; } + public int getRemoteTranslogExtraKeep() { + return remoteTranslogKeepExtraGen; + } + /** * Returns true iff the remote translog buffer interval setting exists or in other words is explicitly set. */ @@ -1311,6 +1326,10 @@ public void setRemoteTranslogUploadBufferInterval(TimeValue remoteTranslogUpload this.remoteTranslogUploadBufferInterval = remoteTranslogUploadBufferInterval; } + public void setRemoteTranslogKeepExtraGen(int extraGen) { + this.remoteTranslogKeepExtraGen = extraGen; + } + /** * Returns this interval in which the shards of this index are asynchronously refreshed. {@code -1} means async refresh is disabled. 
*/ diff --git a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java index db85a37b556fc..a305a774f5854 100644 --- a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java +++ b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java @@ -431,7 +431,7 @@ public void trimUnreferencedReaders() throws IOException { // cleans up remote translog files not referenced in latest uploaded metadata. // This enables us to restore translog from the metadata in case of failover or relocation. Set generationsToDelete = new HashSet<>(); - for (long generation = minRemoteGenReferenced - 1; generation >= 0; generation--) { + for (long generation = minRemoteGenReferenced - 1 - indexSettings().getRemoteTranslogExtraKeep(); generation >= 0; generation--) { if (fileTransferTracker.uploaded(Translog.getFilename(generation)) == false) { break; } diff --git a/server/src/test/java/org/opensearch/index/translog/RemoteFsTranslogTests.java b/server/src/test/java/org/opensearch/index/translog/RemoteFsTranslogTests.java index 42e0df2dc90c1..3cb65610fab58 100644 --- a/server/src/test/java/org/opensearch/index/translog/RemoteFsTranslogTests.java +++ b/server/src/test/java/org/opensearch/index/translog/RemoteFsTranslogTests.java @@ -97,6 +97,7 @@ import java.util.zip.CheckedInputStream; import static org.opensearch.common.util.BigArrays.NON_RECYCLING_INSTANCE; +import static org.opensearch.index.IndexSettings.INDEX_REMOTE_TRANSLOG_KEEP_EXTRA_GEN_SETTING; import static org.opensearch.index.translog.RemoteFsTranslog.TRANSLOG; import static org.opensearch.index.translog.SnapshotMatchers.containsOperationsInAnyOrder; import static org.opensearch.index.translog.TranslogDeletionPolicies.createTranslogDeletionPolicy; @@ -124,6 +125,8 @@ public class RemoteFsTranslogTests extends OpenSearchTestCase { private ThreadPool threadPool; private final static String METADATA_DIR = 
"metadata"; private final static String DATA_DIR = "data"; + + AtomicInteger writeCalls = new AtomicInteger(); BlobStoreRepository repository; BlobStoreTransferService blobStoreTransferService; @@ -163,13 +166,13 @@ public void tearDown() throws Exception { private RemoteFsTranslog create(Path path) throws IOException { final String translogUUID = Translog.createEmptyTranslog(path, SequenceNumbers.NO_OPS_PERFORMED, shardId, primaryTerm.get()); - return create(path, createRepository(), translogUUID); + return create(path, createRepository(), translogUUID, 0); } - private RemoteFsTranslog create(Path path, BlobStoreRepository repository, String translogUUID) throws IOException { + private RemoteFsTranslog create(Path path, BlobStoreRepository repository, String translogUUID, int extraGenToKeep) throws IOException { this.repository = repository; globalCheckpoint = new AtomicLong(SequenceNumbers.NO_OPS_PERFORMED); - final TranslogConfig translogConfig = getTranslogConfig(path); + final TranslogConfig translogConfig = getTranslogConfig(path, extraGenToKeep); final TranslogDeletionPolicy deletionPolicy = createTranslogDeletionPolicy(translogConfig.getIndexSettings()); threadPool = new TestThreadPool(getClass().getName()); blobStoreTransferService = new BlobStoreTransferService(repository.blobStore(), threadPool); @@ -185,10 +188,17 @@ private RemoteFsTranslog create(Path path, BlobStoreRepository repository, Strin primaryMode::get, new RemoteTranslogTransferTracker(shardId, 10) ); + } + private RemoteFsTranslog create(Path path, BlobStoreRepository repository, String translogUUID) throws IOException { + return create(path, repository, translogUUID, 0); } private TranslogConfig getTranslogConfig(final Path path) { + return getTranslogConfig(path, 0); + } + + private TranslogConfig getTranslogConfig(final Path path, int gensToKeep) { final Settings settings = Settings.builder() .put(IndexMetadata.SETTING_VERSION_CREATED, org.opensearch.Version.CURRENT) // only randomize 
between nog age retention and a long one, so failures will have a chance of reproducing @@ -196,6 +206,7 @@ private TranslogConfig getTranslogConfig(final Path path) { .put(IndexSettings.INDEX_TRANSLOG_RETENTION_SIZE_SETTING.getKey(), randomIntBetween(-1, 2048) + "b") .put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) .put(IndexMetadata.SETTING_REMOTE_STORE_ENABLED, true) + .put(INDEX_REMOTE_TRANSLOG_KEEP_EXTRA_GEN_SETTING.getKey(), gensToKeep) .build(); return getTranslogConfig(path, settings); } @@ -372,6 +383,111 @@ public void testSimpleOperations() throws IOException { } + private TranslogConfig getConfig(int gensToKeep) { + Path tempDir = createTempDir(); + final TranslogConfig temp = getTranslogConfig(tempDir, gensToKeep); + final TranslogConfig config = new TranslogConfig( + temp.getShardId(), + temp.getTranslogPath(), + temp.getIndexSettings(), + temp.getBigArrays(), + new ByteSizeValue(1, ByteSizeUnit.KB), + "" + ); + return config; + } + + private ChannelFactory getChannelFactory() { + writeCalls = new AtomicInteger(); + final ChannelFactory channelFactory = (file, openOption) -> { + FileChannel delegate = FileChannel.open(file, openOption); + boolean success = false; + try { + // don't do partial writes for checkpoints we rely on the fact that the bytes are written as an atomic operation + final boolean isCkpFile = file.getFileName().toString().endsWith(".ckp"); + + final FileChannel channel; + if (isCkpFile) { + channel = delegate; + } else { + channel = new FilterFileChannel(delegate) { + + @Override + public int write(ByteBuffer src) throws IOException { + writeCalls.incrementAndGet(); + return super.write(src); + } + }; + } + success = true; + return channel; + } finally { + if (success == false) { + IOUtils.closeWhileHandlingException(delegate); + } + } + }; + return channelFactory; + } + + public void testExtraGenToKeep() throws Exception { + TranslogConfig config = getConfig(1); + ChannelFactory channelFactory = 
getChannelFactory(); + final Set persistedSeqNos = new HashSet<>(); + String translogUUID = Translog.createEmptyTranslog( + config.getTranslogPath(), + SequenceNumbers.NO_OPS_PERFORMED, + shardId, + channelFactory, + primaryTerm.get() + ); + TranslogDeletionPolicy deletionPolicy = createTranslogDeletionPolicy(config.getIndexSettings()); + ArrayList ops = new ArrayList<>(); + try ( + RemoteFsTranslog translog = new RemoteFsTranslog( + config, + translogUUID, + deletionPolicy, + () -> SequenceNumbers.NO_OPS_PERFORMED, + primaryTerm::get, + persistedSeqNos::add, + repository, + threadPool, + () -> Boolean.TRUE, + new RemoteTranslogTransferTracker(shardId, 10) + ) { + @Override + ChannelFactory getChannelFactory() { + return channelFactory; + } + } + ) { + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("1", 0, primaryTerm.get(), new byte[] { 1 })); + + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("2", 1, primaryTerm.get(), new byte[] { 1 })); + + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("3", 2, primaryTerm.get(), new byte[] { 1 })); + + // expose the new checkpoint (simulating a commit), before we trim the translog + translog.setMinSeqNoToKeep(2); + + // Trims from local + translog.trimUnreferencedReaders(); + assertBusy(() -> assertTrue(translog.isRemoteGenerationDeletionPermitsAvailable())); + + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("4", 3, primaryTerm.get(), new byte[] { 1 })); + + // Trims from remote now + translog.trimUnreferencedReaders(); + assertBusy(() -> assertTrue(translog.isRemoteGenerationDeletionPermitsAvailable())); + assertEquals( + 6, + blobStoreTransferService.listAll(getTranslogDirectory().add(DATA_DIR).add(String.valueOf(primaryTerm.get()))).size() + ); + + } + } + public void testReadLocation() throws IOException { ArrayList ops = new ArrayList<>(); ArrayList locs = new ArrayList<>(); @@ -619,14 +735,22 @@ public void testSimpleOperationsUpload() throws Exception 
{ // this should now trim as tlog-2 files from remote, but not tlog-3 and tlog-4 addToTranslogAndListAndUpload(translog, ops, new Translog.Index("2", 2, primaryTerm.get(), new byte[] { 1 })); assertEquals(2, translog.stats().estimatedNumberOfOperations()); + assertBusy(() -> assertTrue(translog.isRemoteGenerationDeletionPermitsAvailable())); translog.setMinSeqNoToKeep(2); - - assertBusy(() -> assertTrue(translog.isRemoteGenerationDeletionPermitsAvailable())); + // this should now trim as tlog-2 files from remote, but not tlog-3 and tlog-4 translog.trimUnreferencedReaders(); + assertBusy(() -> assertTrue(translog.isRemoteGenerationDeletionPermitsAvailable())); assertEquals(1, translog.readers.size()); assertEquals(1, translog.stats().estimatedNumberOfOperations()); - assertBusy(() -> assertEquals(4, translog.allUploaded().size())); + assertBusy(() -> { + assertEquals(4, translog.allUploaded().size()); + assertEquals( + 4, + blobStoreTransferService.listAll(getTranslogDirectory().add(DATA_DIR).add(String.valueOf(primaryTerm.get()))).size() + ); + }); + } public void testMetadataFileDeletion() throws Exception { @@ -1273,49 +1397,10 @@ public void testTranslogWriter() throws IOException { } public void testTranslogWriterCanFlushInAddOrReadCall() throws IOException { - Path tempDir = createTempDir(); - final TranslogConfig temp = getTranslogConfig(tempDir); - final TranslogConfig config = new TranslogConfig( - temp.getShardId(), - temp.getTranslogPath(), - temp.getIndexSettings(), - temp.getBigArrays(), - new ByteSizeValue(1, ByteSizeUnit.KB), - "" - ); - + final TranslogConfig config = getConfig(1); final Set persistedSeqNos = new HashSet<>(); - final AtomicInteger writeCalls = new AtomicInteger(); - - final ChannelFactory channelFactory = (file, openOption) -> { - FileChannel delegate = FileChannel.open(file, openOption); - boolean success = false; - try { - // don't do partial writes for checkpoints we rely on the fact that the bytes are written as an atomic 
operation - final boolean isCkpFile = file.getFileName().toString().endsWith(".ckp"); - - final FileChannel channel; - if (isCkpFile) { - channel = delegate; - } else { - channel = new FilterFileChannel(delegate) { - - @Override - public int write(ByteBuffer src) throws IOException { - writeCalls.incrementAndGet(); - return super.write(src); - } - }; - } - success = true; - return channel; - } finally { - if (success == false) { - IOUtils.closeWhileHandlingException(delegate); - } - } - }; - + writeCalls = new AtomicInteger(); + final ChannelFactory channelFactory = getChannelFactory(); String translogUUID = Translog.createEmptyTranslog( config.getTranslogPath(), SequenceNumbers.NO_OPS_PERFORMED, From 8f13dee77a7e78833cf90b20607cb4d714032bd8 Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar <50844303+rahulkarajgikar@users.noreply.github.com> Date: Mon, 23 Oct 2023 14:50:20 +0530 Subject: [PATCH 24/45] Make index and global metadata upload timeout dynamic cluster settings (#10814) * Make index and global metadata upload wait time dynamic Signed-off-by: Rahul Karajgikar --- CHANGELOG.md | 1 + .../common/settings/ClusterSettings.java | 2 + .../remote/RemoteClusterStateService.java | 47 +++++++++++++++++-- .../RemoteClusterStateServiceTests.java | 36 +++++++++++++- 4 files changed, 80 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 374dd4ab57ee6..9874e7c431b7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -94,6 +94,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Remote Store] Add repository stats for remote store([#10567](https://github.com/opensearch-project/OpenSearch/pull/10567)) - Add search query categorizer ([#10255](https://github.com/opensearch-project/OpenSearch/pull/10255)) - Introduce ConcurrentQueryProfiler to profile query using concurrent segment search path and support concurrency during rewrite and create weight ([10352](https://github.com/opensearch-project/OpenSearch/pull/10352)) +- 
[Remote cluster state] Make index and global metadata upload timeout dynamic cluster settings ([#10814](https://github.com/opensearch-project/OpenSearch/pull/10814)) ### Dependencies - Bump `com.google.api.grpc:proto-google-common-protos` from 2.10.0 to 2.25.1 ([#10208](https://github.com/opensearch-project/OpenSearch/pull/10208), [#10298](https://github.com/opensearch-project/OpenSearch/pull/10298)) diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 7ac7da819b215..a0fca4f0a2ff0 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -682,6 +682,8 @@ public void apply(Settings value, Settings current, Settings previous) { // Remote cluster state settings RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING, + RemoteClusterStateService.INDEX_METADATA_UPLOAD_TIMEOUT_SETTING, + RemoteClusterStateService.GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING, RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING, IndicesService.CLUSTER_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING, IndicesService.CLUSTER_REMOTE_INDEX_RESTRICT_ASYNC_DURABILITY_SETTING, diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index b505b3e2d4c0c..ae4a3fab9852d 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -83,9 +83,23 @@ public class RemoteClusterStateService implements Closeable { private static final Logger logger = LogManager.getLogger(RemoteClusterStateService.class); - // TODO make this two variable as dynamic setting [issue: #10688] - public static final int INDEX_METADATA_UPLOAD_WAIT_MILLIS = 20000; - 
public static final int GLOBAL_METADATA_UPLOAD_WAIT_MILLIS = 20000; + public static final TimeValue INDEX_METADATA_UPLOAD_TIMEOUT_DEFAULT = TimeValue.timeValueMillis(20000); + + public static final TimeValue GLOBAL_METADATA_UPLOAD_TIMEOUT_DEFAULT = TimeValue.timeValueMillis(20000); + + public static final Setting INDEX_METADATA_UPLOAD_TIMEOUT_SETTING = Setting.timeSetting( + "cluster.remote_store.state.index_metadata.upload_timeout", + INDEX_METADATA_UPLOAD_TIMEOUT_DEFAULT, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + public static final Setting GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING = Setting.timeSetting( + "cluster.remote_store.state.global_metadata.upload_timeout", + GLOBAL_METADATA_UPLOAD_TIMEOUT_DEFAULT, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); public static final ChecksumBlobStoreFormat INDEX_METADATA_FORMAT = new ChecksumBlobStoreFormat<>( "index-metadata", @@ -141,6 +155,9 @@ public class RemoteClusterStateService implements Closeable { private BlobStoreTransferService blobStoreTransferService; private volatile TimeValue slowWriteLoggingThreshold; + private volatile TimeValue indexMetadataUploadTimeout; + private volatile TimeValue globalMetadataUploadTimeout; + private final AtomicBoolean deleteStaleMetadataRunning = new AtomicBoolean(false); public static final int INDEX_METADATA_CURRENT_CODEC_VERSION = 1; @@ -171,7 +188,11 @@ public RemoteClusterStateService( this.relativeTimeNanosSupplier = relativeTimeNanosSupplier; this.threadpool = threadPool; this.slowWriteLoggingThreshold = clusterSettings.get(SLOW_WRITE_LOGGING_THRESHOLD); + this.indexMetadataUploadTimeout = clusterSettings.get(INDEX_METADATA_UPLOAD_TIMEOUT_SETTING); + this.globalMetadataUploadTimeout = clusterSettings.get(GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING); clusterSettings.addSettingsUpdateConsumer(SLOW_WRITE_LOGGING_THRESHOLD, this::setSlowWriteLoggingThreshold); + clusterSettings.addSettingsUpdateConsumer(INDEX_METADATA_UPLOAD_TIMEOUT_SETTING, 
this::setIndexMetadataUploadTimeout); + clusterSettings.addSettingsUpdateConsumer(GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING, this::setGlobalMetadataUploadTimeout); } private BlobStoreTransferService getBlobStoreTransferService() { @@ -367,7 +388,7 @@ private String writeGlobalMetadata(ClusterState clusterState) throws IOException ); try { - if (latch.await(GLOBAL_METADATA_UPLOAD_WAIT_MILLIS, TimeUnit.MILLISECONDS) == false) { + if (latch.await(getGlobalMetadataUploadTimeout().millis(), TimeUnit.MILLISECONDS) == false) { // TODO: We should add metrics where transfer is timing out. [Issue: #10687] GlobalMetadataTransferException ex = new GlobalMetadataTransferException( String.format(Locale.ROOT, "Timed out waiting for transfer of global metadata to complete") @@ -422,7 +443,7 @@ private List writeIndexMetadataParallel(ClusterState clus } try { - if (latch.await(INDEX_METADATA_UPLOAD_WAIT_MILLIS, TimeUnit.MILLISECONDS) == false) { + if (latch.await(getIndexMetadataUploadTimeout().millis(), TimeUnit.MILLISECONDS) == false) { IndexMetadataTransferException ex = new IndexMetadataTransferException( String.format( Locale.ROOT, @@ -621,6 +642,22 @@ private void setSlowWriteLoggingThreshold(TimeValue slowWriteLoggingThreshold) { this.slowWriteLoggingThreshold = slowWriteLoggingThreshold; } + private void setIndexMetadataUploadTimeout(TimeValue newIndexMetadataUploadTimeout) { + this.indexMetadataUploadTimeout = newIndexMetadataUploadTimeout; + } + + private void setGlobalMetadataUploadTimeout(TimeValue newGlobalMetadataUploadTimeout) { + this.globalMetadataUploadTimeout = newGlobalMetadataUploadTimeout; + } + + public TimeValue getIndexMetadataUploadTimeout() { + return this.indexMetadataUploadTimeout; + } + + public TimeValue getGlobalMetadataUploadTimeout() { + return this.globalMetadataUploadTimeout; + } + static String getManifestFileName(long term, long version, boolean committed) { // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/manifest/manifest______C/P____ 
return String.join( diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index 827735f6bcc0b..4be5fc03c2a6d 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -102,6 +102,7 @@ public class RemoteClusterStateServiceTests extends OpenSearchTestCase { private RemoteClusterStateService remoteClusterStateService; + private ClusterSettings clusterSettings; private Supplier repositoriesServiceSupplier; private RepositoriesService repositoriesService; private BlobStoreRepository blobStoreRepository; @@ -132,6 +133,7 @@ public void setup() { .put(RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true) .build(); + clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); NamedXContentRegistry xContentRegistry = new NamedXContentRegistry( Stream.of( NetworkModule.getNamedXContents().stream(), @@ -149,7 +151,7 @@ public void setup() { "test-node-id", repositoriesServiceSupplier, settings, - new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), + clusterSettings, () -> 0L, threadPool ); @@ -1053,6 +1055,38 @@ public void testSingleConcurrentExecutionOfStaleManifestCleanup() throws Excepti assertBusy(() -> assertEquals(1, callCount.get())); } + public void testIndexMetadataUploadWaitTimeSetting() { + // verify default value + assertEquals( + RemoteClusterStateService.INDEX_METADATA_UPLOAD_TIMEOUT_DEFAULT, + remoteClusterStateService.getIndexMetadataUploadTimeout() + ); + + // verify update index metadata upload timeout + int indexMetadataUploadTimeout = randomIntBetween(1, 10); + Settings newSettings = Settings.builder() + .put("cluster.remote_store.state.index_metadata.upload_timeout", indexMetadataUploadTimeout + "s") + 
.build(); + clusterSettings.applySettings(newSettings); + assertEquals(indexMetadataUploadTimeout, remoteClusterStateService.getIndexMetadataUploadTimeout().seconds()); + } + + public void testGlobalMetadataUploadWaitTimeSetting() { + // verify default value + assertEquals( + RemoteClusterStateService.GLOBAL_METADATA_UPLOAD_TIMEOUT_DEFAULT, + remoteClusterStateService.getGlobalMetadataUploadTimeout() + ); + + // verify update global metadata upload timeout + int globalMetadataUploadTimeout = randomIntBetween(1, 10); + Settings newSettings = Settings.builder() + .put("cluster.remote_store.state.global_metadata.upload_timeout", globalMetadataUploadTimeout + "s") + .build(); + clusterSettings.applySettings(newSettings); + assertEquals(globalMetadataUploadTimeout, remoteClusterStateService.getGlobalMetadataUploadTimeout().seconds()); + } + private void mockObjectsForGettingPreviousClusterUUID(Map clusterUUIDsPointers) throws IOException { mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, false); } From d84a00bbcadd41a4631eed3a96691645322ee5c0 Mon Sep 17 00:00:00 2001 From: Sachin Kale Date: Mon, 23 Oct 2023 16:59:55 +0530 Subject: [PATCH 25/45] Revert "Validate checksum of each segment file post download from remote store (#10119)" (#10847) This reverts commit 6c022612769e6bc7ed18b9ec0888d6cd0dd415cc. 
Signed-off-by: Sachin Kale Co-authored-by: Sachin Kale --- .../remotestore/RemoteStoreRestoreIT.java | 33 +------------ .../store/RemoteSegmentStoreDirectory.java | 4 -- .../org/opensearch/index/store/Store.java | 47 +------------------ 3 files changed, 3 insertions(+), 81 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRestoreIT.java index 212f797180077..7626e3dba6424 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRestoreIT.java @@ -10,11 +10,8 @@ import org.opensearch.action.admin.cluster.remotestore.restore.RestoreRemoteStoreRequest; import org.opensearch.action.admin.cluster.remotestore.restore.RestoreRemoteStoreResponse; -import org.opensearch.action.admin.indices.get.GetIndexRequest; -import org.opensearch.action.admin.indices.get.GetIndexResponse; import org.opensearch.action.support.PlainActionFuture; import org.opensearch.cluster.health.ClusterHealthStatus; -import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.settings.Settings; @@ -22,12 +19,10 @@ import org.opensearch.core.common.unit.ByteSizeUnit; import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.Repository; -import org.opensearch.test.CorruptionUtils; import org.opensearch.test.InternalTestCluster; import org.opensearch.test.OpenSearchIntegTestCase; import java.io.IOException; -import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; import java.util.Locale; @@ -35,14 +30,13 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; -import java.util.stream.Stream; 
import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount; import static org.hamcrest.Matchers.greaterThan; -@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.SUITE, numDataNodes = 0) public class RemoteStoreRestoreIT extends BaseRemoteStoreRestoreIT { /** @@ -467,30 +461,5 @@ public void testRateLimitedRemoteDownloads() throws Exception { } } - public void testRestoreCorruptSegmentShouldFail() throws IOException, ExecutionException, InterruptedException { - prepareCluster(1, 3, INDEX_NAME, 0, 1); - indexData(randomIntBetween(3, 4), true, INDEX_NAME); - - GetIndexResponse getIndexResponse = client().admin().indices().getIndex(new GetIndexRequest()).get(); - String indexUUID = getIndexResponse.getSettings().get(INDEX_NAME).get(IndexMetadata.SETTING_INDEX_UUID); - - logger.info("--> Corrupting segment files in remote segment store"); - Path path = segmentRepoPath.resolve(indexUUID).resolve("0").resolve("segments").resolve("data"); - try (Stream dataPath = Files.list(path)) { - CorruptionUtils.corruptFile(random(), dataPath.toArray(Path[]::new)); - } - - logger.info("--> Stop primary"); - internalCluster().stopRandomNode(InternalTestCluster.nameFilter(primaryNodeName(INDEX_NAME))); - - logger.info("--> Close and restore the index"); - client().admin() - .cluster() - .restoreRemoteStore(new RestoreRemoteStoreRequest().indices(INDEX_NAME).waitForCompletion(true), PlainActionFuture.newFuture()); - - logger.info("--> Check for index status, should be red due to corruption"); - ensureRed(INDEX_NAME); - } - // TODO: Restore flow - index aliases } diff --git 
a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java index 6b43fed3d8930..be1f2341236ab 100644 --- a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java +++ b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java @@ -290,10 +290,6 @@ public void setWrittenByMajor(int writtenByMajor) { ); } } - - public int getWrittenByMajor() { - return writtenByMajor; - } } /** diff --git a/server/src/main/java/org/opensearch/index/store/Store.java b/server/src/main/java/org/opensearch/index/store/Store.java index d0cd2635ba672..b822742de6e97 100644 --- a/server/src/main/java/org/opensearch/index/store/Store.java +++ b/server/src/main/java/org/opensearch/index/store/Store.java @@ -105,7 +105,6 @@ import java.io.UncheckedIOException; import java.nio.file.NoSuchFileException; import java.nio.file.Path; -import java.text.ParseException; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -121,7 +120,6 @@ import java.util.zip.CRC32; import java.util.zip.Checksum; -import static java.lang.Character.MAX_RADIX; import static java.util.Collections.emptyMap; import static java.util.Collections.unmodifiableMap; import static org.opensearch.index.seqno.SequenceNumbers.LOCAL_CHECKPOINT_KEY; @@ -977,11 +975,7 @@ public void copyFrom(Directory from, String src, String dest, IOContext context) boolean success = false; long startTime = System.currentTimeMillis(); try { - if (from instanceof RemoteSegmentStoreDirectory) { - copyFileAndValidateChecksum(from, src, dest, context, fileSize); - } else { - super.copyFrom(from, src, dest, context); - } + super.copyFrom(from, src, dest, context); success = true; afterDownload(fileSize, startTime); } finally { @@ -991,43 +985,6 @@ public void copyFrom(Directory from, String src, String dest, IOContext context) } } - private void 
copyFileAndValidateChecksum(Directory from, String src, String dest, IOContext context, long fileSize) - throws IOException { - RemoteSegmentStoreDirectory.UploadedSegmentMetadata metadata = ((RemoteSegmentStoreDirectory) from) - .getSegmentsUploadedToRemoteStore() - .get(dest); - boolean success = false; - try (IndexInput is = from.openInput(src, context); IndexOutput os = createOutput(dest, context)) { - // Here, we don't need the exact version as LuceneVerifyingIndexOutput does not verify version - // It is just used to emit logs when the entire metadata object is provided as parameter. Also, - // we can't provide null version as StoreFileMetadata has non-null check on writtenBy field. - Version luceneMajorVersion = Version.parse(metadata.getWrittenByMajor() + ".0.0"); - long checksum = Long.parseLong(metadata.getChecksum()); - StoreFileMetadata storeFileMetadata = new StoreFileMetadata( - dest, - fileSize, - Long.toString(checksum, MAX_RADIX), - luceneMajorVersion - ); - VerifyingIndexOutput verifyingIndexOutput = new LuceneVerifyingIndexOutput(storeFileMetadata, os); - verifyingIndexOutput.copyBytes(is, is.length()); - verifyingIndexOutput.verify(); - success = true; - } catch (ParseException e) { - throw new IOException("Exception while reading version info for segment file from remote store: " + dest, e); - } finally { - if (success == false) { - // If the exception is thrown after file is created, we clean up the file. - // We ignore the exception as the deletion is best-effort basis and can fail if file does not exist. - try { - deleteFile("Quietly deleting", dest); - } catch (Exception e) { - // Ignore - } - } - } - } - /** * Updates the amount of bytes attempted for download */ @@ -1519,7 +1476,7 @@ public static boolean isAutogenerated(String name) { * Produces a string representation of the given digest value. 
*/ public static String digestToString(long digest) { - return Long.toString(digest, MAX_RADIX); + return Long.toString(digest, Character.MAX_RADIX); } /** From 08ffd3586d381fe3526965e8a4570226a9704407 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 23 Oct 2023 11:21:44 -0400 Subject: [PATCH 26/45] Bump org.apache.logging.log4j:log4j-core from 2.20.0 to 2.21.0 (#10858) * Bump org.apache.logging.log4j:log4j-core Bumps org.apache.logging.log4j:log4j-core from 2.20.0 to 2.21.0. --- updated-dependencies: - dependency-name: org.apache.logging.log4j:log4j-core dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] * Update changelog Signed-off-by: dependabot[bot] * Bumps org.apache.logging.log4j:log4j-core from 2.20.0 to 2.21.0 Signed-off-by: Andriy Redko --------- Signed-off-by: dependabot[bot] Signed-off-by: Andriy Redko Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: dependabot[bot] Co-authored-by: Andriy Redko --- CHANGELOG.md | 3 ++- .../src/testKit/thirdPartyAudit/sample_jars/build.gradle | 3 ++- buildSrc/version.properties | 2 +- libs/core/licenses/log4j-api-2.20.0.jar.sha1 | 1 - libs/core/licenses/log4j-api-2.21.0.jar.sha1 | 1 + plugins/crypto-kms/licenses/log4j-1.2-api-2.20.0.jar.sha1 | 1 - plugins/crypto-kms/licenses/log4j-1.2-api-2.21.0.jar.sha1 | 1 + .../licenses/log4j-1.2-api-2.20.0.jar.sha1 | 1 - .../licenses/log4j-1.2-api-2.21.0.jar.sha1 | 1 + plugins/discovery-ec2/licenses/log4j-1.2-api-2.20.0.jar.sha1 | 1 - plugins/discovery-ec2/licenses/log4j-1.2-api-2.21.0.jar.sha1 | 1 + plugins/discovery-gce/licenses/log4j-1.2-api-2.20.0.jar.sha1 | 1 - plugins/discovery-gce/licenses/log4j-1.2-api-2.21.0.jar.sha1 | 1 + .../repository-gcs/licenses/log4j-1.2-api-2.20.0.jar.sha1 | 1 - .../repository-gcs/licenses/log4j-1.2-api-2.21.0.jar.sha1 | 1 + .../licenses/log4j-slf4j-impl-2.20.0.jar.sha1 | 1 - 
.../licenses/log4j-slf4j-impl-2.21.0.jar.sha1 | 1 + plugins/repository-s3/licenses/log4j-1.2-api-2.20.0.jar.sha1 | 1 - plugins/repository-s3/licenses/log4j-1.2-api-2.21.0.jar.sha1 | 1 + qa/os/build.gradle | 5 +++++ server/build.gradle | 4 +++- server/licenses/log4j-api-2.20.0.jar.sha1 | 1 - server/licenses/log4j-api-2.21.0.jar.sha1 | 1 + server/licenses/log4j-core-2.20.0.jar.sha1 | 1 - server/licenses/log4j-core-2.21.0.jar.sha1 | 1 + server/licenses/log4j-jul-2.20.0.jar.sha1 | 1 - server/licenses/log4j-jul-2.21.0.jar.sha1 | 1 + 27 files changed, 24 insertions(+), 15 deletions(-) delete mode 100644 libs/core/licenses/log4j-api-2.20.0.jar.sha1 create mode 100644 libs/core/licenses/log4j-api-2.21.0.jar.sha1 delete mode 100644 plugins/crypto-kms/licenses/log4j-1.2-api-2.20.0.jar.sha1 create mode 100644 plugins/crypto-kms/licenses/log4j-1.2-api-2.21.0.jar.sha1 delete mode 100644 plugins/discovery-azure-classic/licenses/log4j-1.2-api-2.20.0.jar.sha1 create mode 100644 plugins/discovery-azure-classic/licenses/log4j-1.2-api-2.21.0.jar.sha1 delete mode 100644 plugins/discovery-ec2/licenses/log4j-1.2-api-2.20.0.jar.sha1 create mode 100644 plugins/discovery-ec2/licenses/log4j-1.2-api-2.21.0.jar.sha1 delete mode 100644 plugins/discovery-gce/licenses/log4j-1.2-api-2.20.0.jar.sha1 create mode 100644 plugins/discovery-gce/licenses/log4j-1.2-api-2.21.0.jar.sha1 delete mode 100644 plugins/repository-gcs/licenses/log4j-1.2-api-2.20.0.jar.sha1 create mode 100644 plugins/repository-gcs/licenses/log4j-1.2-api-2.21.0.jar.sha1 delete mode 100644 plugins/repository-hdfs/licenses/log4j-slf4j-impl-2.20.0.jar.sha1 create mode 100644 plugins/repository-hdfs/licenses/log4j-slf4j-impl-2.21.0.jar.sha1 delete mode 100644 plugins/repository-s3/licenses/log4j-1.2-api-2.20.0.jar.sha1 create mode 100644 plugins/repository-s3/licenses/log4j-1.2-api-2.21.0.jar.sha1 delete mode 100644 server/licenses/log4j-api-2.20.0.jar.sha1 create mode 100644 server/licenses/log4j-api-2.21.0.jar.sha1 delete mode 
100644 server/licenses/log4j-core-2.20.0.jar.sha1 create mode 100644 server/licenses/log4j-core-2.21.0.jar.sha1 delete mode 100644 server/licenses/log4j-jul-2.20.0.jar.sha1 create mode 100644 server/licenses/log4j-jul-2.21.0.jar.sha1 diff --git a/CHANGELOG.md b/CHANGELOG.md index 9874e7c431b7c..8c4563a216974 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -107,6 +107,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `org.codehaus.woodstox:stax2-api` from 4.2.1 to 4.2.2 ([#10639](https://github.com/opensearch-project/OpenSearch/pull/10639)) - Bump `com.google.http-client:google-http-client` from 1.43.2 to 1.43.3 ([#10635](https://github.com/opensearch-project/OpenSearch/pull/10635)) - Bump `com.squareup.okio:okio` from 3.5.0 to 3.6.0 ([#10637](https://github.com/opensearch-project/OpenSearch/pull/10637)) +- Bump `org.apache.logging.log4j:log4j-core` from 2.20.0 to 2.21.0 ([#10858](https://github.com/opensearch-project/OpenSearch/pull/10858)) ### Changed - Mute the query profile IT with concurrent execution ([#9840](https://github.com/opensearch-project/OpenSearch/pull/9840)) @@ -130,4 +131,4 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Security [Unreleased 3.0]: https://github.com/opensearch-project/OpenSearch/compare/2.x...HEAD -[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.12...2.x +[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.12...2.x \ No newline at end of file diff --git a/buildSrc/src/testKit/thirdPartyAudit/sample_jars/build.gradle b/buildSrc/src/testKit/thirdPartyAudit/sample_jars/build.gradle index cb8050d1718c4..74c88e0961c9c 100644 --- a/buildSrc/src/testKit/thirdPartyAudit/sample_jars/build.gradle +++ b/buildSrc/src/testKit/thirdPartyAudit/sample_jars/build.gradle @@ -15,8 +15,9 @@ plugins { repositories { mavenCentral() } + dependencies { - implementation "org.apache.logging.log4j:log4j-core:2.20.0" + 
implementation "org.apache.logging.log4j:log4j-core:2.21.0" } ["0.0.1", "0.0.2"].forEach { v -> diff --git a/buildSrc/version.properties b/buildSrc/version.properties index a5171aa582a86..96d398c35851d 100644 --- a/buildSrc/version.properties +++ b/buildSrc/version.properties @@ -14,7 +14,7 @@ jackson_databind = 2.15.2 snakeyaml = 2.1 icu4j = 70.1 supercsv = 2.4.0 -log4j = 2.20.0 +log4j = 2.21.0 slf4j = 1.7.36 asm = 9.6 jettison = 1.5.4 diff --git a/libs/core/licenses/log4j-api-2.20.0.jar.sha1 b/libs/core/licenses/log4j-api-2.20.0.jar.sha1 deleted file mode 100644 index 37154d9861ac0..0000000000000 --- a/libs/core/licenses/log4j-api-2.20.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -1fe6082e660daf07c689a89c94dc0f49c26b44bb \ No newline at end of file diff --git a/libs/core/licenses/log4j-api-2.21.0.jar.sha1 b/libs/core/licenses/log4j-api-2.21.0.jar.sha1 new file mode 100644 index 0000000000000..51446052594aa --- /dev/null +++ b/libs/core/licenses/log4j-api-2.21.0.jar.sha1 @@ -0,0 +1 @@ +760192f2b69eacf4a4afc78e5a1d7a8de054fcbd \ No newline at end of file diff --git a/plugins/crypto-kms/licenses/log4j-1.2-api-2.20.0.jar.sha1 b/plugins/crypto-kms/licenses/log4j-1.2-api-2.20.0.jar.sha1 deleted file mode 100644 index 9829576d38ce0..0000000000000 --- a/plugins/crypto-kms/licenses/log4j-1.2-api-2.20.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -689151374756cb809cb029f2501015bdc7733179 \ No newline at end of file diff --git a/plugins/crypto-kms/licenses/log4j-1.2-api-2.21.0.jar.sha1 b/plugins/crypto-kms/licenses/log4j-1.2-api-2.21.0.jar.sha1 new file mode 100644 index 0000000000000..39d9177cb2fac --- /dev/null +++ b/plugins/crypto-kms/licenses/log4j-1.2-api-2.21.0.jar.sha1 @@ -0,0 +1 @@ +12bad3819a9570807f3c97315930699584c12152 \ No newline at end of file diff --git a/plugins/discovery-azure-classic/licenses/log4j-1.2-api-2.20.0.jar.sha1 b/plugins/discovery-azure-classic/licenses/log4j-1.2-api-2.20.0.jar.sha1 deleted file mode 100644 index 9829576d38ce0..0000000000000 --- 
a/plugins/discovery-azure-classic/licenses/log4j-1.2-api-2.20.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -689151374756cb809cb029f2501015bdc7733179 \ No newline at end of file diff --git a/plugins/discovery-azure-classic/licenses/log4j-1.2-api-2.21.0.jar.sha1 b/plugins/discovery-azure-classic/licenses/log4j-1.2-api-2.21.0.jar.sha1 new file mode 100644 index 0000000000000..39d9177cb2fac --- /dev/null +++ b/plugins/discovery-azure-classic/licenses/log4j-1.2-api-2.21.0.jar.sha1 @@ -0,0 +1 @@ +12bad3819a9570807f3c97315930699584c12152 \ No newline at end of file diff --git a/plugins/discovery-ec2/licenses/log4j-1.2-api-2.20.0.jar.sha1 b/plugins/discovery-ec2/licenses/log4j-1.2-api-2.20.0.jar.sha1 deleted file mode 100644 index 9829576d38ce0..0000000000000 --- a/plugins/discovery-ec2/licenses/log4j-1.2-api-2.20.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -689151374756cb809cb029f2501015bdc7733179 \ No newline at end of file diff --git a/plugins/discovery-ec2/licenses/log4j-1.2-api-2.21.0.jar.sha1 b/plugins/discovery-ec2/licenses/log4j-1.2-api-2.21.0.jar.sha1 new file mode 100644 index 0000000000000..39d9177cb2fac --- /dev/null +++ b/plugins/discovery-ec2/licenses/log4j-1.2-api-2.21.0.jar.sha1 @@ -0,0 +1 @@ +12bad3819a9570807f3c97315930699584c12152 \ No newline at end of file diff --git a/plugins/discovery-gce/licenses/log4j-1.2-api-2.20.0.jar.sha1 b/plugins/discovery-gce/licenses/log4j-1.2-api-2.20.0.jar.sha1 deleted file mode 100644 index 9829576d38ce0..0000000000000 --- a/plugins/discovery-gce/licenses/log4j-1.2-api-2.20.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -689151374756cb809cb029f2501015bdc7733179 \ No newline at end of file diff --git a/plugins/discovery-gce/licenses/log4j-1.2-api-2.21.0.jar.sha1 b/plugins/discovery-gce/licenses/log4j-1.2-api-2.21.0.jar.sha1 new file mode 100644 index 0000000000000..39d9177cb2fac --- /dev/null +++ b/plugins/discovery-gce/licenses/log4j-1.2-api-2.21.0.jar.sha1 @@ -0,0 +1 @@ +12bad3819a9570807f3c97315930699584c12152 \ No newline at end of file 
diff --git a/plugins/repository-gcs/licenses/log4j-1.2-api-2.20.0.jar.sha1 b/plugins/repository-gcs/licenses/log4j-1.2-api-2.20.0.jar.sha1 deleted file mode 100644 index 9829576d38ce0..0000000000000 --- a/plugins/repository-gcs/licenses/log4j-1.2-api-2.20.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -689151374756cb809cb029f2501015bdc7733179 \ No newline at end of file diff --git a/plugins/repository-gcs/licenses/log4j-1.2-api-2.21.0.jar.sha1 b/plugins/repository-gcs/licenses/log4j-1.2-api-2.21.0.jar.sha1 new file mode 100644 index 0000000000000..39d9177cb2fac --- /dev/null +++ b/plugins/repository-gcs/licenses/log4j-1.2-api-2.21.0.jar.sha1 @@ -0,0 +1 @@ +12bad3819a9570807f3c97315930699584c12152 \ No newline at end of file diff --git a/plugins/repository-hdfs/licenses/log4j-slf4j-impl-2.20.0.jar.sha1 b/plugins/repository-hdfs/licenses/log4j-slf4j-impl-2.20.0.jar.sha1 deleted file mode 100644 index 800a4aa87ba0e..0000000000000 --- a/plugins/repository-hdfs/licenses/log4j-slf4j-impl-2.20.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -7ab4f082fd162f60afcaf2b8744a3d959feab3e8 \ No newline at end of file diff --git a/plugins/repository-hdfs/licenses/log4j-slf4j-impl-2.21.0.jar.sha1 b/plugins/repository-hdfs/licenses/log4j-slf4j-impl-2.21.0.jar.sha1 new file mode 100644 index 0000000000000..0e22f98daa61c --- /dev/null +++ b/plugins/repository-hdfs/licenses/log4j-slf4j-impl-2.21.0.jar.sha1 @@ -0,0 +1 @@ +911fdb5b1a1df36719c579ecc6f2957b88bce1ab \ No newline at end of file diff --git a/plugins/repository-s3/licenses/log4j-1.2-api-2.20.0.jar.sha1 b/plugins/repository-s3/licenses/log4j-1.2-api-2.20.0.jar.sha1 deleted file mode 100644 index 9829576d38ce0..0000000000000 --- a/plugins/repository-s3/licenses/log4j-1.2-api-2.20.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -689151374756cb809cb029f2501015bdc7733179 \ No newline at end of file diff --git a/plugins/repository-s3/licenses/log4j-1.2-api-2.21.0.jar.sha1 b/plugins/repository-s3/licenses/log4j-1.2-api-2.21.0.jar.sha1 new file mode 100644 
index 0000000000000..39d9177cb2fac --- /dev/null +++ b/plugins/repository-s3/licenses/log4j-1.2-api-2.21.0.jar.sha1 @@ -0,0 +1 @@ +12bad3819a9570807f3c97315930699584c12152 \ No newline at end of file diff --git a/qa/os/build.gradle b/qa/os/build.gradle index 66c6525439dac..082ed5277575a 100644 --- a/qa/os/build.gradle +++ b/qa/os/build.gradle @@ -70,6 +70,11 @@ tasks.dependenciesInfo.enabled = false tasks.thirdPartyAudit.ignoreMissingClasses() +tasks.thirdPartyAudit.ignoreViolations( + 'org.apache.logging.log4j.core.util.internal.UnsafeUtil', + 'org.apache.logging.log4j.core.util.internal.UnsafeUtil$1' +) + tasks.register('destructivePackagingTest') { dependsOn 'destructiveDistroTest' } diff --git a/server/build.gradle b/server/build.gradle index f6db3d53a0dcc..c56f9d5aa288f 100644 --- a/server/build.gradle +++ b/server/build.gradle @@ -364,7 +364,9 @@ tasks.named("thirdPartyAudit").configure { 'com.google.protobuf.UnsafeUtil$Android32MemoryAccessor', 'com.google.protobuf.UnsafeUtil$Android64MemoryAccessor', 'com.google.protobuf.UnsafeUtil$JvmMemoryAccessor', - 'com.google.protobuf.UnsafeUtil$MemoryAccessor' + 'com.google.protobuf.UnsafeUtil$MemoryAccessor', + 'org.apache.logging.log4j.core.util.internal.UnsafeUtil', + 'org.apache.logging.log4j.core.util.internal.UnsafeUtil$1' ) } diff --git a/server/licenses/log4j-api-2.20.0.jar.sha1 b/server/licenses/log4j-api-2.20.0.jar.sha1 deleted file mode 100644 index 37154d9861ac0..0000000000000 --- a/server/licenses/log4j-api-2.20.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -1fe6082e660daf07c689a89c94dc0f49c26b44bb \ No newline at end of file diff --git a/server/licenses/log4j-api-2.21.0.jar.sha1 b/server/licenses/log4j-api-2.21.0.jar.sha1 new file mode 100644 index 0000000000000..51446052594aa --- /dev/null +++ b/server/licenses/log4j-api-2.21.0.jar.sha1 @@ -0,0 +1 @@ +760192f2b69eacf4a4afc78e5a1d7a8de054fcbd \ No newline at end of file diff --git a/server/licenses/log4j-core-2.20.0.jar.sha1 
b/server/licenses/log4j-core-2.20.0.jar.sha1 deleted file mode 100644 index 49c972626563b..0000000000000 --- a/server/licenses/log4j-core-2.20.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -eb2a9a47b1396e00b5eee1264296729a70565cc0 \ No newline at end of file diff --git a/server/licenses/log4j-core-2.21.0.jar.sha1 b/server/licenses/log4j-core-2.21.0.jar.sha1 new file mode 100644 index 0000000000000..c88e6f7a25ca9 --- /dev/null +++ b/server/licenses/log4j-core-2.21.0.jar.sha1 @@ -0,0 +1 @@ +122e1a9e0603cc9eae07b0846a6ff01f2454bc49 \ No newline at end of file diff --git a/server/licenses/log4j-jul-2.20.0.jar.sha1 b/server/licenses/log4j-jul-2.20.0.jar.sha1 deleted file mode 100644 index a456651e4569e..0000000000000 --- a/server/licenses/log4j-jul-2.20.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -8170e6118eac1ab332046c179718a0f107f688e1 \ No newline at end of file diff --git a/server/licenses/log4j-jul-2.21.0.jar.sha1 b/server/licenses/log4j-jul-2.21.0.jar.sha1 new file mode 100644 index 0000000000000..480010840abca --- /dev/null +++ b/server/licenses/log4j-jul-2.21.0.jar.sha1 @@ -0,0 +1 @@ +f0da61113f4a47654677e6a98b1e13ca7de2483d \ No newline at end of file From da851244a273d6fbfc42eb115f2dcfca1c3cc728 Mon Sep 17 00:00:00 2001 From: David Z <38449481+dzane17@users.noreply.github.com> Date: Mon, 23 Oct 2023 11:01:47 -0700 Subject: [PATCH 27/45] Update per request latency check to V_2_12_0 (#10865) Signed-off-by: David Zane --- .../main/java/org/opensearch/action/search/SearchRequest.java | 4 ++-- .../java/org/opensearch/action/search/SearchResponse.java | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/opensearch/action/search/SearchRequest.java b/server/src/main/java/org/opensearch/action/search/SearchRequest.java index 9e50213eab5f9..fb026dae630b7 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchRequest.java +++ b/server/src/main/java/org/opensearch/action/search/SearchRequest.java @@ -256,7 +256,7 @@ public 
SearchRequest(StreamInput in) throws IOException { if (in.getVersion().onOrAfter(Version.V_2_7_0)) { pipeline = in.readOptionalString(); } - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + if (in.getVersion().onOrAfter(Version.V_2_12_0)) { phaseTook = in.readOptionalBoolean(); } } @@ -290,7 +290,7 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(Version.V_2_7_0)) { out.writeOptionalString(pipeline); } - if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + if (out.getVersion().onOrAfter(Version.V_2_12_0)) { out.writeOptionalBoolean(phaseTook); } } diff --git a/server/src/main/java/org/opensearch/action/search/SearchResponse.java b/server/src/main/java/org/opensearch/action/search/SearchResponse.java index 91f0dc0737637..96d07982d03db 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchResponse.java +++ b/server/src/main/java/org/opensearch/action/search/SearchResponse.java @@ -116,7 +116,7 @@ public SearchResponse(StreamInput in) throws IOException { clusters = new Clusters(in); scrollId = in.readOptionalString(); tookInMillis = in.readVLong(); - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + if (in.getVersion().onOrAfter(Version.V_2_12_0)) { phaseTook = in.readOptionalWriteable(PhaseTook::new); } else { phaseTook = null; @@ -557,7 +557,7 @@ public void writeTo(StreamOutput out) throws IOException { clusters.writeTo(out); out.writeOptionalString(scrollId); out.writeVLong(tookInMillis); - if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + if (out.getVersion().onOrAfter(Version.V_2_12_0)) { out.writeOptionalWriteable(phaseTook); } out.writeVInt(skippedShards); From 8b2173910f754a48773b3283e1a511cbc1a9db78 Mon Sep 17 00:00:00 2001 From: Poojita Raj Date: Mon, 23 Oct 2023 16:08:04 -0700 Subject: [PATCH 28/45] =?UTF-8?q?Add=20cluster=20setting=20cluster.restric?= =?UTF-8?q?t.index.replication=5Ftype=20t=E2=80=A6=20(#10866)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit * Add cluster setting CLUSTER_RESTRICT_INDEX_REPLICATION_TYPE_SETTING to restrict setting of index setting replication type Signed-off-by: Poojita Raj * Add Changelog entry Signed-off-by: Poojita Raj * refactoring Signed-off-by: Poojita Raj --------- Signed-off-by: Poojita Raj --- CHANGELOG.md | 3 ++- .../SegmentReplicationClusterSettingIT.java | 27 +++++++++++++++++++ .../metadata/MetadataCreateIndexService.java | 19 +++++++++++++ .../common/settings/ClusterSettings.java | 3 ++- .../opensearch/indices/IndicesService.java | 11 ++++++++ .../MetadataCreateIndexServiceTests.java | 16 +++++++++-- 6 files changed, 75 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c4563a216974..0b0d9720ad208 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -95,6 +95,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add search query categorizer ([#10255](https://github.com/opensearch-project/OpenSearch/pull/10255)) - Introduce ConcurrentQueryProfiler to profile query using concurrent segment search path and support concurrency during rewrite and create weight ([10352](https://github.com/opensearch-project/OpenSearch/pull/10352)) - [Remote cluster state] Make index and global metadata upload timeout dynamic cluster settings ([#10814](https://github.com/opensearch-project/OpenSearch/pull/10814)) +- Added cluster setting cluster.restrict.index.replication_type to restrict setting of index setting replication type ([#10866](https://github.com/opensearch-project/OpenSearch/pull/10866)) ### Dependencies - Bump `com.google.api.grpc:proto-google-common-protos` from 2.10.0 to 2.25.1 ([#10208](https://github.com/opensearch-project/OpenSearch/pull/10208), [#10298](https://github.com/opensearch-project/OpenSearch/pull/10298)) @@ -131,4 +132,4 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Security [Unreleased 3.0]: 
https://github.com/opensearch-project/OpenSearch/compare/2.x...HEAD -[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.12...2.x \ No newline at end of file +[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.12...2.x diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationClusterSettingIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationClusterSettingIT.java index a82fd8d845709..186a5ce39f131 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationClusterSettingIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationClusterSettingIT.java @@ -19,6 +19,7 @@ import org.opensearch.test.OpenSearchIntegTestCase; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REPLICATION_TYPE; +import static org.opensearch.indices.IndicesService.CLUSTER_RESTRICT_INDEX_REPLICATION_TYPE_SETTING; import static org.opensearch.indices.IndicesService.CLUSTER_SETTING_REPLICATION_TYPE; @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) @@ -123,4 +124,30 @@ public void testIndexReplicationSettingOverridesDocRepClusterSetting() throws Ex assertEquals(indicesService.indexService(anotherIndex).getIndexSettings().isSegRepEnabled(), false); } + public void testIndexReplicationTypeWhenRestrictSettingTrue() { + testRestrictIndexReplicationTypeSetting(true, randomFrom(ReplicationType.values())); + } + + public void testIndexReplicationTypeWhenRestrictSettingFalse() { + testRestrictIndexReplicationTypeSetting(false, randomFrom(ReplicationType.values())); + } + + private void testRestrictIndexReplicationTypeSetting(boolean setRestrict, ReplicationType replicationType) { + String expectedExceptionMsg = + "Validation Failed: 1: index setting [index.replication.type] is not allowed to be set as 
[cluster.restrict.index.replication_type=true];"; + String clusterManagerName = internalCluster().startNode( + Settings.builder().put(CLUSTER_RESTRICT_INDEX_REPLICATION_TYPE_SETTING.getKey(), setRestrict).build() + ); + internalCluster().startDataOnlyNodes(1); + + // Test create index fails + Settings indexSettings = Settings.builder().put(indexSettings()).put(SETTING_REPLICATION_TYPE, replicationType).build(); + if (setRestrict) { + IllegalArgumentException exception = expectThrows(IllegalArgumentException.class, () -> createIndex(INDEX_NAME, indexSettings)); + assertEquals(expectedExceptionMsg, exception.getMessage()); + } else { + createIndex(INDEX_NAME, indexSettings); + } + } + } diff --git a/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java b/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java index 8d76a39712ee3..78a22fe11f072 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java @@ -1252,6 +1252,7 @@ List getIndexSettingsValidationErrors( if (forbidPrivateIndexSettings) { validationErrors.addAll(validatePrivateSettingsNotExplicitlySet(settings, indexScopedSettings)); } + validateIndexReplicationTypeSettings(settings, clusterService.getClusterSettings()).ifPresent(validationErrors::add); if (indexName.isEmpty() || indexName.get().charAt(0) != '.') { // Apply aware replica balance validation only to non system indices int replicaCount = settings.getAsInt( @@ -1306,6 +1307,24 @@ private static List validateIndexCustomPath(Settings settings, @Nullable return validationErrors; } + /** + * Validates {@code index.replication.type} is not set if {@code cluster.restrict.index.replication_type} is set to true. 
+ * + * @param requestSettings settings passed in during index create request + * @param clusterSettings cluster setting + */ + private static Optional validateIndexReplicationTypeSettings(Settings requestSettings, ClusterSettings clusterSettings) { + if (requestSettings.hasValue(SETTING_REPLICATION_TYPE) + && clusterSettings.get(IndicesService.CLUSTER_RESTRICT_INDEX_REPLICATION_TYPE_SETTING)) { + return Optional.of( + "index setting [index.replication.type] is not allowed to be set as [" + + IndicesService.CLUSTER_RESTRICT_INDEX_REPLICATION_TYPE_SETTING.getKey() + + "=true]" + ); + } + return Optional.empty(); + } + /** * Validates the settings and mappings for shrinking an index. * diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index a0fca4f0a2ff0..c2c6effc3336f 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -690,7 +690,8 @@ public void apply(Settings value, Settings current, Settings previous) { AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE, CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE, CPUBasedAdmissionControllerSettings.INDEXING_CPU_USAGE_LIMIT, - CPUBasedAdmissionControllerSettings.SEARCH_CPU_USAGE_LIMIT + CPUBasedAdmissionControllerSettings.SEARCH_CPU_USAGE_LIMIT, + IndicesService.CLUSTER_RESTRICT_INDEX_REPLICATION_TYPE_SETTING ) ) ); diff --git a/server/src/main/java/org/opensearch/indices/IndicesService.java b/server/src/main/java/org/opensearch/indices/IndicesService.java index 50c551c2be29b..36abc77893d81 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesService.java +++ b/server/src/main/java/org/opensearch/indices/IndicesService.java @@ -299,6 +299,17 @@ public class IndicesService extends AbstractLifecycleComponent Property.Final ); + /** + * This 
setting is used to restrict creation of index where the 'index.replication.type' index setting is set. + * If disabled, the replication type can be specified. + */ + public static final Setting CLUSTER_RESTRICT_INDEX_REPLICATION_TYPE_SETTING = Setting.boolSetting( + "cluster.restrict.index.replication_type", + false, + Property.NodeScope, + Property.Final + ); + /** * The node's settings. */ diff --git a/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java b/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java index e40826915c848..cace66d8c6d9e 100644 --- a/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java @@ -139,6 +139,7 @@ import static org.opensearch.indices.IndicesService.CLUSTER_MINIMUM_INDEX_REFRESH_INTERVAL_SETTING; import static org.opensearch.indices.IndicesService.CLUSTER_REMOTE_INDEX_RESTRICT_ASYNC_DURABILITY_SETTING; import static org.opensearch.indices.IndicesService.CLUSTER_REPLICATION_TYPE_SETTING; +import static org.opensearch.indices.IndicesService.CLUSTER_RESTRICT_INDEX_REPLICATION_TYPE_SETTING; import static org.opensearch.indices.ShardLimitValidatorTests.createTestShardLimitService; import static org.opensearch.node.Node.NODE_ATTRIBUTES; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; @@ -1177,6 +1178,8 @@ public void testvalidateIndexSettings() { .put(AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP_SETTING.getKey() + "zone.values", "a, b") .put(AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP_SETTING.getKey() + "rack.values", "c, d, e") .put(AwarenessReplicaBalance.CLUSTER_ROUTING_ALLOCATION_AWARENESS_BALANCE_SETTING.getKey(), true) + .put(CLUSTER_RESTRICT_INDEX_REPLICATION_TYPE_SETTING.getKey(), true) + 
.put(SETTING_REPLICATION_TYPE, randomFrom(ReplicationType.values())) .build(); ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); when(clusterService.getSettings()).thenReturn(settings); @@ -1200,8 +1203,12 @@ public void testvalidateIndexSettings() { ); List validationErrors = checkerService.getIndexSettingsValidationErrors(settings, false, Optional.empty()); - assertThat(validationErrors.size(), is(1)); - assertThat(validationErrors.get(0), is("expected total copies needs to be a multiple of total awareness attributes [3]")); + assertThat(validationErrors.size(), is(2)); + assertThat( + validationErrors.get(0), + is("index setting [index.replication.type] is not allowed to be set as [cluster.restrict.index.replication_type=true]") + ); + assertThat(validationErrors.get(1), is("expected total copies needs to be a multiple of total awareness attributes [3]")); settings = Settings.builder() .put(AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING.getKey(), "zone, rack") @@ -1209,8 +1216,13 @@ public void testvalidateIndexSettings() { .put(AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP_SETTING.getKey() + "rack.values", "c, d, e") .put(AwarenessReplicaBalance.CLUSTER_ROUTING_ALLOCATION_AWARENESS_BALANCE_SETTING.getKey(), true) .put(SETTING_NUMBER_OF_REPLICAS, 2) + .put(CLUSTER_RESTRICT_INDEX_REPLICATION_TYPE_SETTING.getKey(), false) + .put(SETTING_REPLICATION_TYPE, randomFrom(ReplicationType.values())) .build(); + clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + when(clusterService.getClusterSettings()).thenReturn(clusterSettings); + validationErrors = checkerService.getIndexSettingsValidationErrors(settings, false, Optional.empty()); assertThat(validationErrors.size(), is(0)); From 5a288ed3ecf06a048eba6028c445aa4e642d39c5 Mon Sep 17 00:00:00 2001 From: Dhwanil Patel Date: Tue, 24 Oct 2023 09:02:48 +0530 
Subject: [PATCH 29/45] Changing version check to 2.12 for remote cluster state (#10844) Signed-off-by: Dhwanil Patel --- .../opensearch/gateway/remote/ClusterMetadataManifest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java b/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java index 97b37d9532f85..4725f40076ce2 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java +++ b/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java @@ -262,7 +262,7 @@ public ClusterMetadataManifest(StreamInput in) throws IOException { this.indices = Collections.unmodifiableList(in.readList(UploadedIndexMetadata::new)); this.previousClusterUUID = in.readString(); this.clusterUUIDCommitted = in.readBoolean(); - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + if (in.getVersion().onOrAfter(Version.V_2_12_0)) { this.codecVersion = in.readInt(); this.globalMetadataFileName = in.readString(); } else { @@ -316,7 +316,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeCollection(indices); out.writeString(previousClusterUUID); out.writeBoolean(clusterUUIDCommitted); - if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + if (out.getVersion().onOrAfter(Version.V_2_12_0)) { out.writeInt(codecVersion); out.writeString(globalMetadataFileName); } From a2b5f0e3a9515f101653afc88dd98e48015f6346 Mon Sep 17 00:00:00 2001 From: Shivansh Arora <31575408+shiv0408@users.noreply.github.com> Date: Tue, 24 Oct 2023 13:58:32 +0530 Subject: [PATCH 30/45] Created new urgent priority threadpool for remote cluster state uploads (#10685) * Created new Urgent WritePriority and a new threadpools and S3Client for this priority which is being used to upload cluster state Signed-off-by: Shivansh Arora --- .../s3/S3BlobStoreRepositoryTests.java | 2 +- .../s3/AmazonAsyncS3Reference.java | 1 + 
.../s3/AmazonAsyncS3WithCredentials.java | 10 ++- .../repositories/s3/S3AsyncService.java | 17 +++- .../repositories/s3/S3BlobContainer.java | 11 ++- .../repositories/s3/S3BlobStore.java | 5 +- .../repositories/s3/S3Repository.java | 6 ++ .../repositories/s3/S3RepositoryPlugin.java | 22 ++++- .../s3/async/AsyncPartsHandler.java | 15 +++- .../s3/async/AsyncTransferManager.java | 21 ++++- .../s3/RepositoryCredentialsTests.java | 2 +- .../repositories/s3/S3AsyncServiceTests.java | 8 +- .../s3/S3BlobContainerMockClientTests.java | 5 +- .../s3/S3BlobContainerRetriesTests.java | 2 + .../s3/S3BlobStoreContainerTests.java | 8 +- .../repositories/s3/S3RepositoryTests.java | 1 + .../s3/async/AsyncTransferManagerTests.java | 1 + .../blobstore/stream/write/WritePriority.java | 3 +- .../remote/RemoteClusterStateService.java | 4 +- .../blobstore/ChecksumBlobStoreFormat.java | 45 ++++++++-- .../RemoteClusterStateServiceTests.java | 2 +- .../snapshots/BlobStoreFormatTests.java | 87 +++++++++++++++---- 22 files changed, 224 insertions(+), 54 deletions(-) diff --git a/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java b/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java index 4df30bfd2169e..da2c6e8c1b0ee 100644 --- a/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java +++ b/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java @@ -249,7 +249,7 @@ protected S3Repository createRepository( ClusterService clusterService, RecoverySettings recoverySettings ) { - return new S3Repository(metadata, registry, service, clusterService, recoverySettings, null, null, null, null, false) { + return new S3Repository(metadata, registry, service, clusterService, recoverySettings, null, null, null, null, null, false) { @Override public BlobStore blobStore() { 
diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/AmazonAsyncS3Reference.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/AmazonAsyncS3Reference.java index 0b5fcb6df280e..45170ea1ad209 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/AmazonAsyncS3Reference.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/AmazonAsyncS3Reference.java @@ -29,6 +29,7 @@ public class AmazonAsyncS3Reference extends RefCountedReleasable { client.client().close(); client.priorityClient().close(); + client.urgentClient().close(); AwsCredentialsProvider credentials = client.credentials(); if (credentials instanceof Closeable) { try { diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/AmazonAsyncS3WithCredentials.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/AmazonAsyncS3WithCredentials.java index fa2db83729d25..f8a313b55d945 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/AmazonAsyncS3WithCredentials.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/AmazonAsyncS3WithCredentials.java @@ -19,16 +19,19 @@ final class AmazonAsyncS3WithCredentials { private final S3AsyncClient client; private final S3AsyncClient priorityClient; + private final S3AsyncClient urgentClient; private final AwsCredentialsProvider credentials; private AmazonAsyncS3WithCredentials( final S3AsyncClient client, final S3AsyncClient priorityClient, + final S3AsyncClient urgentClient, @Nullable final AwsCredentialsProvider credentials ) { this.client = client; this.credentials = credentials; this.priorityClient = priorityClient; + this.urgentClient = urgentClient; } S3AsyncClient client() { @@ -39,6 +42,10 @@ S3AsyncClient priorityClient() { return priorityClient; } + S3AsyncClient urgentClient() { + return urgentClient; + } + AwsCredentialsProvider credentials() { return credentials; } @@ -46,8 
+53,9 @@ AwsCredentialsProvider credentials() { static AmazonAsyncS3WithCredentials create( final S3AsyncClient client, final S3AsyncClient priorityClient, + final S3AsyncClient urgentClient, @Nullable final AwsCredentialsProvider credentials ) { - return new AmazonAsyncS3WithCredentials(client, priorityClient, credentials); + return new AmazonAsyncS3WithCredentials(client, priorityClient, urgentClient, credentials); } } diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3AsyncService.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3AsyncService.java index 08215ebdd45e0..262304029a0d3 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3AsyncService.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3AsyncService.java @@ -103,6 +103,7 @@ public synchronized void refreshAndClearCache(Map clie */ public AmazonAsyncS3Reference client( RepositoryMetadata repositoryMetadata, + AsyncExecutorContainer urgentExecutorBuilder, AsyncExecutorContainer priorityExecutorBuilder, AsyncExecutorContainer normalExecutorBuilder ) { @@ -119,7 +120,7 @@ public AmazonAsyncS3Reference client( return existing; } final AmazonAsyncS3Reference clientReference = new AmazonAsyncS3Reference( - buildClient(clientSettings, priorityExecutorBuilder, normalExecutorBuilder) + buildClient(clientSettings, urgentExecutorBuilder, priorityExecutorBuilder, normalExecutorBuilder) ); clientReference.incRef(); clientsCache = MapBuilder.newMapBuilder(clientsCache).put(clientSettings, clientReference).immutableMap(); @@ -165,6 +166,7 @@ S3ClientSettings settings(RepositoryMetadata repositoryMetadata) { // proxy for testing synchronized AmazonAsyncS3WithCredentials buildClient( final S3ClientSettings clientSettings, + AsyncExecutorContainer urgentExecutorBuilder, AsyncExecutorContainer priorityExecutorBuilder, AsyncExecutorContainer normalExecutorBuilder ) { @@ -195,6 +197,17 @@ synchronized 
AmazonAsyncS3WithCredentials buildClient( builder.forcePathStyle(true); } + builder.httpClient(buildHttpClient(clientSettings, urgentExecutorBuilder.getAsyncTransferEventLoopGroup())); + builder.asyncConfiguration( + ClientAsyncConfiguration.builder() + .advancedOption( + SdkAdvancedAsyncClientOption.FUTURE_COMPLETION_EXECUTOR, + urgentExecutorBuilder.getFutureCompletionExecutor() + ) + .build() + ); + final S3AsyncClient urgentClient = SocketAccess.doPrivileged(builder::build); + builder.httpClient(buildHttpClient(clientSettings, priorityExecutorBuilder.getAsyncTransferEventLoopGroup())); builder.asyncConfiguration( ClientAsyncConfiguration.builder() @@ -217,7 +230,7 @@ synchronized AmazonAsyncS3WithCredentials buildClient( ); final S3AsyncClient client = SocketAccess.doPrivileged(builder::build); - return AmazonAsyncS3WithCredentials.create(client, priorityClient, credentials); + return AmazonAsyncS3WithCredentials.create(client, priorityClient, urgentClient, credentials); } static ClientOverrideConfiguration buildOverrideConfiguration(final S3ClientSettings clientSettings) { diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java index 24aee99242957..c1180aab0e0c7 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java @@ -195,9 +195,14 @@ public void asyncBlobUpload(WriteContext writeContext, ActionListener comp StreamContext streamContext = SocketAccess.doPrivileged(() -> writeContext.getStreamProvider(partSize)); try (AmazonAsyncS3Reference amazonS3Reference = SocketAccess.doPrivileged(blobStore::asyncClientReference)) { - S3AsyncClient s3AsyncClient = writeContext.getWritePriority() == WritePriority.HIGH - ? 
amazonS3Reference.get().priorityClient() - : amazonS3Reference.get().client(); + S3AsyncClient s3AsyncClient; + if (writeContext.getWritePriority() == WritePriority.URGENT) { + s3AsyncClient = amazonS3Reference.get().urgentClient(); + } else if (writeContext.getWritePriority() == WritePriority.HIGH) { + s3AsyncClient = amazonS3Reference.get().priorityClient(); + } else { + s3AsyncClient = amazonS3Reference.get().client(); + } CompletableFuture completableFuture = blobStore.getAsyncTransferManager() .uploadObject(s3AsyncClient, uploadRequest, streamContext, blobStore.getStatsMetricPublisher()); completableFuture.whenComplete((response, throwable) -> { diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobStore.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobStore.java index f568d871dd31a..e8e043357e126 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobStore.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobStore.java @@ -84,6 +84,7 @@ class S3BlobStore implements BlobStore { private final StatsMetricPublisher statsMetricPublisher = new StatsMetricPublisher(); private final AsyncTransferManager asyncTransferManager; + private final AsyncExecutorContainer urgentExecutorBuilder; private final AsyncExecutorContainer priorityExecutorBuilder; private final AsyncExecutorContainer normalExecutorBuilder; private final boolean multipartUploadEnabled; @@ -100,6 +101,7 @@ class S3BlobStore implements BlobStore { int bulkDeletesSize, RepositoryMetadata repositoryMetadata, AsyncTransferManager asyncTransferManager, + AsyncExecutorContainer urgentExecutorBuilder, AsyncExecutorContainer priorityExecutorBuilder, AsyncExecutorContainer normalExecutorBuilder ) { @@ -116,6 +118,7 @@ class S3BlobStore implements BlobStore { this.asyncTransferManager = asyncTransferManager; this.normalExecutorBuilder = normalExecutorBuilder; this.priorityExecutorBuilder = 
priorityExecutorBuilder; + this.urgentExecutorBuilder = urgentExecutorBuilder; } @Override @@ -139,7 +142,7 @@ public AmazonS3Reference clientReference() { } public AmazonAsyncS3Reference asyncClientReference() { - return s3AsyncService.client(repositoryMetadata, priorityExecutorBuilder, normalExecutorBuilder); + return s3AsyncService.client(repositoryMetadata, urgentExecutorBuilder, priorityExecutorBuilder, normalExecutorBuilder); } int getMaxRetries() { diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3Repository.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3Repository.java index aaf5b79891cdc..728a99b1220a6 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3Repository.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3Repository.java @@ -234,6 +234,7 @@ class S3Repository extends MeteredBlobStoreRepository { private final AsyncTransferManager asyncUploadUtils; private final S3AsyncService s3AsyncService; private final boolean multipartUploadEnabled; + private final AsyncExecutorContainer urgentExecutorBuilder; private final AsyncExecutorContainer priorityExecutorBuilder; private final AsyncExecutorContainer normalExecutorBuilder; private final Path pluginConfigPath; @@ -248,6 +249,7 @@ class S3Repository extends MeteredBlobStoreRepository { final ClusterService clusterService, final RecoverySettings recoverySettings, final AsyncTransferManager asyncUploadUtils, + final AsyncExecutorContainer urgentExecutorBuilder, final AsyncExecutorContainer priorityExecutorBuilder, final AsyncExecutorContainer normalExecutorBuilder, final S3AsyncService s3AsyncService, @@ -260,6 +262,7 @@ class S3Repository extends MeteredBlobStoreRepository { clusterService, recoverySettings, asyncUploadUtils, + urgentExecutorBuilder, priorityExecutorBuilder, normalExecutorBuilder, s3AsyncService, @@ -278,6 +281,7 @@ class S3Repository extends MeteredBlobStoreRepository { 
final ClusterService clusterService, final RecoverySettings recoverySettings, final AsyncTransferManager asyncUploadUtils, + final AsyncExecutorContainer urgentExecutorBuilder, final AsyncExecutorContainer priorityExecutorBuilder, final AsyncExecutorContainer normalExecutorBuilder, final S3AsyncService s3AsyncService, @@ -290,6 +294,7 @@ class S3Repository extends MeteredBlobStoreRepository { this.multipartUploadEnabled = multipartUploadEnabled; this.pluginConfigPath = pluginConfigPath; this.asyncUploadUtils = asyncUploadUtils; + this.urgentExecutorBuilder = urgentExecutorBuilder; this.priorityExecutorBuilder = priorityExecutorBuilder; this.normalExecutorBuilder = normalExecutorBuilder; @@ -352,6 +357,7 @@ protected S3BlobStore createBlobStore() { bulkDeletesSize, metadata, asyncUploadUtils, + urgentExecutorBuilder, priorityExecutorBuilder, normalExecutorBuilder ); diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java index c6450e49d08e2..9ed232464d080 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java @@ -75,6 +75,9 @@ * A plugin to add a repository type that writes to and from the AWS S3. 
*/ public class S3RepositoryPlugin extends Plugin implements RepositoryPlugin, ReloadablePlugin { + + private static final String URGENT_FUTURE_COMPLETION = "urgent_future_completion"; + private static final String URGENT_STREAM_READER = "urgent_stream_reader"; private static final String PRIORITY_FUTURE_COMPLETION = "priority_future_completion"; private static final String PRIORITY_STREAM_READER = "priority_stream_reader"; private static final String FUTURE_COMPLETION = "future_completion"; @@ -85,6 +88,7 @@ public class S3RepositoryPlugin extends Plugin implements RepositoryPlugin, Relo private final Path configPath; + private AsyncExecutorContainer urgentExecutorBuilder; private AsyncExecutorContainer priorityExecutorBuilder; private AsyncExecutorContainer normalExecutorBuilder; @@ -96,6 +100,10 @@ public S3RepositoryPlugin(final Settings settings, final Path configPath) { public List> getExecutorBuilders(Settings settings) { List> executorBuilders = new ArrayList<>(); int halfProcMaxAt5 = halfAllocatedProcessorsMaxFive(allocatedProcessors(settings)); + executorBuilders.add( + new FixedExecutorBuilder(settings, URGENT_FUTURE_COMPLETION, urgentPoolCount(settings), 10_000, URGENT_FUTURE_COMPLETION) + ); + executorBuilders.add(new ScalingExecutorBuilder(URGENT_STREAM_READER, 1, halfProcMaxAt5, TimeValue.timeValueMinutes(5))); executorBuilders.add( new FixedExecutorBuilder(settings, PRIORITY_FUTURE_COMPLETION, priorityPoolCount(settings), 10_000, PRIORITY_FUTURE_COMPLETION) ); @@ -128,6 +136,10 @@ private static int allocatedProcessors(Settings settings) { return OpenSearchExecutors.allocatedProcessors(settings); } + private static int urgentPoolCount(Settings settings) { + return boundedBy((allocatedProcessors(settings) + 7) / 8, 1, 2); + } + private static int priorityPoolCount(Settings settings) { return boundedBy((allocatedProcessors(settings) + 1) / 2, 2, 4); } @@ -150,8 +162,14 @@ public Collection createComponents( final IndexNameExpressionResolver 
expressionResolver, final Supplier repositoriesServiceSupplier ) { + int urgentEventLoopThreads = urgentPoolCount(clusterService.getSettings()); int priorityEventLoopThreads = priorityPoolCount(clusterService.getSettings()); int normalEventLoopThreads = normalPoolCount(clusterService.getSettings()); + this.urgentExecutorBuilder = new AsyncExecutorContainer( + threadPool.executor(URGENT_FUTURE_COMPLETION), + threadPool.executor(URGENT_STREAM_READER), + new AsyncTransferEventLoopGroup(urgentEventLoopThreads) + ); this.priorityExecutorBuilder = new AsyncExecutorContainer( threadPool.executor(PRIORITY_FUTURE_COMPLETION), threadPool.executor(PRIORITY_STREAM_READER), @@ -176,7 +194,8 @@ protected S3Repository createRepository( AsyncTransferManager asyncUploadUtils = new AsyncTransferManager( S3Repository.PARALLEL_MULTIPART_UPLOAD_MINIMUM_PART_SIZE_SETTING.get(clusterService.getSettings()).getBytes(), normalExecutorBuilder.getStreamReader(), - priorityExecutorBuilder.getStreamReader() + priorityExecutorBuilder.getStreamReader(), + urgentExecutorBuilder.getStreamReader() ); return new S3Repository( metadata, @@ -185,6 +204,7 @@ protected S3Repository createRepository( clusterService, recoverySettings, asyncUploadUtils, + urgentExecutorBuilder, priorityExecutorBuilder, normalExecutorBuilder, s3AsyncService, diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java index 6007d9f9c8a1c..933ee6dc29513 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java @@ -48,6 +48,7 @@ public class AsyncPartsHandler { * @param s3AsyncClient S3 client to use for upload * @param executorService Thread pool for regular upload * @param priorityExecutorService Thread pool for priority uploads + 
* @param urgentExecutorService Thread pool for urgent uploads * @param uploadRequest request for upload * @param streamContext Stream context used in supplying individual file parts * @param uploadId Upload Id against which multi-part is being performed @@ -60,6 +61,7 @@ public static List> uploadParts( S3AsyncClient s3AsyncClient, ExecutorService executorService, ExecutorService priorityExecutorService, + ExecutorService urgentExecutorService, UploadRequest uploadRequest, StreamContext streamContext, String uploadId, @@ -83,6 +85,7 @@ public static List> uploadParts( s3AsyncClient, executorService, priorityExecutorService, + urgentExecutorService, completedParts, inputStreamContainers, futures, @@ -129,6 +132,7 @@ private static void uploadPart( S3AsyncClient s3AsyncClient, ExecutorService executorService, ExecutorService priorityExecutorService, + ExecutorService urgentExecutorService, AtomicReferenceArray completedParts, AtomicReferenceArray inputStreamContainers, List> futures, @@ -138,9 +142,14 @@ private static void uploadPart( ) { Integer partNumber = uploadPartRequest.partNumber(); - ExecutorService streamReadExecutor = uploadRequest.getWritePriority() == WritePriority.HIGH - ? priorityExecutorService - : executorService; + ExecutorService streamReadExecutor; + if (uploadRequest.getWritePriority() == WritePriority.URGENT) { + streamReadExecutor = urgentExecutorService; + } else if (uploadRequest.getWritePriority() == WritePriority.HIGH) { + streamReadExecutor = priorityExecutorService; + } else { + streamReadExecutor = executorService; + } // Buffered stream is needed to allow mark and reset ops during IO errors so that only buffered // data can be retried instead of retrying whole file by the application. 
InputStream inputStream = new BufferedInputStream(inputStreamContainer.getInputStream(), (int) (ByteSizeUnit.MB.toBytes(1) + 1)); diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java index a52745e33073e..4f1ab9764702e 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java @@ -61,6 +61,7 @@ public final class AsyncTransferManager { private static final Logger log = LogManager.getLogger(AsyncTransferManager.class); private final ExecutorService executorService; private final ExecutorService priorityExecutorService; + private final ExecutorService urgentExecutorService; private final long minimumPartSize; /** @@ -75,10 +76,16 @@ public final class AsyncTransferManager { * @param executorService The stream reader {@link ExecutorService} for normal priority uploads * @param priorityExecutorService The stream read {@link ExecutorService} for high priority uploads */ - public AsyncTransferManager(long minimumPartSize, ExecutorService executorService, ExecutorService priorityExecutorService) { + public AsyncTransferManager( + long minimumPartSize, + ExecutorService executorService, + ExecutorService priorityExecutorService, + ExecutorService urgentExecutorService + ) { this.executorService = executorService; this.priorityExecutorService = priorityExecutorService; this.minimumPartSize = minimumPartSize; + this.urgentExecutorService = urgentExecutorService; } /** @@ -162,6 +169,7 @@ private void doUploadInParts( s3AsyncClient, executorService, priorityExecutorService, + urgentExecutorService, uploadRequest, streamContext, uploadId, @@ -308,9 +316,14 @@ private void uploadInOneChunk( putObjectRequestBuilder.checksumAlgorithm(ChecksumAlgorithm.CRC32); 
putObjectRequestBuilder.checksumCRC32(base64StringFromLong(uploadRequest.getExpectedChecksum())); } - ExecutorService streamReadExecutor = uploadRequest.getWritePriority() == WritePriority.HIGH - ? priorityExecutorService - : executorService; + ExecutorService streamReadExecutor; + if (uploadRequest.getWritePriority() == WritePriority.URGENT) { + streamReadExecutor = urgentExecutorService; + } else if (uploadRequest.getWritePriority() == WritePriority.HIGH) { + streamReadExecutor = priorityExecutorService; + } else { + streamReadExecutor = executorService; + } // Buffered stream is needed to allow mark and reset ops during IO errors so that only buffered // data can be retried instead of retrying whole file by the application. InputStream inputStream = new BufferedInputStream(inputStreamContainer.getInputStream(), (int) (ByteSizeUnit.MB.toBytes(1) + 1)); diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/RepositoryCredentialsTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/RepositoryCredentialsTests.java index a4bfe11383b4f..8e1926d40302f 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/RepositoryCredentialsTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/RepositoryCredentialsTests.java @@ -302,7 +302,7 @@ protected S3Repository createRepository( ClusterService clusterService, RecoverySettings recoverySettings ) { - return new S3Repository(metadata, registry, service, clusterService, recoverySettings, null, null, null, null, false) { + return new S3Repository(metadata, registry, service, clusterService, recoverySettings, null, null, null, null, null, false) { @Override protected void assertSnapshotOrGenericThread() { // eliminate thread name check as we create repo manually on test/main threads diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3AsyncServiceTests.java 
b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3AsyncServiceTests.java index e9fe557ab751a..de9ad46bb222d 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3AsyncServiceTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3AsyncServiceTests.java @@ -44,12 +44,12 @@ public void testCachedClientsAreReleased() { final S3ClientSettings otherClientSettings = s3AsyncService.settings(metadata2); assertSame(clientSettings, otherClientSettings); final AmazonAsyncS3Reference reference = SocketAccess.doPrivileged( - () -> s3AsyncService.client(metadata1, asyncExecutorContainer, asyncExecutorContainer) + () -> s3AsyncService.client(metadata1, asyncExecutorContainer, asyncExecutorContainer, asyncExecutorContainer) ); reference.close(); s3AsyncService.close(); final AmazonAsyncS3Reference referenceReloaded = SocketAccess.doPrivileged( - () -> s3AsyncService.client(metadata1, asyncExecutorContainer, asyncExecutorContainer) + () -> s3AsyncService.client(metadata1, asyncExecutorContainer, asyncExecutorContainer, asyncExecutorContainer) ); assertNotSame(referenceReloaded, reference); referenceReloaded.close(); @@ -79,12 +79,12 @@ public void testCachedClientsWithCredentialsAreReleased() { final S3ClientSettings otherClientSettings = s3AsyncService.settings(metadata2); assertSame(clientSettings, otherClientSettings); final AmazonAsyncS3Reference reference = SocketAccess.doPrivileged( - () -> s3AsyncService.client(metadata1, asyncExecutorContainer, asyncExecutorContainer) + () -> s3AsyncService.client(metadata1, asyncExecutorContainer, asyncExecutorContainer, asyncExecutorContainer) ); reference.close(); s3AsyncService.close(); final AmazonAsyncS3Reference referenceReloaded = SocketAccess.doPrivileged( - () -> s3AsyncService.client(metadata1, asyncExecutorContainer, asyncExecutorContainer) + () -> s3AsyncService.client(metadata1, asyncExecutorContainer, asyncExecutorContainer, asyncExecutorContainer) 
); assertNotSame(referenceReloaded, reference); referenceReloaded.close(); diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerMockClientTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerMockClientTests.java index 6eb8faa746d34..7c67519f2f3b0 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerMockClientTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerMockClientTests.java @@ -266,10 +266,11 @@ public void verifySingleChunkUploadCallCount(boolean finalizeUploadFailure) { @Override public AmazonAsyncS3Reference client( RepositoryMetadata repositoryMetadata, + AsyncExecutorContainer urgentExecutorBuilder, AsyncExecutorContainer priorityExecutorBuilder, AsyncExecutorContainer normalExecutorBuilder ) { - return new AmazonAsyncS3Reference(AmazonAsyncS3WithCredentials.create(asyncClient, asyncClient, null)); + return new AmazonAsyncS3Reference(AmazonAsyncS3WithCredentials.create(asyncClient, asyncClient, asyncClient, null)); } } @@ -393,9 +394,11 @@ private S3BlobStore createBlobStore() { new AsyncTransferManager( S3Repository.PARALLEL_MULTIPART_UPLOAD_MINIMUM_PART_SIZE_SETTING.getDefault(Settings.EMPTY).getBytes(), asyncExecutorContainer.getStreamReader(), + asyncExecutorContainer.getStreamReader(), asyncExecutorContainer.getStreamReader() ), asyncExecutorContainer, + asyncExecutorContainer, asyncExecutorContainer ); } diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerRetriesTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerRetriesTests.java index a2214f5218991..ceab06bd051e9 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerRetriesTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerRetriesTests.java @@ -221,9 
+221,11 @@ protected AsyncMultiStreamBlobContainer createBlobContainer( new AsyncTransferManager( S3Repository.PARALLEL_MULTIPART_UPLOAD_MINIMUM_PART_SIZE_SETTING.getDefault(Settings.EMPTY).getBytes(), asyncExecutorContainer.getStreamReader(), + asyncExecutorContainer.getStreamReader(), asyncExecutorContainer.getStreamReader() ), asyncExecutorContainer, + asyncExecutorContainer, asyncExecutorContainer ) ) { diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobStoreContainerTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobStoreContainerTests.java index 2701cae6a733b..58ad290a31e85 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobStoreContainerTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobStoreContainerTests.java @@ -935,7 +935,7 @@ public void testReadBlobAsyncMultiPart() throws Exception { final S3AsyncClient s3AsyncClient = mock(S3AsyncClient.class); final AmazonAsyncS3Reference amazonAsyncS3Reference = new AmazonAsyncS3Reference( - AmazonAsyncS3WithCredentials.create(s3AsyncClient, s3AsyncClient, null) + AmazonAsyncS3WithCredentials.create(s3AsyncClient, s3AsyncClient, s3AsyncClient, null) ); final S3BlobStore blobStore = mock(S3BlobStore.class); @@ -993,7 +993,7 @@ public void testReadBlobAsyncSinglePart() throws Exception { final S3AsyncClient s3AsyncClient = mock(S3AsyncClient.class); final AmazonAsyncS3Reference amazonAsyncS3Reference = new AmazonAsyncS3Reference( - AmazonAsyncS3WithCredentials.create(s3AsyncClient, s3AsyncClient, null) + AmazonAsyncS3WithCredentials.create(s3AsyncClient, s3AsyncClient, s3AsyncClient, null) ); final S3BlobStore blobStore = mock(S3BlobStore.class); final BlobPath blobPath = new BlobPath(); @@ -1048,7 +1048,7 @@ public void testReadBlobAsyncFailure() throws Exception { final S3AsyncClient s3AsyncClient = mock(S3AsyncClient.class); final AmazonAsyncS3Reference 
amazonAsyncS3Reference = new AmazonAsyncS3Reference( - AmazonAsyncS3WithCredentials.create(s3AsyncClient, s3AsyncClient, null) + AmazonAsyncS3WithCredentials.create(s3AsyncClient, s3AsyncClient, s3AsyncClient, null) ); final S3BlobStore blobStore = mock(S3BlobStore.class); @@ -1091,7 +1091,7 @@ public void testReadBlobAsyncOnCompleteFailureMissingData() throws Exception { final S3AsyncClient s3AsyncClient = mock(S3AsyncClient.class); final AmazonAsyncS3Reference amazonAsyncS3Reference = new AmazonAsyncS3Reference( - AmazonAsyncS3WithCredentials.create(s3AsyncClient, s3AsyncClient, null) + AmazonAsyncS3WithCredentials.create(s3AsyncClient, s3AsyncClient, s3AsyncClient, null) ); final S3BlobStore blobStore = mock(S3BlobStore.class); diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3RepositoryTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3RepositoryTests.java index e65ca69a5047b..6fec535ae6301 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3RepositoryTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3RepositoryTests.java @@ -168,6 +168,7 @@ private S3Repository createS3Repo(RepositoryMetadata metadata) { null, null, null, + null, false ) { @Override diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java index 97a746cdeed93..2437547a80a6f 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java @@ -64,6 +64,7 @@ public void setUp() throws Exception { asyncTransferManager = new AsyncTransferManager( ByteSizeUnit.MB.toBytes(5), Executors.newSingleThreadExecutor(), + Executors.newSingleThreadExecutor(), 
Executors.newSingleThreadExecutor() ); super.setUp(); diff --git a/server/src/main/java/org/opensearch/common/blobstore/stream/write/WritePriority.java b/server/src/main/java/org/opensearch/common/blobstore/stream/write/WritePriority.java index b8c0b52f93a3c..3f341c878c3c7 100644 --- a/server/src/main/java/org/opensearch/common/blobstore/stream/write/WritePriority.java +++ b/server/src/main/java/org/opensearch/common/blobstore/stream/write/WritePriority.java @@ -15,5 +15,6 @@ */ public enum WritePriority { NORMAL, - HIGH + HIGH, + URGENT } diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index ae4a3fab9852d..025ad075d83b6 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -378,7 +378,7 @@ private String writeGlobalMetadata(ClusterState clusterState) throws IOException result.set(globalMetadataContainer.path().buildAsString() + globalMetadataFilename); }, ex -> { throw new GlobalMetadataTransferException(ex.getMessage(), ex); }), latch); - GLOBAL_METADATA_FORMAT.writeAsync( + GLOBAL_METADATA_FORMAT.writeAsyncWithUrgentPriority( clusterState.metadata(), globalMetadataContainer, globalMetadataFilename, @@ -510,7 +510,7 @@ private void writeIndexMetadataAsync( ex -> latchedActionListener.onFailure(new IndexMetadataTransferException(indexMetadata.getIndex().toString(), ex)) ); - INDEX_METADATA_FORMAT.writeAsync( + INDEX_METADATA_FORMAT.writeAsyncWithUrgentPriority( indexMetadata, indexMetadataContainer, indexMetadataFilename, diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java b/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java index e280141c12bc1..3e6052a5ef820 100644 --- 
a/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java +++ b/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java @@ -197,21 +197,56 @@ public void write( } /** - * Writes blob with resolving the blob name using {@link #blobName} method. - * Leverages the multipart upload if supported by the blobContainer. + * Internally calls {@link #writeAsyncWithPriority} with {@link WritePriority#NORMAL} + */ + public void writeAsync( + final T obj, + final BlobContainer blobContainer, + final String name, + final Compressor compressor, + ActionListener listener, + final ToXContent.Params params + ) throws IOException { + // use NORMAL priority by default + this.writeAsyncWithPriority(obj, blobContainer, name, compressor, WritePriority.NORMAL, listener, params); + } + + /** + * Internally calls {@link #writeAsyncWithPriority} with {@link WritePriority#URGENT} + *

+ * NOTE: We use this method to upload urgent priority objects like cluster state to remote stores. + * Use {@link #writeAsync(ToXContent, BlobContainer, String, Compressor, ActionListener, ToXContent.Params)} for + * other use cases. + */ + public void writeAsyncWithUrgentPriority( + final T obj, + final BlobContainer blobContainer, + final String name, + final Compressor compressor, + ActionListener listener, + final ToXContent.Params params + ) throws IOException { + this.writeAsyncWithPriority(obj, blobContainer, name, compressor, WritePriority.URGENT, listener, params); + } + + /** + * Writes the blob, resolving the blob name using the {@link #blobName} method, with the specified + * {@link WritePriority}. Leverages the multipart upload if supported by the blobContainer. * * @param obj object to be serialized * @param blobContainer blob container * @param name blob name * @param compressor whether to use compression + * @param priority write priority to be used * @param listener listener to listen to write result * @param params ToXContent params */ - public void writeAsync( + private void writeAsyncWithPriority( final T obj, final BlobContainer blobContainer, final String name, final Compressor compressor, + final WritePriority priority, ActionListener listener, final ToXContent.Params params ) throws IOException { @@ -222,7 +257,7 @@ public void writeAsync( } final String blobName = blobName(name); final BytesReference bytes = serialize(obj, blobName, compressor, params); - final String resourceDescription = "ChecksumBlobStoreFormat.writeAsync(blob=\"" + blobName + "\")"; + final String resourceDescription = "ChecksumBlobStoreFormat.writeAsyncWithPriority(blob=\"" + blobName + "\")"; try (IndexInput input = new ByteArrayIndexInput(resourceDescription, BytesReference.toBytes(bytes))) { long expectedChecksum; try { @@ -242,7 +277,7 @@ public void writeAsync( blobName, bytes.length(), true, - WritePriority.HIGH, + priority, (size, position) -> new
OffsetRangeIndexInputStream(input, size, position), expectedChecksum, ((AsyncMultiStreamBlobContainer) blobContainer).remoteIntegrityCheckSupported() diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index 4be5fc03c2a6d..173e15b8eca37 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -273,7 +273,7 @@ public void testWriteFullMetadataInParallelSuccess() throws IOException { new BytesArray(writtenBytes) ); - assertEquals(capturedWriteContext.getWritePriority(), WritePriority.HIGH); + assertEquals(capturedWriteContext.getWritePriority(), WritePriority.URGENT); assertEquals(writtenIndexMetadata.getNumberOfShards(), 1); assertEquals(writtenIndexMetadata.getNumberOfReplicas(), 0); assertEquals(writtenIndexMetadata.getIndex().getName(), "test-index"); diff --git a/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java b/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java index c114b56bd0b39..c5f36fcc01983 100644 --- a/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java +++ b/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java @@ -43,6 +43,7 @@ import org.opensearch.common.blobstore.fs.FsBlobStore; import org.opensearch.common.blobstore.stream.read.ReadContext; import org.opensearch.common.blobstore.stream.write.WriteContext; +import org.opensearch.common.blobstore.stream.write.WritePriority; import org.opensearch.common.compress.DeflateCompressor; import org.opensearch.common.io.Streams; import org.opensearch.common.io.stream.BytesStreamOutput; @@ -65,8 +66,13 @@ import java.util.Map; import java.util.concurrent.CountDownLatch; +import org.mockito.ArgumentCaptor; + import static org.hamcrest.Matchers.containsString; 
import static org.hamcrest.Matchers.greaterThan; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; public class BlobStoreFormatTests extends OpenSearchTestCase { @@ -128,44 +134,36 @@ public void testBlobStoreAsyncOperations() throws IOException, InterruptedExcept BlobPath.cleanPath(), null ); + MockFsVerifyingBlobContainer spyContainer = spy(mockBlobContainer); ChecksumBlobStoreFormat checksumSMILE = new ChecksumBlobStoreFormat<>(BLOB_CODEC, "%s", BlobObj::fromXContent); - + ArgumentCaptor> actionListenerArgumentCaptor = ArgumentCaptor.forClass(ActionListener.class); + ArgumentCaptor writeContextArgumentCaptor = ArgumentCaptor.forClass(WriteContext.class); CountDownLatch latch = new CountDownLatch(2); - ActionListener actionListener = new ActionListener<>() { - @Override - public void onResponse(Void unused) { - logger.info("---> Async write succeeded"); - latch.countDown(); - } - - @Override - public void onFailure(Exception e) { - logger.info("---> Failure in async write"); - throw new RuntimeException("async write should not fail"); - } - }; - // Write blobs in different formats checksumSMILE.writeAsync( new BlobObj("checksum smile"), - mockBlobContainer, + spyContainer, "check-smile", CompressorRegistry.none(), - actionListener, + getVoidActionListener(latch), ChecksumBlobStoreFormat.SNAPSHOT_ONLY_FORMAT_PARAMS ); checksumSMILE.writeAsync( new BlobObj("checksum smile compressed"), - mockBlobContainer, + spyContainer, "check-smile-comp", CompressorRegistry.getCompressor(DeflateCompressor.NAME), - actionListener, + getVoidActionListener(latch), ChecksumBlobStoreFormat.SNAPSHOT_ONLY_FORMAT_PARAMS ); latch.await(); + verify(spyContainer, times(2)).asyncBlobUpload(writeContextArgumentCaptor.capture(), actionListenerArgumentCaptor.capture()); + assertEquals(2, writeContextArgumentCaptor.getAllValues().size()); + writeContextArgumentCaptor.getAllValues() + .forEach(writeContext -> 
assertEquals(WritePriority.NORMAL, writeContext.getWritePriority())); // Assert that all checksum blobs can be read assertEquals(checksumSMILE.read(mockBlobContainer.getDelegate(), "check-smile", xContentRegistry()).getText(), "checksum smile"); assertEquals( @@ -174,6 +172,39 @@ public void onFailure(Exception e) { ); } + public void testBlobStorePriorityAsyncOperation() throws IOException, InterruptedException { + BlobStore blobStore = createTestBlobStore(); + MockFsVerifyingBlobContainer mockBlobContainer = new MockFsVerifyingBlobContainer( + (FsBlobStore) blobStore, + BlobPath.cleanPath(), + null + ); + MockFsVerifyingBlobContainer spyContainer = spy(mockBlobContainer); + ChecksumBlobStoreFormat checksumSMILE = new ChecksumBlobStoreFormat<>(BLOB_CODEC, "%s", BlobObj::fromXContent); + + ArgumentCaptor> actionListenerArgumentCaptor = ArgumentCaptor.forClass(ActionListener.class); + ArgumentCaptor writeContextArgumentCaptor = ArgumentCaptor.forClass(WriteContext.class); + CountDownLatch latch = new CountDownLatch(1); + + // Write blobs in different formats + checksumSMILE.writeAsyncWithUrgentPriority( + new BlobObj("cluster state diff"), + spyContainer, + "cluster-state-diff", + CompressorRegistry.none(), + getVoidActionListener(latch), + ChecksumBlobStoreFormat.SNAPSHOT_ONLY_FORMAT_PARAMS + ); + latch.await(); + + verify(spyContainer).asyncBlobUpload(writeContextArgumentCaptor.capture(), actionListenerArgumentCaptor.capture()); + assertEquals(WritePriority.URGENT, writeContextArgumentCaptor.getValue().getWritePriority()); + assertEquals( + checksumSMILE.read(mockBlobContainer.getDelegate(), "cluster-state-diff", xContentRegistry()).getText(), + "cluster state diff" + ); + } + public void testBlobStoreOperations() throws IOException { BlobStore blobStore = createTestBlobStore(); BlobContainer blobContainer = blobStore.blobContainer(BlobPath.cleanPath()); @@ -228,6 +259,24 @@ public void testBlobCorruption() throws IOException { } } + private ActionListener 
getVoidActionListener(CountDownLatch latch) { + ActionListener actionListener = new ActionListener<>() { + @Override + public void onResponse(Void unused) { + logger.info("---> Async write succeeded"); + latch.countDown(); + } + + @Override + public void onFailure(Exception e) { + logger.info("---> Failure in async write"); + throw new RuntimeException("async write should not fail"); + } + }; + + return actionListener; + } + protected BlobStore createTestBlobStore() throws IOException { return new FsBlobStore(randomIntBetween(1, 8) * 1024, createTempDir(), false); } From 54e74a84437238c6154b0a15d209a9a1ecbaa4bb Mon Sep 17 00:00:00 2001 From: Aman Khare <85096200+amkhar@users.noreply.github.com> Date: Tue, 24 Oct 2023 14:28:29 +0530 Subject: [PATCH 31/45] Add cluster state stats (#10670) * Add cluster state update stats along with remote upload stats around success/ failure, latency metric Signed-off-by: Aman Khare --- CHANGELOG.md | 1 + .../discovery/ClusterManagerDisruptionIT.java | 3 + .../remote/RemoteClusterStateServiceIT.java | 43 ++++++ .../coordination/CoordinationState.java | 6 + .../cluster/coordination/Coordinator.java | 12 +- .../coordination/InMemoryPersistedState.java | 5 + .../coordination/PersistedStateStats.java | 126 ++++++++++++++++++ .../cluster/service/ClusterStateStats.java | 120 +++++++++++++++++ .../cluster/service/MasterService.java | 23 +++- .../opensearch/discovery/DiscoveryStats.java | 21 ++- .../opensearch/gateway/GatewayMetaState.java | 13 ++ .../remote/RemoteClusterStateService.java | 20 ++- .../remote/RemotePersistenceStats.java | 37 +++++ .../cluster/node/stats/NodeStatsTests.java | 29 +++- .../cluster/service/MasterServiceTests.java | 3 + .../GatewayMetaStatePersistedStateTests.java | 22 +++ .../RemoteClusterStateServiceTests.java | 34 +++++ .../AbstractCoordinatorTestCase.java | 5 + 18 files changed, 511 insertions(+), 12 deletions(-) create mode 100644 
server/src/main/java/org/opensearch/cluster/coordination/PersistedStateStats.java create mode 100644 server/src/main/java/org/opensearch/cluster/service/ClusterStateStats.java create mode 100644 server/src/main/java/org/opensearch/gateway/remote/RemotePersistenceStats.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b0d9720ad208..8c7e3ee151d64 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -96,6 +96,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Introduce ConcurrentQueryProfiler to profile query using concurrent segment search path and support concurrency during rewrite and create weight ([10352](https://github.com/opensearch-project/OpenSearch/pull/10352)) - [Remote cluster state] Make index and global metadata upload timeout dynamic cluster settings ([#10814](https://github.com/opensearch-project/OpenSearch/pull/10814)) - Added cluster setting cluster.restrict.index.replication_type to restrict setting of index setting replication type ([#10866](https://github.com/opensearch-project/OpenSearch/pull/10866)) +- Add cluster state stats ([#10670](https://github.com/opensearch-project/OpenSearch/pull/10670)) ### Dependencies - Bump `com.google.api.grpc:proto-google-common-protos` from 2.10.0 to 2.25.1 ([#10208](https://github.com/opensearch-project/OpenSearch/pull/10208), [#10298](https://github.com/opensearch-project/OpenSearch/pull/10298)) diff --git a/server/src/internalClusterTest/java/org/opensearch/discovery/ClusterManagerDisruptionIT.java b/server/src/internalClusterTest/java/org/opensearch/discovery/ClusterManagerDisruptionIT.java index 1463c45aa9b2f..79f6ba6dfa642 100644 --- a/server/src/internalClusterTest/java/org/opensearch/discovery/ClusterManagerDisruptionIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/discovery/ClusterManagerDisruptionIT.java @@ -39,6 +39,7 @@ import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.coordination.NoClusterManagerBlockService; import 
org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.service.ClusterStateStats; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.core.xcontent.MediaTypeRegistry; @@ -199,6 +200,8 @@ public void testIsolateClusterManagerAndVerifyClusterStateConsensus() throws Exc } } + ClusterStateStats clusterStateStats = internalCluster().clusterService().getClusterManagerService().getClusterStateStats(); + assertTrue(clusterStateStats.getUpdateFailed() > 0); }); } diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java index 7304304e522f8..59eef3c06844b 100644 --- a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java @@ -8,9 +8,12 @@ package org.opensearch.gateway.remote; +import org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest; +import org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.blobstore.BlobPath; import org.opensearch.common.settings.Settings; +import org.opensearch.discovery.DiscoveryStats; import org.opensearch.remotestore.RemoteStoreBaseIntegTestCase; import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.blobstore.BlobStoreRepository; @@ -19,6 +22,7 @@ import java.nio.charset.StandardCharsets; import java.util.Base64; import java.util.Map; +import java.util.stream.Collectors; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING; @@ -94,6 +98,45 @@ public void 
testFullClusterRestoreStaleDelete() throws Exception { assertEquals(shardCount, indexMetadataMap.values().stream().findFirst().get().getNumberOfShards()); } + public void testRemoteStateStats() { + int shardCount = randomIntBetween(1, 2); + int replicaCount = 1; + int dataNodeCount = shardCount * (replicaCount + 1); + int clusterManagerNodeCount = 1; + prepareCluster(clusterManagerNodeCount, dataNodeCount, INDEX_NAME, replicaCount, shardCount); + String clusterManagerNode = internalCluster().getClusterManagerName(); + String dataNode = internalCluster().getDataNodeNames().stream().collect(Collectors.toList()).get(0); + + // Fetch _nodes/stats + NodesStatsResponse nodesStatsResponse = client().admin() + .cluster() + .prepareNodesStats(clusterManagerNode) + .addMetric(NodesStatsRequest.Metric.DISCOVERY.metricName()) + .get(); + + // assert cluster state stats + DiscoveryStats discoveryStats = nodesStatsResponse.getNodes().get(0).getDiscoveryStats(); + + assertNotNull(discoveryStats.getClusterStateStats()); + assertTrue(discoveryStats.getClusterStateStats().getUpdateSuccess() > 1); + assertEquals(0, discoveryStats.getClusterStateStats().getUpdateFailed()); + assertTrue(discoveryStats.getClusterStateStats().getUpdateTotalTimeInMillis() > 0); + // assert remote state stats + assertTrue(discoveryStats.getClusterStateStats().getPersistenceStats().get(0).getSuccessCount() > 1); + assertEquals(0, discoveryStats.getClusterStateStats().getPersistenceStats().get(0).getFailedCount()); + assertTrue(discoveryStats.getClusterStateStats().getPersistenceStats().get(0).getTotalTimeInMillis() > 0); + + NodesStatsResponse nodesStatsResponseDataNode = client().admin() + .cluster() + .prepareNodesStats(dataNode) + .addMetric(NodesStatsRequest.Metric.DISCOVERY.metricName()) + .get(); + // assert cluster state stats for data node + DiscoveryStats dataNodeDiscoveryStats = nodesStatsResponseDataNode.getNodes().get(0).getDiscoveryStats(); + 
assertNotNull(dataNodeDiscoveryStats.getClusterStateStats()); + assertEquals(0, dataNodeDiscoveryStats.getClusterStateStats().getUpdateSuccess()); + } + private void setReplicaCount(int replicaCount) { client().admin() .indices() diff --git a/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java b/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java index a339852e6ed8d..987a3e3ffa7d3 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java @@ -638,6 +638,12 @@ public interface PersistedState extends Closeable { */ void setLastAcceptedState(ClusterState clusterState); + /** + * Returns the stats for the persistence layer for {@link CoordinationState}. + * @return PersistedStateStats + */ + PersistedStateStats getStats(); + /** * Marks the last accepted cluster state as committed. * After a successful call to this method, {@link #getLastAcceptedState()} should return the last cluster state that was set, diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java index eb30460ca1b7f..a4ffab7fb70c9 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java @@ -56,6 +56,7 @@ import org.opensearch.cluster.service.ClusterApplier; import org.opensearch.cluster.service.ClusterApplier.ClusterApplyListener; import org.opensearch.cluster.service.ClusterManagerService; +import org.opensearch.cluster.service.ClusterStateStats; import org.opensearch.common.Booleans; import org.opensearch.common.Nullable; import org.opensearch.common.Priority; @@ -865,7 +866,16 @@ protected void doStart() { @Override public DiscoveryStats stats() { - return new DiscoveryStats(new PendingClusterStateStats(0, 0, 0), 
publicationHandler.stats()); + ClusterStateStats clusterStateStats = clusterManagerService.getClusterStateStats(); + ArrayList stats = new ArrayList<>(); + Stream.of(PersistedStateRegistry.PersistedStateType.values()).forEach(stateType -> { + if (persistedStateRegistry.getPersistedState(stateType) != null + && persistedStateRegistry.getPersistedState(stateType).getStats() != null) { + stats.add(persistedStateRegistry.getPersistedState(stateType).getStats()); + } + }); + clusterStateStats.setPersistenceStats(stats); + return new DiscoveryStats(new PendingClusterStateStats(0, 0, 0), publicationHandler.stats(), clusterStateStats); } @Override diff --git a/server/src/main/java/org/opensearch/cluster/coordination/InMemoryPersistedState.java b/server/src/main/java/org/opensearch/cluster/coordination/InMemoryPersistedState.java index 67ef82ee7b2e9..b77ede5471534 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/InMemoryPersistedState.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/InMemoryPersistedState.java @@ -65,6 +65,11 @@ public void setLastAcceptedState(ClusterState clusterState) { this.acceptedState = clusterState; } + @Override + public PersistedStateStats getStats() { + return null; + } + @Override public long getCurrentTerm() { return currentTerm; diff --git a/server/src/main/java/org/opensearch/cluster/coordination/PersistedStateStats.java b/server/src/main/java/org/opensearch/cluster/coordination/PersistedStateStats.java new file mode 100644 index 0000000000000..1dc20e564ade2 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/coordination/PersistedStateStats.java @@ -0,0 +1,126 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.cluster.coordination; + +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.xcontent.ToXContentObject; +import org.opensearch.core.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.atomic.AtomicLong; + +/** + * Persisted cluster state related stats. + * + * @opensearch.internal + */ +public class PersistedStateStats implements Writeable, ToXContentObject { + private String statsName; + private AtomicLong totalTimeInMillis = new AtomicLong(0); + private AtomicLong failedCount = new AtomicLong(0); + private AtomicLong successCount = new AtomicLong(0); + private Map extendedFields = new HashMap<>(); // keeping minimal extensibility + + public PersistedStateStats(String statsName) { + this.statsName = statsName; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVLong(successCount.get()); + out.writeVLong(failedCount.get()); + out.writeVLong(totalTimeInMillis.get()); + if (extendedFields.size() > 0) { + out.writeBoolean(true); + out.writeVInt(extendedFields.size()); + for (Map.Entry extendedField : extendedFields.entrySet()) { + out.writeString(extendedField.getKey()); + out.writeVLong(extendedField.getValue().get()); + } + } else { + out.writeBoolean(false); + } + } + + public PersistedStateStats(StreamInput in) throws IOException { + this.successCount = new AtomicLong(in.readVLong()); + this.failedCount = new AtomicLong(in.readVLong()); + this.totalTimeInMillis = new AtomicLong(in.readVLong()); + if (in.readBoolean()) { + int extendedFieldsSize = in.readVInt(); + this.extendedFields = new HashMap<>(); + for (int fieldNumber = 0; fieldNumber < extendedFieldsSize; fieldNumber++) { + extendedFields.put(in.readString(), new AtomicLong(in.readVLong())); + } + } + } + + 
@Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(statsName); + builder.field(Fields.SUCCESS_COUNT, getSuccessCount()); + builder.field(Fields.FAILED_COUNT, getFailedCount()); + builder.field(Fields.TOTAL_TIME_IN_MILLIS, getTotalTimeInMillis()); + if (extendedFields.size() > 0) { + for (Map.Entry extendedField : extendedFields.entrySet()) { + builder.field(extendedField.getKey(), extendedField.getValue().get()); + } + } + builder.endObject(); + return builder; + } + + public void stateFailed() { + failedCount.incrementAndGet(); + } + + public void stateSucceeded() { + successCount.incrementAndGet(); + } + + /** + * Expects user to send time taken in milliseconds. + * + * @param timeTakenInUpload time taken in uploading the cluster state to remote + */ + public void stateTook(long timeTakenInUpload) { + totalTimeInMillis.addAndGet(timeTakenInUpload); + } + + public long getTotalTimeInMillis() { + return totalTimeInMillis.get(); + } + + public long getFailedCount() { + return failedCount.get(); + } + + public long getSuccessCount() { + return successCount.get(); + } + + protected void addToExtendedFields(String extendedField, AtomicLong extendedFieldValue) { + this.extendedFields.put(extendedField, extendedFieldValue); + } + + /** + * Fields for parsing and toXContent + * + * @opensearch.internal + */ + static final class Fields { + static final String SUCCESS_COUNT = "success_count"; + static final String TOTAL_TIME_IN_MILLIS = "total_time_in_millis"; + static final String FAILED_COUNT = "failed_count"; + } +} diff --git a/server/src/main/java/org/opensearch/cluster/service/ClusterStateStats.java b/server/src/main/java/org/opensearch/cluster/service/ClusterStateStats.java new file mode 100644 index 0000000000000..96683ce720d0b --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/service/ClusterStateStats.java @@ -0,0 +1,120 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * 
The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.service; + +import org.opensearch.cluster.coordination.PersistedStateStats; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.xcontent.ToXContentObject; +import org.opensearch.core.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.atomic.AtomicLong; + +/** + * Cluster state related stats. + * + * @opensearch.internal + */ +public class ClusterStateStats implements Writeable, ToXContentObject { + + private AtomicLong updateSuccess = new AtomicLong(0); + private AtomicLong updateTotalTimeInMillis = new AtomicLong(0); + private AtomicLong updateFailed = new AtomicLong(0); + private List persistenceStats = new ArrayList<>(); + + public ClusterStateStats() {} + + public long getUpdateSuccess() { + return updateSuccess.get(); + } + + public long getUpdateTotalTimeInMillis() { + return updateTotalTimeInMillis.get(); + } + + public long getUpdateFailed() { + return updateFailed.get(); + } + + public List getPersistenceStats() { + return persistenceStats; + } + + public void stateUpdated() { + updateSuccess.incrementAndGet(); + } + + public void stateUpdateFailed() { + updateFailed.incrementAndGet(); + } + + public void stateUpdateTook(long stateUpdateTime) { + updateTotalTimeInMillis.addAndGet(stateUpdateTime); + } + + public ClusterStateStats setPersistenceStats(List persistenceStats) { + this.persistenceStats = persistenceStats; + return this; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVLong(updateSuccess.get()); + out.writeVLong(updateTotalTimeInMillis.get()); + out.writeVLong(updateFailed.get()); + 
out.writeVInt(persistenceStats.size()); + for (PersistedStateStats stats : persistenceStats) { + stats.writeTo(out); + } + } + + public ClusterStateStats(StreamInput in) throws IOException { + this.updateSuccess = new AtomicLong(in.readVLong()); + this.updateTotalTimeInMillis = new AtomicLong(in.readVLong()); + this.updateFailed = new AtomicLong(in.readVLong()); + int persistedStatsSize = in.readVInt(); + this.persistenceStats = new ArrayList<>(); + for (int statsNumber = 0; statsNumber < persistedStatsSize; statsNumber++) { + PersistedStateStats stats = new PersistedStateStats(in); + this.persistenceStats.add(stats); + } + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(Fields.CLUSTER_STATE_STATS); + builder.startObject(Fields.OVERALL); + builder.field(Fields.UPDATE_COUNT, getUpdateSuccess()); + builder.field(Fields.TOTAL_TIME_IN_MILLIS, getUpdateTotalTimeInMillis()); + builder.field(Fields.FAILED_COUNT, getUpdateFailed()); + builder.endObject(); + for (PersistedStateStats stats : persistenceStats) { + stats.toXContent(builder, params); + } + builder.endObject(); + return builder; + } + + /** + * Fields for parsing and toXContent + * + * @opensearch.internal + */ + static final class Fields { + static final String CLUSTER_STATE_STATS = "cluster_state_stats"; + static final String OVERALL = "overall"; + static final String UPDATE_COUNT = "update_count"; + static final String TOTAL_TIME_IN_MILLIS = "total_time_in_millis"; + static final String FAILED_COUNT = "failed_count"; + } +} diff --git a/server/src/main/java/org/opensearch/cluster/service/MasterService.java b/server/src/main/java/org/opensearch/cluster/service/MasterService.java index 563b69dfd0e2a..07c3f93ae6486 100644 --- a/server/src/main/java/org/opensearch/cluster/service/MasterService.java +++ b/server/src/main/java/org/opensearch/cluster/service/MasterService.java @@ -112,7 +112,9 @@ public class MasterService extends 
AbstractLifecycleComponent { static final String CLUSTER_MANAGER_UPDATE_THREAD_NAME = "clusterManagerService#updateTask"; - /** @deprecated As of 2.2, because supporting inclusive language, replaced by {@link #CLUSTER_MANAGER_UPDATE_THREAD_NAME} */ + /** + * @deprecated As of 2.2, because supporting inclusive language, replaced by {@link #CLUSTER_MANAGER_UPDATE_THREAD_NAME} + */ @Deprecated static final String MASTER_UPDATE_THREAD_NAME = "masterService#updateTask"; @@ -130,6 +132,7 @@ public class MasterService extends AbstractLifecycleComponent { private volatile Batcher taskBatcher; protected final ClusterManagerTaskThrottler clusterManagerTaskThrottler; private final ClusterManagerThrottlingStats throttlingStats; + private final ClusterStateStats stateStats; public MasterService(Settings settings, ClusterSettings clusterSettings, ThreadPool threadPool) { this.nodeName = Objects.requireNonNull(Node.NODE_NAME_SETTING.get(settings)); @@ -147,6 +150,7 @@ public MasterService(Settings settings, ClusterSettings clusterSettings, ThreadP this::getMinNodeVersion, throttlingStats ); + this.stateStats = new ClusterStateStats(); this.threadPool = threadPool; } @@ -339,7 +343,7 @@ private TimeValue getTimeSince(long startTimeNanos) { return TimeValue.timeValueMillis(TimeValue.nsecToMSec(threadPool.preciseRelativeTimeInNanos() - startTimeNanos)); } - protected void publish(ClusterChangedEvent clusterChangedEvent, TaskOutputs taskOutputs, long startTimeMillis) { + protected void publish(ClusterChangedEvent clusterChangedEvent, TaskOutputs taskOutputs, long startTimeNanos) { final PlainActionFuture fut = new PlainActionFuture() { @Override protected boolean blockingAllowed() { @@ -352,8 +356,12 @@ protected boolean blockingAllowed() { try { FutureUtils.get(fut); onPublicationSuccess(clusterChangedEvent, taskOutputs); + final long durationMillis = getTimeSince(startTimeNanos).millis(); + stateStats.stateUpdateTook(durationMillis); + stateStats.stateUpdated(); } catch (Exception 
e) { - onPublicationFailed(clusterChangedEvent, taskOutputs, startTimeMillis, e); + stateStats.stateUpdateFailed(); + onPublicationFailed(clusterChangedEvent, taskOutputs, startTimeNanos, e); } } @@ -464,7 +472,6 @@ public Builder incrementVersion(ClusterState clusterState) { * @param source the source of the cluster state update task * @param updateTask the full context for the cluster state update * task - * */ public & ClusterStateTaskListener> void submitStateUpdateTask( String source, @@ -490,7 +497,6 @@ public & Cluster * @param listener callback after the cluster state update task * completes * @param the type of the cluster state update task state - * */ public void submitStateUpdateTask( String source, @@ -947,7 +953,7 @@ void onNoLongerClusterManager() { /** * Functionality for register task key to cluster manager node. * - * @param taskKey - task key of task + * @param taskKey - task key of task * @param throttlingEnabled - throttling is enabled for task or not i.e does data node perform retries on it or not * @return throttling task key which needs to be passed while submitting task to cluster manager */ @@ -966,7 +972,6 @@ public ClusterManagerTaskThrottler.ThrottlingKey registerClusterManagerTask(Stri * that share the same executor will be executed * batches on this executor * @param the type of the cluster state update task state - * */ public void submitStateUpdateTasks( final String source, @@ -996,4 +1001,8 @@ public void submitStateUpdateTasks( } } + public ClusterStateStats getClusterStateStats() { + return stateStats; + } + } diff --git a/server/src/main/java/org/opensearch/discovery/DiscoveryStats.java b/server/src/main/java/org/opensearch/discovery/DiscoveryStats.java index 665ecf77d7aa7..ea93ccd09ed39 100644 --- a/server/src/main/java/org/opensearch/discovery/DiscoveryStats.java +++ b/server/src/main/java/org/opensearch/discovery/DiscoveryStats.java @@ -32,8 +32,10 @@ package org.opensearch.discovery; +import org.opensearch.Version; import 
org.opensearch.cluster.coordination.PendingClusterStateStats; import org.opensearch.cluster.coordination.PublishClusterStateStats; +import org.opensearch.cluster.service.ClusterStateStats; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.common.io.stream.Writeable; @@ -51,21 +53,31 @@ public class DiscoveryStats implements Writeable, ToXContentFragment { private final PendingClusterStateStats queueStats; private final PublishClusterStateStats publishStats; + private final ClusterStateStats clusterStateStats; - public DiscoveryStats(PendingClusterStateStats queueStats, PublishClusterStateStats publishStats) { + public DiscoveryStats(PendingClusterStateStats queueStats, PublishClusterStateStats publishStats, ClusterStateStats clusterStateStats) { this.queueStats = queueStats; this.publishStats = publishStats; + this.clusterStateStats = clusterStateStats; } public DiscoveryStats(StreamInput in) throws IOException { queueStats = in.readOptionalWriteable(PendingClusterStateStats::new); publishStats = in.readOptionalWriteable(PublishClusterStateStats::new); + if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + clusterStateStats = in.readOptionalWriteable(ClusterStateStats::new); + } else { + clusterStateStats = null; + } } @Override public void writeTo(StreamOutput out) throws IOException { out.writeOptionalWriteable(queueStats); out.writeOptionalWriteable(publishStats); + if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + out.writeOptionalWriteable(clusterStateStats); + } } @Override @@ -77,6 +89,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (publishStats != null) { publishStats.toXContent(builder, params); } + if (clusterStateStats != null) { + clusterStateStats.toXContent(builder, params); + } builder.endObject(); return builder; } @@ -92,4 +107,8 @@ public PendingClusterStateStats getQueueStats() { public PublishClusterStateStats 
getPublishStats() { return publishStats; } + + public ClusterStateStats getClusterStateStats() { + return clusterStateStats; + } } diff --git a/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java b/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java index f855449c708d2..350a361a49a62 100644 --- a/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java +++ b/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java @@ -47,6 +47,7 @@ import org.opensearch.cluster.coordination.InMemoryPersistedState; import org.opensearch.cluster.coordination.PersistedStateRegistry; import org.opensearch.cluster.coordination.PersistedStateRegistry.PersistedStateType; +import org.opensearch.cluster.coordination.PersistedStateStats; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.IndexTemplateMetadata; import org.opensearch.cluster.metadata.Manifest; @@ -615,6 +616,12 @@ public void setLastAcceptedState(ClusterState clusterState) { lastAcceptedState = clusterState; } + @Override + public PersistedStateStats getStats() { + // Note: These stats are not published yet, will come in future + return null; + } + private PersistedClusterStateService.Writer getWriterSafe() { final PersistedClusterStateService.Writer writer = persistenceWriter.get(); if (writer == null) { @@ -717,10 +724,16 @@ assert verifyManifestAndClusterState(lastAcceptedManifest, lastAcceptedState) == lastAcceptedManifest = manifest; lastAcceptedState = clusterState; } catch (Exception e) { + remoteClusterStateService.writeMetadataFailed(); handleExceptionOnWrite(e); } } + @Override + public PersistedStateStats getStats() { + return remoteClusterStateService.getStats(); + } + private boolean verifyManifestAndClusterState(ClusterMetadataManifest manifest, ClusterState clusterState) { assert manifest != null : "ClusterMetadataManifest is null"; assert clusterState != null : "ClusterState is null"; diff --git 
a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index 025ad075d83b6..329ebd0dcd2b8 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -159,7 +159,7 @@ public class RemoteClusterStateService implements Closeable { private volatile TimeValue globalMetadataUploadTimeout; private final AtomicBoolean deleteStaleMetadataRunning = new AtomicBoolean(false); - + private final RemotePersistenceStats remoteStateStats; public static final int INDEX_METADATA_CURRENT_CODEC_VERSION = 1; public static final int MANIFEST_CURRENT_CODEC_VERSION = ClusterMetadataManifest.CODEC_V1; public static final int GLOBAL_METADATA_CURRENT_CODEC_VERSION = 1; @@ -193,6 +193,7 @@ public RemoteClusterStateService( clusterSettings.addSettingsUpdateConsumer(SLOW_WRITE_LOGGING_THRESHOLD, this::setSlowWriteLoggingThreshold); clusterSettings.addSettingsUpdateConsumer(INDEX_METADATA_UPLOAD_TIMEOUT_SETTING, this::setIndexMetadataUploadTimeout); clusterSettings.addSettingsUpdateConsumer(GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING, this::setGlobalMetadataUploadTimeout); + this.remoteStateStats = new RemotePersistenceStats(); } private BlobStoreTransferService getBlobStoreTransferService() { @@ -233,6 +234,8 @@ public ClusterMetadataManifest writeFullMetadata(ClusterState clusterState, Stri false ); final long durationMillis = TimeValue.nsecToMSec(relativeTimeNanosSupplier.getAsLong() - startTimeNanos); + remoteStateStats.stateSucceeded(); + remoteStateStats.stateTook(durationMillis); if (durationMillis >= slowWriteLoggingThreshold.getMillis()) { logger.warn( "writing cluster state took [{}ms] which is above the warn threshold of [{}]; " + "wrote full state with [{}] indices", @@ -334,6 +337,8 @@ public ClusterMetadataManifest writeIncrementalMetadata( 
deleteStaleClusterMetadata(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID(), RETAINED_MANIFESTS); final long durationMillis = TimeValue.nsecToMSec(relativeTimeNanosSupplier.getAsLong() - startTimeNanos); + remoteStateStats.stateSucceeded(); + remoteStateStats.stateTook(durationMillis); if (durationMillis >= slowWriteLoggingThreshold.getMillis()) { logger.warn( "writing cluster state took [{}ms] which is above the warn threshold of [{}]; " @@ -1059,6 +1064,10 @@ public static String encodeString(String content) { return Base64.getUrlEncoder().withoutPadding().encodeToString(content.getBytes(StandardCharsets.UTF_8)); } + public void writeMetadataFailed() { + getStats().stateFailed(); + } + /** * Exception for IndexMetadata transfer failures to remote */ @@ -1093,7 +1102,7 @@ public GlobalMetadataTransferException(String errorDesc, Throwable cause) { * @param clusterName name of the cluster * @param clusterUUIDs clusteUUIDs for which the remote state needs to be purged */ - private void deleteStaleUUIDsClusterMetadata(String clusterName, List clusterUUIDs) { + void deleteStaleUUIDsClusterMetadata(String clusterName, List clusterUUIDs) { clusterUUIDs.forEach(clusterUUID -> { getBlobStoreTransferService().deleteAsync( ThreadPool.Names.REMOTE_PURGE, @@ -1113,6 +1122,7 @@ public void onFailure(Exception e) { ), e ); + remoteStateStats.cleanUpAttemptFailed(); } } ); @@ -1228,8 +1238,10 @@ private void deleteClusterMetadata( logger.error("Error while fetching Remote Cluster Metadata manifests", e); } catch (IOException e) { logger.error("Error while deleting stale Remote Cluster Metadata files", e); + remoteStateStats.cleanUpAttemptFailed(); } catch (Exception e) { logger.error("Unexpected error while deleting stale Remote Cluster Metadata files", e); + remoteStateStats.cleanUpAttemptFailed(); } } @@ -1260,4 +1272,8 @@ public void deleteStaleClusterUUIDs(ClusterState clusterState, ClusterMetadataMa deleteStaleUUIDsClusterMetadata(clusterName, new 
ArrayList<>(allClustersUUIDsInRemote)); }); } + + public RemotePersistenceStats getStats() { + return remoteStateStats; + } } diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemotePersistenceStats.java b/server/src/main/java/org/opensearch/gateway/remote/RemotePersistenceStats.java new file mode 100644 index 0000000000000..f2330846fa23e --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/RemotePersistenceStats.java @@ -0,0 +1,37 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +import org.opensearch.cluster.coordination.PersistedStateStats; + +import java.util.concurrent.atomic.AtomicLong; + +/** + * Remote state related extended stats. + * + * @opensearch.internal + */ +public class RemotePersistenceStats extends PersistedStateStats { + static final String CLEANUP_ATTEMPT_FAILED_COUNT = "cleanup_attempt_failed_count"; + static final String REMOTE_UPLOAD = "remote_upload"; + private AtomicLong cleanupAttemptFailedCount = new AtomicLong(0); + + public RemotePersistenceStats() { + super(REMOTE_UPLOAD); + addToExtendedFields(CLEANUP_ATTEMPT_FAILED_COUNT, cleanupAttemptFailedCount); + } + + public void cleanUpAttemptFailed() { + cleanupAttemptFailedCount.incrementAndGet(); + } + + public long getCleanupAttemptFailedCount() { + return cleanupAttemptFailedCount.get(); + } +} diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java index ebdd012006fb2..3050d1674a95b 100644 --- a/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java +++ b/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java @@ -36,10 +36,12 @@ import 
org.opensearch.action.admin.indices.stats.CommonStatsFlags; import org.opensearch.action.search.SearchRequestStats; import org.opensearch.cluster.coordination.PendingClusterStateStats; +import org.opensearch.cluster.coordination.PersistedStateStats; import org.opensearch.cluster.coordination.PublishClusterStateStats; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.routing.WeightedRoutingStats; import org.opensearch.cluster.service.ClusterManagerThrottlingStats; +import org.opensearch.cluster.service.ClusterStateStats; import org.opensearch.common.io.stream.BytesStreamOutput; import org.opensearch.common.metrics.OperationStats; import org.opensearch.core.common.io.stream.StreamInput; @@ -47,6 +49,7 @@ import org.opensearch.core.indices.breaker.AllCircuitBreakerStats; import org.opensearch.core.indices.breaker.CircuitBreakerStats; import org.opensearch.discovery.DiscoveryStats; +import org.opensearch.gateway.remote.RemotePersistenceStats; import org.opensearch.http.HttpStats; import org.opensearch.index.ReplicationStats; import org.opensearch.index.SegmentReplicationRejectionStats; @@ -72,6 +75,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; @@ -349,6 +353,25 @@ public void testSerialization() throws IOException { assertEquals(queueStats.getTotal(), deserializedDiscoveryStats.getQueueStats().getTotal()); assertEquals(queueStats.getPending(), deserializedDiscoveryStats.getQueueStats().getPending()); } + ClusterStateStats stateStats = discoveryStats.getClusterStateStats(); + if (stateStats == null) { + assertNull(deserializedDiscoveryStats.getClusterStateStats()); + } else { + assertEquals(stateStats.getUpdateFailed(), deserializedDiscoveryStats.getClusterStateStats().getUpdateFailed()); + assertEquals(stateStats.getUpdateSuccess(), deserializedDiscoveryStats.getClusterStateStats().getUpdateSuccess()); + assertEquals( 
+ stateStats.getUpdateTotalTimeInMillis(), + deserializedDiscoveryStats.getClusterStateStats().getUpdateTotalTimeInMillis() + ); + assertEquals(1, deserializedDiscoveryStats.getClusterStateStats().getPersistenceStats().size()); + PersistedStateStats deserializedRemoteStateStats = deserializedDiscoveryStats.getClusterStateStats() + .getPersistenceStats() + .get(0); + PersistedStateStats remoteStateStats = stateStats.getPersistenceStats().get(0); + assertEquals(remoteStateStats.getFailedCount(), deserializedRemoteStateStats.getFailedCount()); + assertEquals(remoteStateStats.getSuccessCount(), deserializedRemoteStateStats.getSuccessCount()); + assertEquals(remoteStateStats.getTotalTimeInMillis(), deserializedRemoteStateStats.getTotalTimeInMillis()); + } } IngestStats ingestStats = nodeStats.getIngestStats(); IngestStats deserializedIngestStats = deserializedNodeStats.getIngestStats(); @@ -725,12 +748,16 @@ public static NodeStats createNodeStats(boolean remoteStoreStats) { ScriptStats scriptStats = frequently() ? new ScriptStats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()) : null; + ClusterStateStats stateStats = new ClusterStateStats(); + RemotePersistenceStats remoteStateStats = new RemotePersistenceStats(); + stateStats.setPersistenceStats(Arrays.asList(remoteStateStats)); DiscoveryStats discoveryStats = frequently() ? new DiscoveryStats( randomBoolean() ? new PendingClusterStateStats(randomInt(), randomInt(), randomInt()) : null, randomBoolean() ? new PublishClusterStateStats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()) - : null + : null, + randomBoolean() ? 
stateStats : null ) : null; IngestStats ingestStats = null; diff --git a/server/src/test/java/org/opensearch/cluster/service/MasterServiceTests.java b/server/src/test/java/org/opensearch/cluster/service/MasterServiceTests.java index 9cdbe04e0a0e4..4c0ca826f5dcc 100644 --- a/server/src/test/java/org/opensearch/cluster/service/MasterServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/service/MasterServiceTests.java @@ -691,6 +691,9 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS submittedTasksPerThread.get(entry.getKey()).get() ); } + // verify stats values after state is published + assertEquals(1, clusterManagerService.getClusterStateStats().getUpdateSuccess()); + assertEquals(0, clusterManagerService.getClusterStateStats().getUpdateFailed()); } } diff --git a/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java b/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java index 1d5c2a0f01b5c..fd113ed4313d7 100644 --- a/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java +++ b/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java @@ -68,6 +68,7 @@ import org.opensearch.gateway.PersistedClusterStateService.Writer; import org.opensearch.gateway.remote.ClusterMetadataManifest; import org.opensearch.gateway.remote.RemoteClusterStateService; +import org.opensearch.gateway.remote.RemotePersistenceStats; import org.opensearch.index.recovery.RemoteStoreRestoreService; import org.opensearch.index.recovery.RemoteStoreRestoreService.RemoteRestoreResult; import org.opensearch.node.Node; @@ -104,6 +105,7 @@ import static org.hamcrest.Matchers.nullValue; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.Mockito.doCallRealMethod; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static 
org.mockito.Mockito.verify; @@ -779,6 +781,26 @@ public void testRemotePersistedStateExceptionOnFullStateUpload() throws IOExcept assertThrows(OpenSearchException.class, () -> remotePersistedState.setLastAcceptedState(clusterState)); } + public void testRemotePersistedStateFailureStats() throws IOException { + RemotePersistenceStats remoteStateStats = new RemotePersistenceStats(); + final RemoteClusterStateService remoteClusterStateService = Mockito.mock(RemoteClusterStateService.class); + final String previousClusterUUID = "prev-cluster-uuid"; + Mockito.doThrow(IOException.class).when(remoteClusterStateService).writeFullMetadata(Mockito.any(), Mockito.any()); + when(remoteClusterStateService.getStats()).thenReturn(remoteStateStats); + doCallRealMethod().when(remoteClusterStateService).writeMetadataFailed(); + CoordinationState.PersistedState remotePersistedState = new RemotePersistedState(remoteClusterStateService, previousClusterUUID); + + final long clusterTerm = randomNonNegativeLong(); + final ClusterState clusterState = createClusterState( + randomNonNegativeLong(), + Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(clusterTerm).build()).build() + ); + + assertThrows(OpenSearchException.class, () -> remotePersistedState.setLastAcceptedState(clusterState)); + assertEquals(1, remoteClusterStateService.getStats().getFailedCount()); + assertEquals(0, remoteClusterStateService.getStats().getSuccessCount()); + } + public void testGatewayForRemoteState() throws IOException { MockGatewayMetaState gateway = null; try { diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index 173e15b8eca37..5a43864f40c0c 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -324,6 
+324,7 @@ public void testWriteFullMetadataInParallelFailureForIndexMetadata() throws IOEx RemoteClusterStateService.IndexMetadataTransferException.class, () -> remoteClusterStateService.writeFullMetadata(clusterState, randomAlphaOfLength(10)) ); + assertEquals(0, remoteClusterStateService.getStats().getSuccessCount()); } public void testFailWriteIncrementalMetadataNonClusterManagerNode() throws IOException { @@ -331,6 +332,7 @@ public void testFailWriteIncrementalMetadataNonClusterManagerNode() throws IOExc remoteClusterStateService.start(); final ClusterMetadataManifest manifest = remoteClusterStateService.writeIncrementalMetadata(clusterState, clusterState, null); Assert.assertThat(manifest, nullValue()); + assertEquals(0, remoteClusterStateService.getStats().getSuccessCount()); } public void testFailWriteIncrementalMetadataWhenTermChanged() { @@ -991,6 +993,38 @@ public void testDeleteStaleClusterUUIDs() throws IOException { } } + public void testRemoteStateStats() throws IOException { + final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); + mockBlobStoreObjects(); + remoteClusterStateService.start(); + final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata(clusterState, "prev-cluster-uuid"); + + assertTrue(remoteClusterStateService.getStats() != null); + assertEquals(1, remoteClusterStateService.getStats().getSuccessCount()); + assertEquals(0, remoteClusterStateService.getStats().getCleanupAttemptFailedCount()); + assertEquals(0, remoteClusterStateService.getStats().getFailedCount()); + } + + public void testRemoteStateCleanupFailureStats() throws IOException { + BlobContainer blobContainer = mock(BlobContainer.class); + doThrow(IOException.class).when(blobContainer).delete(); + when(blobStore.blobContainer(any())).thenReturn(blobContainer); + BlobPath blobPath = new BlobPath().add("random-path"); + when((blobStoreRepository.basePath())).thenReturn(blobPath); + 
remoteClusterStateService.start(); + remoteClusterStateService.deleteStaleUUIDsClusterMetadata("cluster1", Arrays.asList("cluster-uuid1")); + try { + assertBusy(() -> { + // wait for stats to get updated + assertTrue(remoteClusterStateService.getStats() != null); + assertEquals(0, remoteClusterStateService.getStats().getSuccessCount()); + assertEquals(1, remoteClusterStateService.getStats().getCleanupAttemptFailedCount()); + }); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + public void testFileNames() { final Index index = new Index("test-index", "index-uuid"); final Settings idxSettings = Settings.builder() diff --git a/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java b/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java index d24cc24d28579..28d7706fb1493 100644 --- a/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java +++ b/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java @@ -1016,6 +1016,11 @@ public void setLastAcceptedState(ClusterState clusterState) { delegate.setLastAcceptedState(clusterState); } + @Override + public PersistedStateStats getStats() { + return null; + } + @Override public void close() { assertTrue(openPersistedStates.remove(this)); From 6f36752d9e84e95ce2280347cc26b0c9138b2d57 Mon Sep 17 00:00:00 2001 From: Sachin Kale Date: Tue, 24 Oct 2023 14:31:01 +0530 Subject: [PATCH 32/45] Sync translog to remote on primary activate (#10839) --------- Signed-off-by: Sachin Kale Co-authored-by: Sachin Kale --- .../remotestore/RemoteRestoreSnapshotIT.java | 94 +++++++++++++++++++ .../remotestore/RemoteStoreStatsIT.java | 8 +- .../opensearch/index/shard/IndexShard.java | 19 +++- .../index/shard/IndexShardTests.java | 1 + 4 files changed, 116 insertions(+), 6 deletions(-) diff --git 
a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java index 865b2d13f189e..9e0b2a66467de 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java @@ -11,6 +11,7 @@ import org.opensearch.action.DocWriteResponse; import org.opensearch.action.admin.cluster.remotestore.restore.RestoreRemoteStoreRequest; import org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse; +import org.opensearch.action.admin.indices.delete.DeleteIndexRequest; import org.opensearch.action.admin.indices.get.GetIndexRequest; import org.opensearch.action.admin.indices.get.GetIndexResponse; import org.opensearch.action.delete.DeleteResponse; @@ -20,8 +21,13 @@ import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.io.PathUtils; import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.io.IOUtils; +import org.opensearch.core.index.Index; import org.opensearch.core.rest.RestStatus; +import org.opensearch.index.IndexService; import org.opensearch.index.IndexSettings; +import org.opensearch.index.shard.IndexShard; +import org.opensearch.indices.IndicesService; import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.snapshots.AbstractSnapshotIntegTestCase; import org.opensearch.snapshots.SnapshotInfo; @@ -32,11 +38,15 @@ import org.junit.Before; import java.io.IOException; +import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Optional; import java.util.concurrent.ExecutionException; +import java.util.stream.Collectors; +import java.util.stream.Stream; import static 
org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_SEGMENT_STORE_REPOSITORY; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_STORE_ENABLED; @@ -345,6 +355,90 @@ public void testRestoreInSameRemoteStoreEnabledIndex() throws IOException { assertDocsPresentInIndex(client, indexName1, numDocsInIndex1 + 4); } + public void testRemoteRestoreIndexRestoredFromSnapshot() throws IOException, ExecutionException, InterruptedException { + internalCluster().startClusterManagerOnlyNode(); + internalCluster().startDataOnlyNodes(2); + + String indexName1 = "testindex1"; + String snapshotRepoName = "test-restore-snapshot-repo"; + String snapshotName1 = "test-restore-snapshot1"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + createRepository(snapshotRepoName, "fs", getRepositorySettings(absolutePath1, true)); + + Settings indexSettings = getIndexSettings(1, 0).build(); + createIndex(indexName1, indexSettings); + + final int numDocsInIndex1 = randomIntBetween(20, 30); + indexDocuments(client(), indexName1, numDocsInIndex1); + flushAndRefresh(indexName1); + ensureGreen(indexName1); + + logger.info("--> snapshot"); + SnapshotInfo snapshotInfo1 = createSnapshot(snapshotRepoName, snapshotName1, new ArrayList<>(Arrays.asList(indexName1))); + assertThat(snapshotInfo1.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo1.successfulShards(), equalTo(snapshotInfo1.totalShards())); + assertThat(snapshotInfo1.state(), equalTo(SnapshotState.SUCCESS)); + + assertAcked(client().admin().indices().delete(new DeleteIndexRequest(indexName1)).get()); + assertFalse(indexExists(indexName1)); + + RestoreSnapshotResponse restoreSnapshotResponse1 = client().admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepoName, snapshotName1) + .setWaitForCompletion(false) + .setIndices(indexName1) + .get(); + + assertEquals(restoreSnapshotResponse1.status(), RestStatus.ACCEPTED); + 
ensureGreen(indexName1); + assertDocsPresentInIndex(client(), indexName1, numDocsInIndex1); + + // Make sure remote translog is empty + String indexUUID = client().admin() + .indices() + .prepareGetSettings(indexName1) + .get() + .getSetting(indexName1, IndexMetadata.SETTING_INDEX_UUID); + + Path remoteTranslogMetadataPath = Path.of(String.valueOf(remoteRepoPath), indexUUID, "/0/translog/metadata"); + Path remoteTranslogDataPath = Path.of(String.valueOf(remoteRepoPath), indexUUID, "/0/translog/data"); + + try ( + Stream translogMetadata = Files.list(remoteTranslogMetadataPath); + Stream translogData = Files.list(remoteTranslogDataPath) + ) { + assertTrue(translogData.count() > 0); + assertTrue(translogMetadata.count() > 0); + } + + // Clear the local data before stopping the node. This will make sure that remote translog is empty. + IndexShard indexShard = getIndexShard(primaryNodeName(indexName1), indexName1); + try (Stream files = Files.list(indexShard.shardPath().resolveTranslog())) { + IOUtils.deleteFilesIgnoringExceptions(files.collect(Collectors.toList())); + } + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(primaryNodeName(indexName1))); + + ensureRed(indexName1); + + client().admin() + .cluster() + .restoreRemoteStore(new RestoreRemoteStoreRequest().indices(indexName1).restoreAllShards(false), PlainActionFuture.newFuture()); + + ensureGreen(indexName1); + assertDocsPresentInIndex(client(), indexName1, numDocsInIndex1); + } + + protected IndexShard getIndexShard(String node, String indexName) { + final Index index = resolveIndex(indexName); + IndicesService indicesService = internalCluster().getInstance(IndicesService.class, node); + IndexService indexService = indicesService.indexService(index); + assertNotNull(indexService); + final Optional shardId = indexService.shardIds().stream().findFirst(); + return shardId.map(indexService::getShard).orElse(null); + } + public void testRestoreShallowCopySnapshotWithDifferentRepo() throws 
IOException { String clusterManagerNode = internalCluster().startClusterManagerOnlyNode(); String primary = internalCluster().startDataOnlyNode(); diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreStatsIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreStatsIT.java index 5e91176ed0473..b1dbb0a900bc7 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreStatsIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreStatsIT.java @@ -581,21 +581,23 @@ public void testNonZeroPrimaryStatsOnNewlyCreatedIndexWithZeroDocs() throws Exce .getRemoteStoreStats(); Arrays.stream(remoteStoreStats).forEach(statObject -> { RemoteSegmentTransferTracker.Stats segmentStats = statObject.getSegmentStats(); + RemoteTranslogTransferTracker.Stats translogStats = statObject.getTranslogStats(); if (statObject.getShardRouting().primary()) { assertTrue( segmentStats.totalUploadsSucceeded == 1 && segmentStats.totalUploadsStarted == segmentStats.totalUploadsSucceeded && segmentStats.totalUploadsFailed == 0 ); + // On primary shard creation, we upload to remote translog post primary mode activation. + // This changes upload stats to non-zero for primary shard. 
+ assertNonZeroTranslogUploadStatsNoFailures(translogStats); } else { assertTrue( segmentStats.directoryFileTransferTrackerStats.transferredBytesStarted == 0 && segmentStats.directoryFileTransferTrackerStats.transferredBytesSucceeded == 0 ); + assertZeroTranslogUploadStats(translogStats); } - - RemoteTranslogTransferTracker.Stats translogStats = statObject.getTranslogStats(); - assertZeroTranslogUploadStats(translogStats); assertZeroTranslogDownloadStats(translogStats); }); }, 5, TimeUnit.SECONDS); diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index fb4e9056153aa..3c348035ebbdd 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -640,7 +640,7 @@ public void updateShardState( if (currentRouting.initializing() && currentRouting.isRelocationTarget() == false && newRouting.active()) { // the cluster-manager started a recovering primary, activate primary mode. replicationTracker.activatePrimaryMode(getLocalCheckpoint()); - ensurePeerRecoveryRetentionLeasesExist(); + postActivatePrimaryMode(); } } else { assert currentRouting.primary() == false : "term is only increased as part of primary promotion"; @@ -711,8 +711,7 @@ public void updateShardState( // are brought up to date. checkpointPublisher.publish(this, getLatestReplicationCheckpoint()); } - - ensurePeerRecoveryRetentionLeasesExist(); + postActivatePrimaryMode(); /* * If this shard was serving as a replica shard when another shard was promoted to primary then * its Lucene index was reset during the primary term transition. 
In particular, the Lucene index @@ -3393,6 +3392,20 @@ assert getLocalCheckpoint() == primaryContext.getCheckpointStates().get(routingE synchronized (mutex) { replicationTracker.activateWithPrimaryContext(primaryContext); // make changes to primaryMode flag only under mutex } + postActivatePrimaryMode(); + } + + private void postActivatePrimaryMode() { + if (indexSettings.isRemoteStoreEnabled()) { + // We make sure to upload translog (even if it does not contain any operations) to remote translog. + // This helps to get a consistent state in remote store where both remote segment store and remote + // translog contains data. + try { + getEngine().translogManager().syncTranslog(); + } catch (IOException e) { + logger.error("Failed to sync translog to remote from new primary", e); + } + } ensurePeerRecoveryRetentionLeasesExist(); } diff --git a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java index 9ef9bec01cb38..fa3cf7676f55c 100644 --- a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java @@ -2745,6 +2745,7 @@ public void testRelocatedForRemoteTranslogBackedIndexWithAsyncDurability() throw AllocationId.newRelocation(routing.allocationId()) ); IndexShardTestCase.updateRoutingEntry(indexShard, routing); + indexDoc(indexShard, "_doc", "0"); assertTrue(indexShard.isSyncNeeded()); try { indexShard.relocated(routing.getTargetRelocatingShard().allocationId().getId(), primaryContext -> {}, () -> {}); From 5bd413c588f48589c6fd6c4de4e87550271aecf8 Mon Sep 17 00:00:00 2001 From: Peter Nied Date: Tue, 24 Oct 2023 07:38:18 -0400 Subject: [PATCH 33/45] GHA to verify checklist items completion in PR descriptions (#10800) Signed-off-by: Peter Nied --- .github/workflows/pull-request-checks.yml | 28 +++++++++++++++++++++++ CHANGELOG.md | 1 + 2 files changed, 29 insertions(+) create mode 100644 
.github/workflows/pull-request-checks.yml diff --git a/.github/workflows/pull-request-checks.yml b/.github/workflows/pull-request-checks.yml new file mode 100644 index 0000000000000..11998e36c2dbb --- /dev/null +++ b/.github/workflows/pull-request-checks.yml @@ -0,0 +1,28 @@ +name: Pull Request Checks + +on: + pull_request: + types: + [ + opened, + edited, + review_requested, + synchronize, + reopened, + ready_for_review, + ] + +jobs: + verify-description-checklist: + name: Verify Description Checklist + runs-on: ubuntu-latest + steps: + - uses: peternied/check-pull-request-description-checklist@v1 + with: + checklist-items: | + New functionality includes testing. + All tests pass + New functionality has been documented. + New functionality has javadoc added + Commits are signed per the DCO using --signoff + Commit changes are listed out in CHANGELOG.md file (See: [Changelog](../blob/main/CONTRIBUTING.md#changelog)) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c7e3ee151d64..b40878066960a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Remote cluster state] Download functionality of global metadata from remote store ([#10535](https://github.com/opensearch-project/OpenSearch/pull/10535)) - [Remote cluster state] Restore global metadata from remote store when local state is lost after quorum loss ([#10404](https://github.com/opensearch-project/OpenSearch/pull/10404)) - [AdmissionControl] Added changes for AdmissionControl Interceptor and AdmissionControlService for RateLimiting ([#9286](https://github.com/opensearch-project/OpenSearch/pull/9286)) +- GHA to verify checklist items completion in PR descriptions ([#10800](https://github.com/opensearch-project/OpenSearch/pull/10800)) ### Dependencies - Bump `log4j-core` from 2.18.0 to 2.19.0 From 91ac0846dbeb7379ae0772bf9f144cd628e9deac Mon Sep 17 00:00:00 2001 From: Varun Bansal Date: Wed, 25 Oct 2023 14:01:27 +0530 
Subject: [PATCH 34/45] link previous cluster uuid to current cluster uuid even if current cluster uuid is not committed (#10832) * link previous cluster uuid to current cluster uuid even if current cluster uuid is not committed Signed-off-by: bansvaru --- .../opensearch/gateway/GatewayMetaState.java | 29 +++++++------- .../GatewayMetaStatePersistedStateTests.java | 39 +++++++++++++++++++ 2 files changed, 52 insertions(+), 16 deletions(-) diff --git a/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java b/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java index 350a361a49a62..c3056276706a0 100644 --- a/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java +++ b/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java @@ -695,24 +695,21 @@ public void setLastAcceptedState(ClusterState clusterState) { try { final ClusterMetadataManifest manifest; if (shouldWriteFullClusterState(clusterState)) { - if (clusterState.metadata().clusterUUIDCommitted() == true) { - final Optional latestManifest = remoteClusterStateService.getLatestClusterMetadataManifest( - clusterState.getClusterName().value(), + final Optional latestManifest = remoteClusterStateService.getLatestClusterMetadataManifest( + clusterState.getClusterName().value(), + clusterState.metadata().clusterUUID() + ); + if (latestManifest.isPresent()) { + // The previous UUID should not change for the current UUID. So fetching the latest manifest + // from remote store and getting the previous UUID. + previousClusterUUID = latestManifest.get().getPreviousClusterUUID(); + } else { + // When the user starts the cluster with remote state disabled but later enables the remote state, + // there will not be any manifest for the current cluster UUID. + logger.error( + "Latest manifest is not present in remote store for cluster UUID: {}", clusterState.metadata().clusterUUID() ); - if (latestManifest.isPresent()) { - // The previous UUID should not change for the current UUID. 
So fetching the latest manifest - // from remote store and getting the previous UUID. - previousClusterUUID = latestManifest.get().getPreviousClusterUUID(); - } else { - // When the user starts the cluster with remote state disabled but later enables the remote state, - // there will not be any manifest for the current cluster UUID. - logger.error( - "Latest manifest is not present in remote store for cluster UUID: {}", - clusterState.metadata().clusterUUID() - ); - previousClusterUUID = ClusterState.UNKNOWN_UUID; - } } manifest = remoteClusterStateService.writeFullMetadata(clusterState, previousClusterUUID); } else { diff --git a/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java b/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java index fd113ed4313d7..74bae7b5eb7cf 100644 --- a/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java +++ b/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java @@ -87,10 +87,12 @@ import java.util.Collections; import java.util.List; import java.util.Locale; +import java.util.Optional; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Supplier; +import org.mockito.ArgumentCaptor; import org.mockito.Mockito; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_INDEX_UUID; @@ -765,6 +767,43 @@ public void testRemotePersistedState() throws IOException { assertThat(remotePersistedState.getLastAcceptedState().metadata().clusterUUIDCommitted(), equalTo(true)); } + public void testRemotePersistedStateNotCommitted() throws IOException { + final RemoteClusterStateService remoteClusterStateService = Mockito.mock(RemoteClusterStateService.class); + final String previousClusterUUID = "prev-cluster-uuid"; + final ClusterMetadataManifest manifest = ClusterMetadataManifest.builder() + .previousClusterUUID(previousClusterUUID) + 
.clusterTerm(1L) + .stateVersion(5L) + .build(); + Mockito.when(remoteClusterStateService.getLatestClusterMetadataManifest(Mockito.any(), Mockito.any())) + .thenReturn(Optional.of(manifest)); + Mockito.when(remoteClusterStateService.writeFullMetadata(Mockito.any(), Mockito.any())).thenReturn(manifest); + + Mockito.when(remoteClusterStateService.writeIncrementalMetadata(Mockito.any(), Mockito.any(), Mockito.any())).thenReturn(manifest); + CoordinationState.PersistedState remotePersistedState = new RemotePersistedState( + remoteClusterStateService, + ClusterState.UNKNOWN_UUID + ); + + assertThat(remotePersistedState.getLastAcceptedState(), nullValue()); + assertThat(remotePersistedState.getCurrentTerm(), equalTo(0L)); + + final long clusterTerm = randomNonNegativeLong(); + ClusterState clusterState = createClusterState( + randomNonNegativeLong(), + Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(clusterTerm).build()).build() + ); + clusterState = ClusterState.builder(clusterState) + .metadata(Metadata.builder(clusterState.getMetadata()).clusterUUID(randomAlphaOfLength(10)).clusterUUIDCommitted(false).build()) + .build(); + + remotePersistedState.setLastAcceptedState(clusterState); + ArgumentCaptor previousClusterUUIDCaptor = ArgumentCaptor.forClass(String.class); + ArgumentCaptor clusterStateCaptor = ArgumentCaptor.forClass(ClusterState.class); + Mockito.verify(remoteClusterStateService).writeFullMetadata(clusterStateCaptor.capture(), previousClusterUUIDCaptor.capture()); + assertEquals(previousClusterUUID, previousClusterUUIDCaptor.getValue()); + } + public void testRemotePersistedStateExceptionOnFullStateUpload() throws IOException { final RemoteClusterStateService remoteClusterStateService = Mockito.mock(RemoteClusterStateService.class); final String previousClusterUUID = "prev-cluster-uuid"; From b5299f13e0ca9a5f6979e8cb50137682e777b095 Mon Sep 17 00:00:00 2001 From: Sachin Kale Date: Wed, 25 Oct 2023 14:46:44 +0530 Subject: [PATCH 
35/45] Delete corrupted file to re-download from remote store (#10891) --------- Signed-off-by: Sachin Kale Co-authored-by: Sachin Kale --- .../opensearch/index/shard/IndexShard.java | 5 +- .../index/shard/IndexShardTests.java | 52 +++++++++++++++++++ .../org/opensearch/test/CorruptionUtils.java | 2 +- 3 files changed, 57 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 3c348035ebbdd..5b6257084e440 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -4962,7 +4962,8 @@ private String copySegmentFiles( return segmentNFile; } - private boolean localDirectoryContains(Directory localDirectory, String file, long checksum) { + // Visible for testing + boolean localDirectoryContains(Directory localDirectory, String file, long checksum) throws IOException { try (IndexInput indexInput = localDirectory.openInput(file, IOContext.DEFAULT)) { if (checksum == CodecUtil.retrieveChecksum(indexInput)) { return true; @@ -4981,6 +4982,8 @@ private boolean localDirectoryContains(Directory localDirectory, String file, lo logger.debug("File {} does not exist in local FS, downloading from remote store", file); } catch (IOException e) { logger.warn("Exception while reading checksum of file: {}, this can happen if file is corrupted", file); + // For any other exception on reading checksum, we delete the file to re-download again + localDirectory.deleteFile(file); } return false; } diff --git a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java index fa3cf7676f55c..f5f8cd1dcfb3f 100644 --- a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java @@ -32,6 +32,7 @@ package 
org.opensearch.index.shard; import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexCommit; @@ -45,6 +46,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.FilterDirectory; import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; import org.apache.lucene.tests.mockfile.ExtrasFS; import org.apache.lucene.tests.store.BaseDirectoryWrapper; import org.apache.lucene.util.BytesRef; @@ -91,6 +93,7 @@ import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.index.shard.ShardId; import org.opensearch.core.indices.breaker.NoneCircuitBreakerService; +import org.opensearch.core.util.FileSystemUtils; import org.opensearch.core.xcontent.MediaTypeRegistry; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.core.xcontent.XContentBuilder; @@ -163,11 +166,13 @@ import org.junit.Assert; import java.io.IOException; +import java.nio.channels.FileChannel; import java.nio.charset.Charset; import java.nio.file.FileVisitResult; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.SimpleFileVisitor; +import java.nio.file.StandardOpenOption; import java.nio.file.attribute.BasicFileAttributes; import java.util.ArrayList; import java.util.Arrays; @@ -4907,6 +4912,53 @@ public void testRecordsForceMerges() throws IOException { closeShards(shard); } + public void testLocalDirectoryContains() throws IOException { + IndexShard indexShard = newStartedShard(true); + int numDocs = between(1, 10); + for (int i = 0; i < numDocs; i++) { + indexDoc(indexShard, "_doc", Integer.toString(i)); + } + flushShard(indexShard); + indexShard.store().incRef(); + Directory localDirectory = indexShard.store().directory(); + Path shardPath = indexShard.shardPath().getDataPath().resolve(ShardPath.INDEX_FOLDER_NAME); + Path 
tempDir = createTempDir(); + for (String file : localDirectory.listAll()) { + if (file.equals("write.lock") || file.startsWith("extra")) { + continue; + } + boolean corrupted = randomBoolean(); + long checksum = 0; + try (IndexInput indexInput = localDirectory.openInput(file, IOContext.DEFAULT)) { + checksum = CodecUtil.retrieveChecksum(indexInput); + } + if (corrupted) { + Files.copy(shardPath.resolve(file), tempDir.resolve(file)); + try (FileChannel raf = FileChannel.open(shardPath.resolve(file), StandardOpenOption.READ, StandardOpenOption.WRITE)) { + CorruptionUtils.corruptAt(shardPath.resolve(file), raf, (int) (raf.size() - 8)); + } + } + if (corrupted == false) { + assertTrue(indexShard.localDirectoryContains(localDirectory, file, checksum)); + } else { + assertFalse(indexShard.localDirectoryContains(localDirectory, file, checksum)); + assertFalse(Files.exists(shardPath.resolve(file))); + } + } + try (Stream files = Files.list(tempDir)) { + files.forEach(p -> { + try { + Files.copy(p, shardPath.resolve(p.getFileName())); + } catch (IOException e) { + // Ignore + } + }); + } + FileSystemUtils.deleteSubDirectories(tempDir); + indexShard.store().decRef(); + closeShards(indexShard); + } + private void populateSampleRemoteSegmentStats(RemoteSegmentTransferTracker tracker) { tracker.addUploadBytesStarted(30L); tracker.addUploadBytesSucceeded(10L); diff --git a/test/framework/src/main/java/org/opensearch/test/CorruptionUtils.java b/test/framework/src/main/java/org/opensearch/test/CorruptionUtils.java index 0dce5e78bf91f..67522bb618cf1 100644 --- a/test/framework/src/main/java/org/opensearch/test/CorruptionUtils.java +++ b/test/framework/src/main/java/org/opensearch/test/CorruptionUtils.java @@ -121,7 +121,7 @@ public static void corruptFile(Random random, Path... 
files) throws IOException } } - static void corruptAt(Path path, FileChannel channel, int position) throws IOException { + public static void corruptAt(Path path, FileChannel channel, int position) throws IOException { // read channel.position(position); long filePointer = channel.position(); From a890e518aea1a706249001c3420c1740bd1a06dd Mon Sep 17 00:00:00 2001 From: Dhwanil Patel Date: Wed, 25 Oct 2023 20:05:07 +0530 Subject: [PATCH 36/45] Fix exception handling for global metadata upload (#10889) Signed-off-by: Dhwanil Patel --- .../gateway/remote/RemoteClusterStateService.java | 8 ++++++-- .../gateway/remote/RemoteClusterStateServiceTests.java | 8 +++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index 329ebd0dcd2b8..57b1b972e08c0 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -369,6 +369,8 @@ public ClusterMetadataManifest writeIncrementalMetadata( private String writeGlobalMetadata(ClusterState clusterState) throws IOException { AtomicReference result = new AtomicReference(); + AtomicReference exceptionReference = new AtomicReference(); + final BlobContainer globalMetadataContainer = globalMetadataContainer( clusterState.getClusterName().value(), clusterState.metadata().clusterUUID() @@ -381,7 +383,7 @@ private String writeGlobalMetadata(ClusterState clusterState) throws IOException LatchedActionListener completionListener = new LatchedActionListener<>(ActionListener.wrap(resp -> { logger.trace(String.format(Locale.ROOT, "GlobalMetadata uploaded successfully.")); result.set(globalMetadataContainer.path().buildAsString() + globalMetadataFilename); - }, ex -> { throw new GlobalMetadataTransferException(ex.getMessage(), ex); }), latch); + }, 
ex -> { exceptionReference.set(ex); }), latch); GLOBAL_METADATA_FORMAT.writeAsyncWithUrgentPriority( clusterState.metadata(), @@ -408,7 +410,9 @@ private String writeGlobalMetadata(ClusterState clusterState) throws IOException Thread.currentThread().interrupt(); throw exception; } - + if (exceptionReference.get() != null) { + throw new GlobalMetadataTransferException(exceptionReference.get().getMessage(), exceptionReference.get()); + } return result.get(); } diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index 5a43864f40c0c..ca88653f529f6 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -294,7 +294,13 @@ public void testWriteFullMetadataFailureForGlobalMetadata() throws IOException { ArgumentCaptor> actionListenerArgumentCaptor = ArgumentCaptor.forClass(ActionListener.class); doAnswer((i) -> { - actionListenerArgumentCaptor.getValue().onFailure(new RuntimeException("Cannot upload to remote")); + // For async write action listener will be called from different thread, replicating same behaviour here. 
+ new Thread(new Runnable() { + @Override + public void run() { + actionListenerArgumentCaptor.getValue().onFailure(new RuntimeException("Cannot upload to remote")); + } + }).start(); return null; }).when(container).asyncBlobUpload(any(WriteContext.class), actionListenerArgumentCaptor.capture()); From 3e64a7b92192a37546b92b811e04c4155dafbdfd Mon Sep 17 00:00:00 2001 From: Aman Khare <85096200+amkhar@users.noreply.github.com> Date: Wed, 25 Oct 2023 21:13:52 +0530 Subject: [PATCH 37/45] Change version to 2.12.0 for cluster state stats (#10915) Signed-off-by: Aman Khare Co-authored-by: Aman Khare --- .../main/java/org/opensearch/discovery/DiscoveryStats.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/opensearch/discovery/DiscoveryStats.java b/server/src/main/java/org/opensearch/discovery/DiscoveryStats.java index ea93ccd09ed39..fb341ac2ac569 100644 --- a/server/src/main/java/org/opensearch/discovery/DiscoveryStats.java +++ b/server/src/main/java/org/opensearch/discovery/DiscoveryStats.java @@ -64,7 +64,7 @@ public DiscoveryStats(PendingClusterStateStats queueStats, PublishClusterStateSt public DiscoveryStats(StreamInput in) throws IOException { queueStats = in.readOptionalWriteable(PendingClusterStateStats::new); publishStats = in.readOptionalWriteable(PublishClusterStateStats::new); - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + if (in.getVersion().onOrAfter(Version.V_2_12_0)) { clusterStateStats = in.readOptionalWriteable(ClusterStateStats::new); } else { clusterStateStats = null; @@ -75,7 +75,7 @@ public DiscoveryStats(StreamInput in) throws IOException { public void writeTo(StreamOutput out) throws IOException { out.writeOptionalWriteable(queueStats); out.writeOptionalWriteable(publishStats); - if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + if (out.getVersion().onOrAfter(Version.V_2_12_0)) { out.writeOptionalWriteable(clusterStateStats); } } From 6779633d76052585f5e20df5ac9388e14d75ffb2 Mon Sep 17 
00:00:00 2001 From: Marc Handalian Date: Wed, 25 Oct 2023 09:58:16 -0700 Subject: [PATCH 38/45] Fix flaky test testSendCorruptBytesToReplica (#10897) --- .../SegmentReplicationDisruptionIT.java | 167 ++++++++++++++++++ .../replication/SegmentReplicationIT.java | 136 -------------- 2 files changed, 167 insertions(+), 136 deletions(-) create mode 100644 server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationDisruptionIT.java diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationDisruptionIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationDisruptionIT.java new file mode 100644 index 0000000000000..66b26b5d25cfe --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationDisruptionIT.java @@ -0,0 +1,167 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.indices.replication; + +import org.apache.lucene.tests.util.LuceneTestCase; +import org.opensearch.action.admin.indices.recovery.RecoveryResponse; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.settings.Settings; +import org.opensearch.core.common.bytes.BytesArray; +import org.opensearch.index.shard.IndexShard; +import org.opensearch.indices.recovery.FileChunkRequest; +import org.opensearch.indices.recovery.RecoveryState; +import org.opensearch.test.OpenSearchIntegTestCase; +import org.opensearch.test.transport.MockTransportService; +import org.opensearch.transport.TransportRequest; +import org.opensearch.transport.TransportService; +import org.junit.Before; + +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; + +import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; + +/** + * These tests simulate corruption cases during replication. They are skipped on WindowsFS simulation where file renaming + * can fail with an access denied IOException because deletion is not permitted. 
+ */ +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +@LuceneTestCase.SuppressFileSystems("WindowsFS") +public class SegmentReplicationDisruptionIT extends SegmentReplicationBaseIT { + @Before + private void setup() { + internalCluster().startClusterManagerOnlyNode(); + } + + public void testSendCorruptBytesToReplica() throws Exception { + final String primaryNode = internalCluster().startDataOnlyNode(); + createIndex( + INDEX_NAME, + Settings.builder() + .put(indexSettings()) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + .put("index.refresh_interval", -1) + .build() + ); + ensureYellow(INDEX_NAME); + final String replicaNode = internalCluster().startDataOnlyNode(); + ensureGreen(INDEX_NAME); + + MockTransportService primaryTransportService = ((MockTransportService) internalCluster().getInstance( + TransportService.class, + primaryNode + )); + CountDownLatch latch = new CountDownLatch(1); + AtomicBoolean failed = new AtomicBoolean(false); + primaryTransportService.addSendBehavior( + internalCluster().getInstance(TransportService.class, replicaNode), + (connection, requestId, action, request, options) -> { + if (action.equals(SegmentReplicationTargetService.Actions.FILE_CHUNK) && failed.getAndSet(true) == false) { + FileChunkRequest req = (FileChunkRequest) request; + TransportRequest corrupt = new FileChunkRequest( + req.recoveryId(), + ((FileChunkRequest) request).requestSeqNo(), + ((FileChunkRequest) request).shardId(), + ((FileChunkRequest) request).metadata(), + ((FileChunkRequest) request).position(), + new BytesArray("test"), + false, + 0, + 0L + ); + connection.sendRequest(requestId, action, corrupt, options); + latch.countDown(); + } else { + connection.sendRequest(requestId, action, request, options); + } + } + ); + for (int i = 0; i < 100; i++) { + client().prepareIndex(INDEX_NAME) + .setId(String.valueOf(i)) + 
.setSource(jsonBuilder().startObject().field("field", i).endObject()) + .get(); + } + final long originalRecoveryTime = getRecoveryStopTime(replicaNode); + assertNotEquals(originalRecoveryTime, 0); + refresh(INDEX_NAME); + latch.await(); + assertTrue(failed.get()); + waitForNewPeerRecovery(replicaNode, originalRecoveryTime); + // reset checkIndex to ensure our original shard doesn't throw + resetCheckIndexStatus(); + waitForSearchableDocs(100, primaryNode, replicaNode); + } + + public void testWipeSegmentBetweenSyncs() throws Exception { + internalCluster().startClusterManagerOnlyNode(); + final String primaryNode = internalCluster().startDataOnlyNode(); + createIndex( + INDEX_NAME, + Settings.builder() + .put(indexSettings()) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + .put("index.refresh_interval", -1) + .build() + ); + ensureYellow(INDEX_NAME); + final String replicaNode = internalCluster().startDataOnlyNode(); + ensureGreen(INDEX_NAME); + + for (int i = 0; i < 10; i++) { + client().prepareIndex(INDEX_NAME) + .setId(String.valueOf(i)) + .setSource(jsonBuilder().startObject().field("field", i).endObject()) + .get(); + } + refresh(INDEX_NAME); + ensureGreen(INDEX_NAME); + final long originalRecoveryTime = getRecoveryStopTime(replicaNode); + + final IndexShard indexShard = getIndexShard(replicaNode, INDEX_NAME); + waitForSearchableDocs(INDEX_NAME, 10, List.of(replicaNode)); + indexShard.store().directory().deleteFile("_0.si"); + + for (int i = 11; i < 21; i++) { + client().prepareIndex(INDEX_NAME) + .setId(String.valueOf(i)) + .setSource(jsonBuilder().startObject().field("field", i).endObject()) + .get(); + } + refresh(INDEX_NAME); + waitForNewPeerRecovery(replicaNode, originalRecoveryTime); + resetCheckIndexStatus(); + waitForSearchableDocs(20, primaryNode, replicaNode); + } + + private void waitForNewPeerRecovery(String replicaNode, long originalRecoveryTime) throws Exception { + assertBusy(() -> { + // 
assert we have a peer recovery after the original + final long time = getRecoveryStopTime(replicaNode); + assertNotEquals(time, 0); + assertNotEquals(originalRecoveryTime, time); + + }, 1, TimeUnit.MINUTES); + } + + private long getRecoveryStopTime(String nodeName) { + final RecoveryResponse recoveryResponse = client().admin().indices().prepareRecoveries(INDEX_NAME).get(); + final List recoveryStates = recoveryResponse.shardRecoveryStates().get(INDEX_NAME); + for (RecoveryState recoveryState : recoveryStates) { + if (recoveryState.getTargetNode().getName().equals(nodeName)) { + return recoveryState.getTimer().stopTime(); + } + } + return 0L; + } +} diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationIT.java index 81556cc270151..f48df082a25dc 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationIT.java @@ -24,7 +24,6 @@ import org.apache.lucene.util.BytesRef; import org.opensearch.action.admin.indices.alias.Alias; import org.opensearch.action.admin.indices.flush.FlushRequest; -import org.opensearch.action.admin.indices.recovery.RecoveryResponse; import org.opensearch.action.admin.indices.stats.IndicesStatsRequest; import org.opensearch.action.admin.indices.stats.IndicesStatsResponse; import org.opensearch.action.get.GetResponse; @@ -59,7 +58,6 @@ import org.opensearch.common.lucene.index.OpenSearchDirectoryReader; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; -import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.core.common.io.stream.NamedWriteableRegistry; import org.opensearch.core.index.shard.ShardId; import org.opensearch.core.xcontent.XContentBuilder; @@ -73,7 +71,6 @@ import 
org.opensearch.index.engine.NRTReplicationReaderManager; import org.opensearch.index.shard.IndexShard; import org.opensearch.indices.recovery.FileChunkRequest; -import org.opensearch.indices.recovery.RecoveryState; import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.node.NodeClosedException; @@ -85,7 +82,6 @@ import org.opensearch.test.InternalTestCluster; import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.test.transport.MockTransportService; -import org.opensearch.transport.TransportRequest; import org.opensearch.transport.TransportService; import org.junit.Before; @@ -98,7 +94,6 @@ import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; import java.util.stream.Collectors; import static java.util.Arrays.asList; @@ -1781,135 +1776,4 @@ public void testRealtimeTermVectorRequestsUnSuccessful() throws IOException { assertThat(response.getIndex(), equalTo(INDEX_NAME)); } - - public void testSendCorruptBytesToReplica() throws Exception { - // this test stubs transport calls specific to node-node replication. 
- assumeFalse( - "Skipping the test as its not compatible with segment replication with remote store.", - segmentReplicationWithRemoteEnabled() - ); - final String primaryNode = internalCluster().startDataOnlyNode(); - createIndex( - INDEX_NAME, - Settings.builder() - .put(indexSettings()) - .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) - .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) - .put("index.refresh_interval", -1) - .build() - ); - ensureYellow(INDEX_NAME); - final String replicaNode = internalCluster().startDataOnlyNode(); - ensureGreen(INDEX_NAME); - - MockTransportService primaryTransportService = ((MockTransportService) internalCluster().getInstance( - TransportService.class, - primaryNode - )); - CountDownLatch latch = new CountDownLatch(1); - AtomicBoolean failed = new AtomicBoolean(false); - primaryTransportService.addSendBehavior( - internalCluster().getInstance(TransportService.class, replicaNode), - (connection, requestId, action, request, options) -> { - if (action.equals(SegmentReplicationTargetService.Actions.FILE_CHUNK) && failed.getAndSet(true) == false) { - FileChunkRequest req = (FileChunkRequest) request; - logger.info("SENDING CORRUPT file chunk [{}] lastChunk: {}", req, req.lastChunk()); - TransportRequest corrupt = new FileChunkRequest( - req.recoveryId(), - ((FileChunkRequest) request).requestSeqNo(), - ((FileChunkRequest) request).shardId(), - ((FileChunkRequest) request).metadata(), - ((FileChunkRequest) request).position(), - new BytesArray("test"), - false, - 0, - 0L - ); - connection.sendRequest(requestId, action, corrupt, options); - latch.countDown(); - } else { - connection.sendRequest(requestId, action, request, options); - } - } - ); - for (int i = 0; i < 100; i++) { - client().prepareIndex(INDEX_NAME) - .setId(String.valueOf(i)) - .setSource(jsonBuilder().startObject().field("field", i).endObject()) - .get(); - } - final long originalRecoveryTime = getRecoveryStopTime(replicaNode); - assertNotEquals(originalRecoveryTime, 
0); - refresh(INDEX_NAME); - latch.await(); - assertTrue(failed.get()); - waitForNewPeerRecovery(replicaNode, originalRecoveryTime); - // reset checkIndex to ensure our original shard doesn't throw - resetCheckIndexStatus(); - waitForSearchableDocs(100, primaryNode, replicaNode); - } - - public void testWipeSegmentBetweenSyncs() throws Exception { - internalCluster().startClusterManagerOnlyNode(); - final String primaryNode = internalCluster().startDataOnlyNode(); - createIndex( - INDEX_NAME, - Settings.builder() - .put(indexSettings()) - .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) - .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) - .put("index.refresh_interval", -1) - .build() - ); - ensureYellow(INDEX_NAME); - final String replicaNode = internalCluster().startDataOnlyNode(); - ensureGreen(INDEX_NAME); - - for (int i = 0; i < 10; i++) { - client().prepareIndex(INDEX_NAME) - .setId(String.valueOf(i)) - .setSource(jsonBuilder().startObject().field("field", i).endObject()) - .get(); - } - refresh(INDEX_NAME); - ensureGreen(INDEX_NAME); - final long originalRecoveryTime = getRecoveryStopTime(replicaNode); - - final IndexShard indexShard = getIndexShard(replicaNode, INDEX_NAME); - waitForSearchableDocs(INDEX_NAME, 10, List.of(replicaNode)); - indexShard.store().directory().deleteFile("_0.si"); - - for (int i = 11; i < 21; i++) { - client().prepareIndex(INDEX_NAME) - .setId(String.valueOf(i)) - .setSource(jsonBuilder().startObject().field("field", i).endObject()) - .get(); - } - refresh(INDEX_NAME); - waitForNewPeerRecovery(replicaNode, originalRecoveryTime); - resetCheckIndexStatus(); - waitForSearchableDocs(20, primaryNode, replicaNode); - } - - private void waitForNewPeerRecovery(String replicaNode, long originalRecoveryTime) throws Exception { - assertBusy(() -> { - // assert we have a peer recovery after the original - final long time = getRecoveryStopTime(replicaNode); - assertNotEquals(time, 0); - assertNotEquals(originalRecoveryTime, time); - - }, 1, 
TimeUnit.MINUTES); - } - - private long getRecoveryStopTime(String nodeName) { - final RecoveryResponse recoveryResponse = client().admin().indices().prepareRecoveries(INDEX_NAME).get(); - final List recoveryStates = recoveryResponse.shardRecoveryStates().get(INDEX_NAME); - logger.info("Recovery states {}", recoveryResponse); - for (RecoveryState recoveryState : recoveryStates) { - if (recoveryState.getTargetNode().getName().equals(nodeName)) { - return recoveryState.getTimer().stopTime(); - } - } - return 0L; - } } From 44a9f180a5643e3b4dee1150c5a68d1591270ced Mon Sep 17 00:00:00 2001 From: Aman Khare <85096200+amkhar@users.noreply.github.com> Date: Thu, 26 Oct 2023 00:21:12 +0530 Subject: [PATCH 39/45] Fix flaky testClusterStateBatchedUpdates test (#10922) Signed-off-by: Aman Khare Co-authored-by: Aman Khare --- .../org/opensearch/cluster/service/MasterServiceTests.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/src/test/java/org/opensearch/cluster/service/MasterServiceTests.java b/server/src/test/java/org/opensearch/cluster/service/MasterServiceTests.java index 4c0ca826f5dcc..85f6c129944fa 100644 --- a/server/src/test/java/org/opensearch/cluster/service/MasterServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/service/MasterServiceTests.java @@ -487,6 +487,9 @@ public void onFailure(String source, Exception e) { } }); assertBusy(mockAppender::assertAllExpectationsMatched); + // verify stats values after state is published + assertEquals(1, clusterManagerService.getClusterStateStats().getUpdateSuccess()); + assertEquals(0, clusterManagerService.getClusterStateStats().getUpdateFailed()); } } } @@ -691,9 +694,6 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS submittedTasksPerThread.get(entry.getKey()).get() ); } - // verify stats values after state is published - assertEquals(1, clusterManagerService.getClusterStateStats().getUpdateSuccess()); - assertEquals(0, 
clusterManagerService.getClusterStateStats().getUpdateFailed()); } } From fb6fe1bf4518a4a4a8507564fbd4db1971f29151 Mon Sep 17 00:00:00 2001 From: Marc Handalian Date: Wed, 25 Oct 2023 14:20:48 -0700 Subject: [PATCH 40/45] Fix flaky test IndexShardTests.testLocalDirectoryContains (#10929) This test is breaking for WindowsFS only. Moving it to a separate file where it is skipped on WindowsFS. Signed-off-by: Marc Handalian --- .../index/shard/IndexShardTests.java | 52 ------------- .../RemoteIndexShardCorruptionTests.java | 75 +++++++++++++++++++ 2 files changed, 75 insertions(+), 52 deletions(-) create mode 100644 server/src/test/java/org/opensearch/index/shard/RemoteIndexShardCorruptionTests.java diff --git a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java index f5f8cd1dcfb3f..fa3cf7676f55c 100644 --- a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java @@ -32,7 +32,6 @@ package org.opensearch.index.shard; import org.apache.logging.log4j.Logger; -import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexCommit; @@ -46,7 +45,6 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.FilterDirectory; import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexInput; import org.apache.lucene.tests.mockfile.ExtrasFS; import org.apache.lucene.tests.store.BaseDirectoryWrapper; import org.apache.lucene.util.BytesRef; @@ -93,7 +91,6 @@ import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.index.shard.ShardId; import org.opensearch.core.indices.breaker.NoneCircuitBreakerService; -import org.opensearch.core.util.FileSystemUtils; import org.opensearch.core.xcontent.MediaTypeRegistry; import 
org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.core.xcontent.XContentBuilder; @@ -166,13 +163,11 @@ import org.junit.Assert; import java.io.IOException; -import java.nio.channels.FileChannel; import java.nio.charset.Charset; import java.nio.file.FileVisitResult; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.SimpleFileVisitor; -import java.nio.file.StandardOpenOption; import java.nio.file.attribute.BasicFileAttributes; import java.util.ArrayList; import java.util.Arrays; @@ -4912,53 +4907,6 @@ public void testRecordsForceMerges() throws IOException { closeShards(shard); } - public void testLocalDirectoryContains() throws IOException { - IndexShard indexShard = newStartedShard(true); - int numDocs = between(1, 10); - for (int i = 0; i < numDocs; i++) { - indexDoc(indexShard, "_doc", Integer.toString(i)); - } - flushShard(indexShard); - indexShard.store().incRef(); - Directory localDirectory = indexShard.store().directory(); - Path shardPath = indexShard.shardPath().getDataPath().resolve(ShardPath.INDEX_FOLDER_NAME); - Path tempDir = createTempDir(); - for (String file : localDirectory.listAll()) { - if (file.equals("write.lock") || file.startsWith("extra")) { - continue; - } - boolean corrupted = randomBoolean(); - long checksum = 0; - try (IndexInput indexInput = localDirectory.openInput(file, IOContext.DEFAULT)) { - checksum = CodecUtil.retrieveChecksum(indexInput); - } - if (corrupted) { - Files.copy(shardPath.resolve(file), tempDir.resolve(file)); - try (FileChannel raf = FileChannel.open(shardPath.resolve(file), StandardOpenOption.READ, StandardOpenOption.WRITE)) { - CorruptionUtils.corruptAt(shardPath.resolve(file), raf, (int) (raf.size() - 8)); - } - } - if (corrupted == false) { - assertTrue(indexShard.localDirectoryContains(localDirectory, file, checksum)); - } else { - assertFalse(indexShard.localDirectoryContains(localDirectory, file, checksum)); - assertFalse(Files.exists(shardPath.resolve(file))); 
- } - } - try (Stream files = Files.list(tempDir)) { - files.forEach(p -> { - try { - Files.copy(p, shardPath.resolve(p.getFileName())); - } catch (IOException e) { - // Ignore - } - }); - } - FileSystemUtils.deleteSubDirectories(tempDir); - indexShard.store().decRef(); - closeShards(indexShard); - } - private void populateSampleRemoteSegmentStats(RemoteSegmentTransferTracker tracker) { tracker.addUploadBytesStarted(30L); tracker.addUploadBytesSucceeded(10L); diff --git a/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardCorruptionTests.java b/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardCorruptionTests.java new file mode 100644 index 0000000000000..21bf580712761 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardCorruptionTests.java @@ -0,0 +1,75 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.shard; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.tests.util.LuceneTestCase; +import org.opensearch.core.util.FileSystemUtils; +import org.opensearch.test.CorruptionUtils; + +import java.io.IOException; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.stream.Stream; + +@LuceneTestCase.SuppressFileSystems("WindowsFS") +public class RemoteIndexShardCorruptionTests extends IndexShardTestCase { + + public void testLocalDirectoryContains() throws IOException { + IndexShard indexShard = newStartedShard(true); + int numDocs = between(1, 10); + for (int i = 0; i < numDocs; i++) { + indexDoc(indexShard, "_doc", Integer.toString(i)); + } + flushShard(indexShard); + indexShard.store().incRef(); + Directory localDirectory = indexShard.store().directory(); + Path shardPath = indexShard.shardPath().getDataPath().resolve(ShardPath.INDEX_FOLDER_NAME); + Path tempDir = createTempDir(); + for (String file : localDirectory.listAll()) { + if (file.equals("write.lock") || file.startsWith("extra")) { + continue; + } + boolean corrupted = randomBoolean(); + long checksum = 0; + try (IndexInput indexInput = localDirectory.openInput(file, IOContext.DEFAULT)) { + checksum = CodecUtil.retrieveChecksum(indexInput); + } + if (corrupted) { + Files.copy(shardPath.resolve(file), tempDir.resolve(file)); + try (FileChannel raf = FileChannel.open(shardPath.resolve(file), StandardOpenOption.READ, StandardOpenOption.WRITE)) { + CorruptionUtils.corruptAt(shardPath.resolve(file), raf, (int) (raf.size() - 8)); + } + } + if (corrupted == false) { + assertTrue(indexShard.localDirectoryContains(localDirectory, file, checksum)); + } else { + assertFalse(indexShard.localDirectoryContains(localDirectory, file, 
checksum)); + assertFalse(Files.exists(shardPath.resolve(file))); + } + } + try (Stream files = Files.list(tempDir)) { + files.forEach(p -> { + try { + Files.copy(p, shardPath.resolve(p.getFileName())); + } catch (IOException e) { + // Ignore + } + }); + } + FileSystemUtils.deleteSubDirectories(tempDir); + indexShard.store().decRef(); + closeShards(indexShard); + } +} From b17d4a8d6fa2463adbf5fc7fbaa1b1bcc4424121 Mon Sep 17 00:00:00 2001 From: Ashish Date: Thu, 26 Oct 2023 06:20:41 +0530 Subject: [PATCH 41/45] [Remote Store] Fix refresh lag bug on primary term change (#10918) * [Remote Store] Fix refresh lag bug on primary term change Signed-off-by: Ashish Singh * Add Integ Tests Signed-off-by: Ashish Singh * Incorporate PR review feedback Signed-off-by: Ashish Singh * Empty-Commit Signed-off-by: Ashish Singh --------- Signed-off-by: Ashish Singh --- .../remotestore/RemoteStoreStatsIT.java | 23 ++++++++++++++++ .../shard/RemoteStoreRefreshListener.java | 27 ++++++++++++------- 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreStatsIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreStatsIT.java index b1dbb0a900bc7..2d3ab135d0377 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreStatsIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreStatsIT.java @@ -655,6 +655,29 @@ public void testStatsCorrectnessOnFailover() { logger.info("Test completed"); } + public void testZeroLagOnCreateIndex() throws InterruptedException { + setup(); + String clusterManagerNode = internalCluster().getClusterManagerName(); + + int numOfShards = randomIntBetween(1, 3); + createIndex(INDEX_NAME, remoteStoreIndexSettings(1, numOfShards)); + ensureGreen(INDEX_NAME); + long currentTimeNs = System.nanoTime(); + while (currentTimeNs == System.nanoTime()) { + Thread.sleep(10); + } + + for (int i = 0; i < 
numOfShards; i++) { + RemoteStoreStatsResponse response = client(clusterManagerNode).admin() + .cluster() + .prepareRemoteStoreStats(INDEX_NAME, String.valueOf(i)) + .get(); + for (RemoteStoreStats remoteStoreStats : response.getRemoteStoreStats()) { + assertEquals(0, remoteStoreStats.getSegmentStats().refreshTimeLagMs); + } + } + } + private void indexDocs() { for (int i = 0; i < randomIntBetween(5, 10); i++) { if (randomBoolean()) { diff --git a/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java b/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java index 3e97b07abfb5d..464adc88ae16f 100644 --- a/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java +++ b/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java @@ -86,7 +86,7 @@ public final class RemoteStoreRefreshListener extends CloseableRetryableRefreshL private final RemoteSegmentStoreDirectory remoteDirectory; private final RemoteSegmentTransferTracker segmentTracker; private final Map localSegmentChecksumMap; - private long primaryTerm; + private volatile long primaryTerm; private volatile Iterator backoffDelayIterator; private final SegmentReplicationCheckpointPublisher checkpointPublisher; @@ -126,10 +126,9 @@ protected void runAfterRefreshExactlyOnce(boolean didRefresh) { // We have 2 separate methods to check if sync needs to be done or not. This is required since we use the return boolean // from isReadyForUpload to schedule refresh retries as the index shard or the primary mode are not in complete // ready state. 
- if (shouldSync(didRefresh) && isReadyForUpload()) { - segmentTracker.updateLocalRefreshTimeAndSeqNo(); + if (shouldSync(didRefresh, true) && isReadyForUpload()) { try { - initializeRemoteDirectoryOnTermUpdate(); + segmentTracker.updateLocalRefreshTimeAndSeqNo(); try (GatedCloseable segmentInfosGatedCloseable = indexShard.getSegmentInfosSnapshot()) { Collection localSegmentsPostRefresh = segmentInfosGatedCloseable.get().files(true); updateLocalSizeMapAndTracker(localSegmentsPostRefresh); @@ -150,7 +149,7 @@ protected void runAfterRefreshExactlyOnce(boolean didRefresh) { @Override protected boolean performAfterRefreshWithPermit(boolean didRefresh) { boolean successful; - if (shouldSync(didRefresh)) { + if (shouldSync(didRefresh, false)) { successful = syncSegments(); } else { successful = true; @@ -158,10 +157,15 @@ protected boolean performAfterRefreshWithPermit(boolean didRefresh) { return successful; } - private boolean shouldSync(boolean didRefresh) { - return this.primaryTerm != indexShard.getOperationPrimaryTerm() - // If the readers change, didRefresh is always true. - || didRefresh + /** + * This checks if there is a sync required to remote. + * + * @param didRefresh if the readers changed. + * @param skipPrimaryTermCheck consider change in primary term or not for should sync + * @return true if sync is needed + */ + private boolean shouldSync(boolean didRefresh, boolean skipPrimaryTermCheck) { + boolean shouldSync = didRefresh // If the readers change, didRefresh is always true. // The third condition exists for uploading the zero state segments where the refresh has not changed the reader // reference, but it is important to upload the zero state segments so that the restore does not break. || remoteDirectory.getSegmentsUploadedToRemoteStore().isEmpty() @@ -169,6 +173,10 @@ private boolean shouldSync(boolean didRefresh) { // we update the primary term and the same condition would not evaluate to true again in syncSegments. 
// Below check ensures that if there is commit, then that gets picked up by both 1st and 2nd shouldSync call. || isRefreshAfterCommitSafe(); + if (shouldSync || skipPrimaryTermCheck) { + return shouldSync; + } + return this.primaryTerm != indexShard.getOperationPrimaryTerm(); } private boolean syncSegments() { @@ -188,6 +196,7 @@ private boolean syncSegments() { try { try { + initializeRemoteDirectoryOnTermUpdate(); // if a new segments_N file is present in local that is not uploaded to remote store yet, it // is considered as a first refresh post commit. A cleanup of stale commit files is triggered. // This is done to avoid delete post each refresh. From 003b2cf30103282b031f4462ed53488edb60b9a4 Mon Sep 17 00:00:00 2001 From: Suraj Singh Date: Wed, 25 Oct 2023 18:22:19 -0700 Subject: [PATCH 42/45] [Segment Replication] Handle exceptions on local file read during replication (#10933) * Handle exceptions on file read Signed-off-by: Suraj Singh * Address review comments Signed-off-by: Suraj Singh --------- Signed-off-by: Suraj Singh --- .../replication/SegmentReplicationTarget.java | 11 +- .../index/shard/RemoteIndexShardTests.java | 159 ++++++++++++++---- 2 files changed, 134 insertions(+), 36 deletions(-) diff --git a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java index cd6dbe8af90d9..cc71ef816e525 100644 --- a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java +++ b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java @@ -232,6 +232,7 @@ private List getFiles(CheckpointInfoResponse checkpointInfo) return missingFiles; } + // pkg private for tests private boolean validateLocalChecksum(StoreFileMetadata file) { try (IndexInput indexInput = indexShard.store().directory().openInput(file.name(), IOContext.DEFAULT)) { String checksum = 
Store.digestToString(CodecUtil.retrieveChecksum(indexInput)); @@ -243,7 +244,15 @@ private boolean validateLocalChecksum(StoreFileMetadata file) { return false; } } catch (IOException e) { - throw new UncheckedIOException("Error reading " + file, e); + logger.warn("Error reading " + file, e); + // Delete file on exceptions so that it can be re-downloaded. This is safe to do as this file is local only + // and not referenced by reader. + try { + indexShard.store().directory().deleteFile(file.name()); + } catch (IOException ex) { + throw new UncheckedIOException("Error reading " + file, e); + } + return false; } } diff --git a/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java index 703a7d457d5b6..2ce0bdc607189 100644 --- a/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java @@ -31,18 +31,20 @@ import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; import org.opensearch.indices.replication.common.ReplicationFailedException; import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.test.CorruptionUtils; import org.hamcrest.MatcherAssert; import org.junit.Assert; import java.io.IOException; +import java.nio.channels.FileChannel; import java.nio.file.Path; +import java.nio.file.StandardOpenOption; import java.util.Arrays; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.CountDownLatch; -import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BiConsumer; import java.util.stream.Collectors; @@ -371,37 +373,9 @@ public void testSegRepSucceedsOnPreviousCopiedFiles() throws Exception { final SegmentReplicationSourceFactory sourceFactory = mock(SegmentReplicationSourceFactory.class); final SegmentReplicationTargetService 
targetService = newTargetService(sourceFactory); - Runnable[] runAfterGetFiles = { () -> { throw new RuntimeException("Simulated"); }, () -> {} }; - AtomicInteger index = new AtomicInteger(0); - RemoteStoreReplicationSource testRSReplicationSource = new RemoteStoreReplicationSource(replica) { - @Override - public void getCheckpointMetadata( - long replicationId, - ReplicationCheckpoint checkpoint, - ActionListener listener - ) { - super.getCheckpointMetadata(replicationId, checkpoint, listener); - } - - @Override - public void getSegmentFiles( - long replicationId, - ReplicationCheckpoint checkpoint, - List filesToFetch, - IndexShard indexShard, - BiConsumer fileProgressTracker, - ActionListener listener - ) { - super.getSegmentFiles(replicationId, checkpoint, filesToFetch, indexShard, (fileName, bytesRecovered) -> {}, listener); - runAfterGetFiles[index.getAndIncrement()].run(); - } - - @Override - public String getDescription() { - return "TestRemoteStoreReplicationSource"; - } - }; - when(sourceFactory.get(any())).thenReturn(testRSReplicationSource); + when(sourceFactory.get(any())).thenReturn( + getRemoteStoreReplicationSource(replica, () -> { throw new RuntimeException("Simulated"); }) + ); CountDownLatch latch = new CountDownLatch(1); // Start first round of segment replication. 
This should fail with simulated error but with replica having @@ -412,6 +386,7 @@ public String getDescription() { new SegmentReplicationTargetService.SegmentReplicationListener() { @Override public void onReplicationDone(SegmentReplicationState state) { + latch.countDown(); Assert.fail("Replication should fail with simulated error"); } @@ -421,9 +396,9 @@ public void onReplicationFailure( ReplicationFailedException e, boolean sendShardFailure ) { + latch.countDown(); assertFalse(sendShardFailure); logger.error("Replication error", e); - latch.countDown(); } } ); @@ -439,7 +414,8 @@ public void onReplicationFailure( assertEquals("Files should be copied to disk", false, onDiskFiles.isEmpty()); assertEquals(target.state().getStage(), SegmentReplicationState.Stage.GET_FILES); - // Start next round of segment replication + // Start next round of segment replication and not throwing exception resulting in commit on replica + when(sourceFactory.get(any())).thenReturn(getRemoteStoreReplicationSource(replica, () -> {})); CountDownLatch waitForSecondRound = new CountDownLatch(1); final SegmentReplicationTarget newTarget = targetService.startReplication( replica, @@ -456,9 +432,9 @@ public void onReplicationFailure( ReplicationFailedException e, boolean sendShardFailure ) { + waitForSecondRound.countDown(); logger.error("Replication error", e); Assert.fail("Replication should not fail"); - waitForSecondRound.countDown(); } } ); @@ -471,6 +447,119 @@ public void onReplicationFailure( } } + /** + * This test validates that local non-readable (corrupt or partial) files on disk are deleted rather than failing the + * replication event. This test mimics local files (not referenced by the reader) by throwing an exception after file copy and + * blocking the reader update. Once this is done, it corrupts one segment file and ensures that the file is deleted in the next + * round of segment replication by verifying the doc count.
+ */ + public void testNoFailuresOnFileReads() throws Exception { + try (ReplicationGroup shards = createGroup(1, getIndexSettings(), new NRTReplicationEngineFactory())) { + shards.startAll(); + IndexShard primary = shards.getPrimary(); + final IndexShard replica = shards.getReplicas().get(0); + + final int docCount = 10; + shards.indexDocs(docCount); + primary.refresh("Test"); + + final SegmentReplicationSourceFactory sourceFactory = mock(SegmentReplicationSourceFactory.class); + final SegmentReplicationTargetService targetService = newTargetService(sourceFactory); + when(sourceFactory.get(any())).thenReturn( + getRemoteStoreReplicationSource(replica, () -> { throw new RuntimeException("Simulated"); }) + ); + CountDownLatch waitOnReplicationCompletion = new CountDownLatch(1); + + // Start first round of segment replication. This should fail with simulated error but with replica having + // files in its local store but not in active reader. + SegmentReplicationTarget segmentReplicationTarget = targetService.startReplication( + replica, + primary.getLatestReplicationCheckpoint(), + new SegmentReplicationTargetService.SegmentReplicationListener() { + @Override + public void onReplicationDone(SegmentReplicationState state) { + waitOnReplicationCompletion.countDown(); + Assert.fail("Replication should fail with simulated error"); + } + + @Override + public void onReplicationFailure( + SegmentReplicationState state, + ReplicationFailedException e, + boolean sendShardFailure + ) { + waitOnReplicationCompletion.countDown(); + assertFalse(sendShardFailure); + } + } + ); + waitOnReplicationCompletion.await(); + assertBusy(() -> { assertEquals("Target should be closed", 0, segmentReplicationTarget.refCount()); }); + String fileToCorrupt = null; + // Corrupt one data file + Path shardPath = replica.shardPath().getDataPath().resolve(ShardPath.INDEX_FOLDER_NAME); + for (String file : replica.store().directory().listAll()) { + if (file.equals("write.lock") || 
file.startsWith("extra") || file.startsWith("segment")) { + continue; + } + fileToCorrupt = file; + logger.info("--> Corrupting file {}", fileToCorrupt); + try (FileChannel raf = FileChannel.open(shardPath.resolve(file), StandardOpenOption.READ, StandardOpenOption.WRITE)) { + CorruptionUtils.corruptAt(shardPath.resolve(file), raf, (int) (raf.size() - 8)); + } + break; + } + Assert.assertNotNull(fileToCorrupt); + + // Ingest more data and start next round of segment replication + shards.indexDocs(docCount); + primary.refresh("Post corruption"); + replicateSegments(primary, List.of(replica)); + + assertDocCount(primary, 2 * docCount); + assertDocCount(replica, 2 * docCount); + + final Store.RecoveryDiff diff = Store.segmentReplicationDiff(primary.getSegmentMetadataMap(), replica.getSegmentMetadataMap()); + assertTrue(diff.missing.isEmpty()); + assertTrue(diff.different.isEmpty()); + + // clean up + shards.removeReplica(replica); + closeShards(replica); + } + } + + private RemoteStoreReplicationSource getRemoteStoreReplicationSource(IndexShard shard, Runnable postGetFilesRunnable) { + return new RemoteStoreReplicationSource(shard) { + @Override + public void getCheckpointMetadata( + long replicationId, + ReplicationCheckpoint checkpoint, + ActionListener listener + ) { + super.getCheckpointMetadata(replicationId, checkpoint, listener); + } + + @Override + public void getSegmentFiles( + long replicationId, + ReplicationCheckpoint checkpoint, + List filesToFetch, + IndexShard indexShard, + BiConsumer fileProgressTracker, + ActionListener listener + ) { + super.getSegmentFiles(replicationId, checkpoint, filesToFetch, indexShard, (fileName, bytesRecovered) -> {}, listener); + postGetFilesRunnable.run(); + } + + @Override + public String getDescription() { + return "TestRemoteStoreReplicationSource"; + } + }; + } + @Override protected void validateShardIdleWithNoReplicas(IndexShard primary) { // ensure search idle conditions are met. 
From fe8b2d545da09e3d80f74cd57a7ae991e1005abe Mon Sep 17 00:00:00 2001 From: Ashish Date: Thu, 26 Oct 2023 07:23:18 +0530 Subject: [PATCH 43/45] [Remote Store] Fix shard failure on flush due to upload timeout (#10926) --- .../transfer/TranslogTransferManager.java | 10 +- .../TranslogTransferManagerTests.java | 94 +++++++++++++++++++ 2 files changed, 100 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java index ece6f6d5a534f..2f6055df87804 100644 --- a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java +++ b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java @@ -42,7 +42,6 @@ import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; import java.util.stream.Collectors; import static org.opensearch.index.translog.transfer.FileSnapshot.TransferFileSnapshot; @@ -156,14 +155,17 @@ public boolean transferSnapshot(TransferSnapshot transferSnapshot, TranslogTrans try { if (latch.await(TRANSFER_TIMEOUT_IN_MILLIS, TimeUnit.MILLISECONDS) == false) { - Exception ex = new TimeoutException("Timed out waiting for transfer of snapshot " + transferSnapshot + " to complete"); + Exception ex = new TranslogUploadFailedException( + "Timed out waiting for transfer of snapshot " + transferSnapshot + " to complete" + ); exceptionList.forEach(ex::addSuppressed); throw ex; } } catch (InterruptedException ex) { - exceptionList.forEach(ex::addSuppressed); + Exception exception = new TranslogUploadFailedException("Failed to upload " + transferSnapshot, ex); + exceptionList.forEach(exception::addSuppressed); Thread.currentThread().interrupt(); - throw ex; + throw exception; } if (exceptionList.isEmpty()) { TransferFileSnapshot tlogMetadata = 
prepareMetadata(transferSnapshot); diff --git a/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java b/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java index af596e7df02c2..e34bc078896f9 100644 --- a/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java +++ b/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java @@ -10,6 +10,7 @@ import org.apache.lucene.tests.util.LuceneTestCase; import org.opensearch.action.LatchedActionListener; +import org.opensearch.common.SetOnce; import org.opensearch.common.blobstore.BlobContainer; import org.opensearch.common.blobstore.BlobMetadata; import org.opensearch.common.blobstore.BlobPath; @@ -35,6 +36,7 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.LinkedList; @@ -180,6 +182,93 @@ public void onUploadFailed(TransferSnapshot transferSnapshot, Exception ex) { assertEquals(4, fileTransferTracker.allUploaded().size()); } + public void testTransferSnapshotOnUploadTimeout() throws Exception { + doAnswer(invocationOnMock -> { + Thread.sleep(31 * 1000); + return null; + }).when(transferService).uploadBlobs(anySet(), anyMap(), any(ActionListener.class), any(WritePriority.class)); + FileTransferTracker fileTransferTracker = new FileTransferTracker( + new ShardId("index", "indexUUid", 0), + remoteTranslogTransferTracker + ); + TranslogTransferManager translogTransferManager = new TranslogTransferManager( + shardId, + transferService, + remoteBaseTransferPath, + fileTransferTracker, + remoteTranslogTransferTracker + ); + SetOnce exception = new SetOnce<>(); + translogTransferManager.transferSnapshot(createTransferSnapshot(), new TranslogTransferListener() { + @Override + public void onUploadComplete(TransferSnapshot 
transferSnapshot) {} + + @Override + public void onUploadFailed(TransferSnapshot transferSnapshot, Exception ex) { + exception.set(ex); + } + }); + assertNotNull(exception.get()); + assertTrue(exception.get() instanceof TranslogUploadFailedException); + assertEquals("Timed out waiting for transfer of snapshot test-to-string to complete", exception.get().getMessage()); + } + + public void testTransferSnapshotOnThreadInterrupt() throws Exception { + SetOnce uploadThread = new SetOnce<>(); + doAnswer(invocationOnMock -> { + uploadThread.set(new Thread(() -> { + ActionListener listener = invocationOnMock.getArgument(2); + try { + Thread.sleep(31 * 1000); + } catch (InterruptedException ignore) { + List list = new ArrayList<>(invocationOnMock.getArgument(0)); + listener.onFailure(new FileTransferException(list.get(0), ignore)); + } + })); + uploadThread.get().start(); + return null; + }).when(transferService).uploadBlobs(anySet(), anyMap(), any(ActionListener.class), any(WritePriority.class)); + FileTransferTracker fileTransferTracker = new FileTransferTracker( + new ShardId("index", "indexUUid", 0), + remoteTranslogTransferTracker + ); + TranslogTransferManager translogTransferManager = new TranslogTransferManager( + shardId, + transferService, + remoteBaseTransferPath, + fileTransferTracker, + remoteTranslogTransferTracker + ); + SetOnce exception = new SetOnce<>(); + + Thread thread = new Thread(() -> { + try { + translogTransferManager.transferSnapshot(createTransferSnapshot(), new TranslogTransferListener() { + @Override + public void onUploadComplete(TransferSnapshot transferSnapshot) {} + + @Override + public void onUploadFailed(TransferSnapshot transferSnapshot, Exception ex) { + exception.set(ex); + } + }); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + thread.start(); + + Thread.sleep(1000); + // Interrupt the thread + thread.interrupt(); + assertBusy(() -> { + assertNotNull(exception.get()); + assertTrue(exception.get() instanceof 
TranslogUploadFailedException); + assertEquals("Failed to upload test-to-string", exception.get().getMessage()); + }); + uploadThread.get().interrupt(); + } + private TransferSnapshot createTransferSnapshot() { return new TransferSnapshot() { @Override @@ -232,6 +321,11 @@ public Set getTranslogFileSnapshots() { public TranslogTransferMetadata getTranslogTransferMetadata() { return new TranslogTransferMetadata(primaryTerm, generation, minTranslogGeneration, randomInt(5)); } + + @Override + public String toString() { + return "test-to-string"; + } }; } From d1c94b524ea54f3fce3e40e211bb7108800c07de Mon Sep 17 00:00:00 2001 From: Dhwanil Patel Date: Thu, 26 Oct 2023 11:35:37 +0530 Subject: [PATCH 44/45] Improved logging around remote cluster state (#10892) * Improved logging around remote cluster state Signed-off-by: Dhwanil Patel --- .../remote/RemoteClusterStateService.java | 28 +++++++++++++------ .../recovery/RemoteStoreRestoreService.java | 1 + 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index 57b1b972e08c0..fa4eef79c46dc 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -244,9 +244,8 @@ public ClusterMetadataManifest writeFullMetadata(ClusterState clusterState, Stri allUploadedIndexMetadata.size() ); } else { - // todo change to debug logger.info( - "writing cluster state took [{}ms]; " + "wrote full state with [{}] indices", + "writing cluster state took [{}ms]; " + "wrote full state with [{}] indices and global metadata", durationMillis, allUploadedIndexMetadata.size() ); @@ -285,6 +284,7 @@ public ClusterMetadataManifest writeIncrementalMetadata( if (updateGlobalMetadata || previousManifest.getGlobalMetadataFileName() == null) { 
globalMetadataFile = writeGlobalMetadata(clusterState); } else { + logger.debug("Global metadata has not updated in cluster state, skipping upload of it"); globalMetadataFile = previousManifest.getGlobalMetadataFileName(); } @@ -305,7 +305,7 @@ public ClusterMetadataManifest writeIncrementalMetadata( for (final IndexMetadata indexMetadata : clusterState.metadata().indices().values()) { final Long previousVersion = previousStateIndexMetadataVersionByName.get(indexMetadata.getIndex().getName()); if (previousVersion == null || indexMetadata.getVersion() != previousVersion) { - logger.trace( + logger.debug( "updating metadata for [{}], changing version from [{}] to [{}]", indexMetadata.getIndex(), previousVersion, @@ -342,18 +342,22 @@ public ClusterMetadataManifest writeIncrementalMetadata( if (durationMillis >= slowWriteLoggingThreshold.getMillis()) { logger.warn( "writing cluster state took [{}ms] which is above the warn threshold of [{}]; " - + "wrote metadata for [{}] indices and skipped [{}] unchanged indices", + + "wrote metadata for [{}] indices and skipped [{}] unchanged indices, global metadata updated : [{}]", durationMillis, slowWriteLoggingThreshold, numIndicesUpdated, - numIndicesUnchanged + numIndicesUnchanged, + updateGlobalMetadata ); } else { - logger.trace( - "writing cluster state took [{}ms]; " + "wrote metadata for [{}] indices and skipped [{}] unchanged indices", + logger.info( + "writing cluster state for version [{}] took [{}ms]; " + + "wrote metadata for [{}] indices and skipped [{}] unchanged indices, global metadata updated : [{}]", + manifest.getStateVersion(), durationMillis, numIndicesUpdated, - numIndicesUnchanged + numIndicesUnchanged, + updateGlobalMetadata ); } return manifest; @@ -605,6 +609,11 @@ private void writeMetadataManifest(String clusterName, String clusterUUID, Clust blobStoreRepository.getCompressor(), FORMAT_PARAMS ); + logger.debug( + "Metadata manifest file [{}] written during [{}] phase. 
", + fileName, + uploadManifest.isCommitted() ? "commit" : "publish" + ); } private String fetchPreviousClusterUUID(String clusterName, String clusterUUID) { @@ -912,6 +921,7 @@ private List createClusterChain(final Map { indexMetadataMap.put(indexMetadata.getIndex().getName(), new Tuple<>(true, indexMetadata)); From 746ca09ac3ce33b27e94c871a94f0ded076e19e3 Mon Sep 17 00:00:00 2001 From: Sooraj Sinha <81695996+soosinha@users.noreply.github.com> Date: Thu, 26 Oct 2023 11:55:13 +0530 Subject: [PATCH 45/45] Fix valid cluster UUID logic for uncommitted cluster UUIDs (#10916) Signed-off-by: Sooraj Sinha --- .../remote/RemoteClusterStateService.java | 36 +++++++------- .../RemoteClusterStateServiceTests.java | 49 +++++++++++++++---- 2 files changed, 59 insertions(+), 26 deletions(-) diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index fa4eef79c46dc..b3309b1fd8a63 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -881,25 +881,31 @@ private Map getLatestManifestForAllClusterUUIDs * @return List of cluster UUIDs. 
The first element is the most recent cluster UUID in the chain */ private List createClusterChain(final Map manifestsByClusterUUID, final String clusterName) { - final Map clusterUUIDGraph = manifestsByClusterUUID.values() + final List validClusterManifests = manifestsByClusterUUID.values() .stream() + .filter(this::isValidClusterUUID) + .collect(Collectors.toList()); + final Map clusterUUIDGraph = validClusterManifests.stream() .collect(Collectors.toMap(ClusterMetadataManifest::getClusterUUID, ClusterMetadataManifest::getPreviousClusterUUID)); - final List validClusterUUIDs = manifestsByClusterUUID.values() - .stream() - .filter(m -> !isInvalidClusterUUID(m) && !clusterUUIDGraph.containsValue(m.getClusterUUID())) + final List topLevelClusterUUIDs = validClusterManifests.stream() .map(ClusterMetadataManifest::getClusterUUID) + .filter(clusterUUID -> !clusterUUIDGraph.containsValue(clusterUUID)) .collect(Collectors.toList()); - if (validClusterUUIDs.isEmpty()) { - logger.info("There is no valid previous cluster UUID"); + + if (topLevelClusterUUIDs.isEmpty()) { + // This can occur only when there are no valid cluster UUIDs + assert validClusterManifests.isEmpty() : "There are no top level cluster UUIDs even when there are valid cluster UUIDs"; + logger.info("There is no valid previous cluster UUID. All cluster UUIDs evaluated are: {}", manifestsByClusterUUID.keySet()); return Collections.emptyList(); } - if (validClusterUUIDs.size() > 1) { + if (topLevelClusterUUIDs.size() > 1) { + logger.info("Top level cluster UUIDs: {}", topLevelClusterUUIDs); // If the valid cluster UUIDs are more that 1, it means there was some race condition where // more then 2 cluster manager nodes tried to become active cluster manager and published // 2 cluster UUIDs which followed the same previous UUID. 
final Map manifestsByClusterUUIDTrimmed = trimClusterUUIDs( manifestsByClusterUUID, - validClusterUUIDs, + topLevelClusterUUIDs, clusterName ); if (manifestsByClusterUUID.size() == manifestsByClusterUUIDTrimmed.size()) { @@ -908,14 +914,14 @@ private List createClusterChain(final Map validChain = new ArrayList<>(); - String currentUUID = validClusterUUIDs.get(0); + String currentUUID = topLevelClusterUUIDs.get(0); while (currentUUID != null && !ClusterState.UNKNOWN_UUID.equals(currentUUID)) { validChain.add(currentUUID); // Getting the previous cluster UUID of a cluster UUID from the clusterUUID Graph @@ -942,11 +948,7 @@ private Map trimClusterUUIDs( // Here we compare the manifest of current UUID to that of previous UUID // In case currentUUID's latest manifest is same as previous UUIDs latest manifest, // that means it was restored from previousUUID and no IndexMetadata update was performed on it. - if (ClusterState.UNKNOWN_UUID.equals(currentManifest.getPreviousClusterUUID())) { - if (currentManifest.getIndices().isEmpty()) { - trimmedUUIDs.remove(clusterUUID); - } - } else { + if (!ClusterState.UNKNOWN_UUID.equals(currentManifest.getPreviousClusterUUID())) { ClusterMetadataManifest previousManifest = trimmedUUIDs.get(currentManifest.getPreviousClusterUUID()); if (isMetadataEqual(currentManifest, previousManifest, clusterName) && isGlobalMetadataEqual(currentManifest, previousManifest, clusterName)) { @@ -985,8 +987,8 @@ private boolean isGlobalMetadataEqual(ClusterMetadataManifest first, ClusterMeta return Metadata.isGlobalResourcesMetadataEquals(firstGlobalMetadata, secondGlobalMetadata); } - private boolean isInvalidClusterUUID(ClusterMetadataManifest manifest) { - return !manifest.isClusterUUIDCommitted(); + private boolean isValidClusterUUID(ClusterMetadataManifest manifest) { + return manifest.isClusterUUIDCommitted(); } /** diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java 
b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index ca88653f529f6..586618bd1ecff 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -911,7 +911,7 @@ public void testGetValidPreviousClusterUUIDWithMultipleChains() throws IOExcepti "cluster-uuid3", "cluster-uuid1" ); - mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, randomBoolean()); + mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, randomBoolean(), Collections.emptyMap()); remoteClusterStateService.start(); String previousClusterUUID = remoteClusterStateService.getLastKnownUUIDFromRemote("test-cluster"); @@ -933,6 +933,23 @@ public void testGetValidPreviousClusterUUIDWithInvalidMultipleChains() throws IO assertThrows(IllegalStateException.class, () -> remoteClusterStateService.getLastKnownUUIDFromRemote("test-cluster")); } + public void testGetValidPreviousClusterUUIDWhenLastUUIDUncommitted() throws IOException { + Map clusterUUIDsPointers = Map.of( + "cluster-uuid1", + ClusterState.UNKNOWN_UUID, + "cluster-uuid2", + "cluster-uuid1", + "cluster-uuid3", + "cluster-uuid2" + ); + Map clusterUUIDCommitted = Map.of("cluster-uuid1", true, "cluster-uuid2", true, "cluster-uuid3", false); + mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, clusterUUIDCommitted); + + remoteClusterStateService.start(); + String previousClusterUUID = remoteClusterStateService.getLastKnownUUIDFromRemote("test-cluster"); + assertThat(previousClusterUUID, equalTo("cluster-uuid2")); + } + public void testDeleteStaleClusterUUIDs() throws IOException { final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); ClusterMetadataManifest clusterMetadataManifest = ClusterMetadataManifest.builder() @@ -1128,11 +1145,21 @@ public void 
testGlobalMetadataUploadWaitTimeSetting() { } private void mockObjectsForGettingPreviousClusterUUID(Map clusterUUIDsPointers) throws IOException { - mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, false); + mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, false, Collections.emptyMap()); } - private void mockObjectsForGettingPreviousClusterUUID(Map clusterUUIDsPointers, boolean differGlobalMetadata) - throws IOException { + private void mockObjectsForGettingPreviousClusterUUID( + Map clusterUUIDsPointers, + Map clusterUUIDCommitted + ) throws IOException { + mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, false, clusterUUIDCommitted); + } + + private void mockObjectsForGettingPreviousClusterUUID( + Map clusterUUIDsPointers, + boolean differGlobalMetadata, + Map clusterUUIDCommitted + ) throws IOException { final BlobPath blobPath = mock(BlobPath.class); when((blobStoreRepository.basePath())).thenReturn(blobPath); when(blobPath.add(anyString())).thenReturn(blobPath); @@ -1155,7 +1182,8 @@ private void mockObjectsForGettingPreviousClusterUUID(Map cluste clusterUUIDsPointers.get("cluster-uuid1"), randomAlphaOfLength(10), uploadedIndexMetadataList1, - "test-metadata1" + "test-metadata1", + clusterUUIDCommitted.getOrDefault("cluster-uuid1", true) ); Settings indexSettings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT).build(); IndexMetadata indexMetadata1 = IndexMetadata.builder("index1") @@ -1184,7 +1212,8 @@ private void mockObjectsForGettingPreviousClusterUUID(Map cluste clusterUUIDsPointers.get("cluster-uuid2"), randomAlphaOfLength(10), uploadedIndexMetadataList2, - "test-metadata2" + "test-metadata2", + clusterUUIDCommitted.getOrDefault("cluster-uuid2", true) ); IndexMetadata indexMetadata3 = IndexMetadata.builder("index1") .settings(indexSettings) @@ -1229,7 +1258,8 @@ private void mockObjectsForGettingPreviousClusterUUID(Map cluste clusterUUIDsPointers.get("cluster-uuid3"), 
randomAlphaOfLength(10), uploadedIndexMetadataList3, - "test-metadata3" + "test-metadata3", + clusterUUIDCommitted.getOrDefault("cluster-uuid3", true) ); mockBlobContainerForGlobalMetadata(blobContainer3, clusterManifest3, metadata3); mockBlobContainer(blobContainer3, clusterManifest3, indexMetadataMap3, ClusterMetadataManifest.CODEC_V1); @@ -1257,7 +1287,8 @@ private ClusterMetadataManifest generateClusterMetadataManifest( String previousClusterUUID, String stateUUID, List uploadedIndexMetadata, - String globalMetadataFileName + String globalMetadataFileName, + Boolean isUUIDCommitted ) { return ClusterMetadataManifest.builder() .indices(uploadedIndexMetadata) @@ -1269,7 +1300,7 @@ private ClusterMetadataManifest generateClusterMetadataManifest( .opensearchVersion(VersionUtils.randomOpenSearchVersion(random())) .previousClusterUUID(previousClusterUUID) .committed(true) - .clusterUUIDCommitted(true) + .clusterUUIDCommitted(isUUIDCommitted) .globalMetadataFileName(globalMetadataFileName) .codecVersion(ClusterMetadataManifest.CODEC_V1) .build();