diff --git a/.github/workflows/assemble.yml b/.github/workflows/assemble.yml index 294627622a136..b3838b8e5ae97 100644 --- a/.github/workflows/assemble.yml +++ b/.github/workflows/assemble.yml @@ -30,8 +30,11 @@ jobs: - name: Setup docker (missing on MacOS) id: setup_docker if: runner.os == 'macos' + continue-on-error: true run: | - exit 0; + brew install docker colima coreutils + gtimeout 15m colima start + shell: bash - name: Run Gradle (assemble) if: runner.os == 'macos' && steps.setup_docker.outcome != 'success' run: | @@ -45,4 +48,4 @@ jobs: - name: Run Gradle (assemble) if: runner.os == 'macos' && steps.setup_docker.outcome == 'success' run: | - exit 0; + ./gradlew assemble --parallel --no-build-cache -PDISABLE_BUILD_CACHE -Druntime.java=${{ matrix.java }} diff --git a/CHANGELOG.md b/CHANGELOG.md index fd389fde76c1b..e09dc9d192b4e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -53,8 +53,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add canRemain method to TargetPoolAllocationDecider to move shards from local to remote pool for hot to warm tiering ([#15010](https://github.com/opensearch-project/OpenSearch/pull/15010)) - ClusterManagerTaskThrottler Improvements ([#15508](https://github.com/opensearch-project/OpenSearch/pull/15508)) - Reset DiscoveryNodes in all transport node actions request ([#15131](https://github.com/opensearch-project/OpenSearch/pull/15131)) +- Adding WithFieldName interface for QueryBuilders with fieldName ([#15705](https://github.com/opensearch-project/OpenSearch/pull/15705)) - Relax the join validation for Remote State publication ([#15471](https://github.com/opensearch-project/OpenSearch/pull/15471)) +- Static RemotePublication setting added, removed experimental feature flag ([#15478](https://github.com/opensearch-project/OpenSearch/pull/15478)) - MultiTermQueries in keyword fields now default to `indexed` approach and gated behind cluster setting ([#15637](https://github.com/opensearch-project/OpenSearch/pull/15637)) +- [Remote Publication] Upload incremental cluster state on master re-election ([#15145](https://github.com/opensearch-project/OpenSearch/pull/15145)) - Making _cat/allocation API use indexLevelStats ([#15292](https://github.com/opensearch-project/OpenSearch/pull/15292)) - Memory optimisations in _cluster/health API ([#15492](https://github.com/opensearch-project/OpenSearch/pull/15492)) @@ -112,6 +115,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Fix unchecked cast in dynamic action map getter ([#15394](https://github.com/opensearch-project/OpenSearch/pull/15394)) - Fix null values indexed as "null" strings in flat_object field ([#14069](https://github.com/opensearch-project/OpenSearch/pull/14069)) - Fix terms query on wildcard field returns nothing ([#15607](https://github.com/opensearch-project/OpenSearch/pull/15607)) +- Fix remote snapshot file_cache exceeding capacity ([#15077](https://github.com/opensearch-project/OpenSearch/pull/15077)) ### Security diff --git a/plugins/events-correlation-engine/src/main/java/org/opensearch/plugin/correlation/core/index/query/CorrelationQueryBuilder.java b/plugins/events-correlation-engine/src/main/java/org/opensearch/plugin/correlation/core/index/query/CorrelationQueryBuilder.java index 806ac0389b5f3..e95b68e855cca 100644 --- a/plugins/events-correlation-engine/src/main/java/org/opensearch/plugin/correlation/core/index/query/CorrelationQueryBuilder.java +++ 
b/plugins/events-correlation-engine/src/main/java/org/opensearch/plugin/correlation/core/index/query/CorrelationQueryBuilder.java @@ -23,6 +23,7 @@ import org.opensearch.index.query.AbstractQueryBuilder; import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryShardContext; +import org.opensearch.index.query.WithFieldName; import org.opensearch.plugin.correlation.core.index.mapper.VectorFieldMapper; import java.io.IOException; @@ -36,7 +37,7 @@ * * @opensearch.internal */ -public class CorrelationQueryBuilder extends AbstractQueryBuilder { +public class CorrelationQueryBuilder extends AbstractQueryBuilder implements WithFieldName { private static final Logger log = LogManager.getLogger(CorrelationQueryBuilder.class); protected static final ParseField VECTOR_FIELD = new ParseField("vector"); @@ -205,6 +206,7 @@ public void setFieldName(String fieldName) { * get field name * @return field name */ + @Override public String fieldName() { return fieldName; } diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerIT.java index 47ec3f25bcd64..cf17a58d937de 100644 --- a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerIT.java @@ -35,12 +35,12 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; -import static org.opensearch.common.util.FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL; import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.CLUSTER_STATE_CLEANUP_INTERVAL_DEFAULT; import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING; import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.RETAINED_MANIFESTS; import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.SKIP_CLEANUP_STATE_CHANGES; import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING; +import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_PUBLICATION_SETTING_KEY; import static org.opensearch.gateway.remote.RemoteUploadStats.REMOTE_UPLOAD; import static org.opensearch.gateway.remote.routingtable.RemoteIndexRoutingTable.INDEX_ROUTING_TABLE; import static org.opensearch.indices.IndicesService.CLUSTER_DEFAULT_INDEX_REFRESH_INTERVAL_SETTING; @@ -189,7 +189,7 @@ public void testRemoteCleanupDeleteStaleIndexRoutingFiles() throws Exception { RemoteStoreEnums.PathType.HASHED_PREFIX.toString() ) .put("node.attr." 
+ REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, REMOTE_ROUTING_TABLE_REPO) - .put(REMOTE_PUBLICATION_EXPERIMENTAL, true); + .put(REMOTE_PUBLICATION_SETTING_KEY, true); int shardCount = randomIntBetween(1, 2); int replicaCount = 1; diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteRoutingTableServiceIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteRoutingTableServiceIT.java index 0a8c13adb034f..d143cbd7c3450 100644 --- a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteRoutingTableServiceIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteRoutingTableServiceIT.java @@ -37,8 +37,8 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; -import static org.opensearch.common.util.FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL; import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING; +import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_PUBLICATION_SETTING_KEY; import static org.opensearch.gateway.remote.routingtable.RemoteIndexRoutingTable.INDEX_ROUTING_TABLE; import static org.opensearch.indices.IndicesService.CLUSTER_DEFAULT_INDEX_REFRESH_INTERVAL_SETTING; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY; @@ -66,7 +66,7 @@ protected Settings nodeSettings(int nodeOrdinal) { RemoteStoreEnums.PathType.HASHED_PREFIX.toString() ) .put("node.attr." + REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, REMOTE_ROUTING_TABLE_REPO) - .put(REMOTE_PUBLICATION_EXPERIMENTAL, true) + .put(REMOTE_PUBLICATION_SETTING_KEY, true) .put( RemoteClusterStateService.REMOTE_CLUSTER_STATE_CHECKSUM_VALIDATION_MODE_SETTING.getKey(), RemoteClusterStateService.RemoteClusterStateValidationMode.FAILURE diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteStatePublicationIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteStatePublicationIT.java index 0778782c5bec5..faab3645ae894 100644 --- a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteStatePublicationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteStatePublicationIT.java @@ -20,6 +20,7 @@ import org.opensearch.common.blobstore.BlobPath; import org.opensearch.common.settings.Settings; import org.opensearch.discovery.DiscoveryStats; +import org.opensearch.gateway.GatewayMetaState; import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata; import org.opensearch.gateway.remote.model.RemoteClusterMetadataManifest; import org.opensearch.gateway.remote.model.RemoteRoutingTableBlobStore; @@ -43,16 +44,17 @@ import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.concurrent.ExecutionException; import java.util.function.Function; import java.util.stream.Collectors; import static org.opensearch.action.admin.cluster.node.info.NodesInfoRequest.Metric.SETTINGS; import static org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest.Metric.DISCOVERY; import static org.opensearch.cluster.metadata.Metadata.isGlobalStateEquals; -import static org.opensearch.common.util.FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL; -import static org.opensearch.common.util.FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL_SETTING; import static 
org.opensearch.gateway.remote.RemoteClusterStateAttributesManager.DISCOVERY_NODES; import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING; +import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_PUBLICATION_SETTING; +import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_PUBLICATION_SETTING_KEY; import static org.opensearch.gateway.remote.RemoteClusterStateUtils.DELIMITER; import static org.opensearch.gateway.remote.model.RemoteClusterBlocks.CLUSTER_BLOCKS; import static org.opensearch.gateway.remote.model.RemoteCoordinationMetadata.COORDINATION_METADATA; @@ -74,7 +76,7 @@ public class RemoteStatePublicationIT extends RemoteStoreBaseIntegTestCase { private static final String REMOTE_STATE_PREFIX = "!"; private static final String REMOTE_ROUTING_PREFIX = "_"; private boolean isRemoteStateEnabled = true; - private String isRemotePublicationEnabled = "true"; + private boolean isRemotePublicationEnabled = true; private boolean hasRemoteStateCharPrefix; private boolean hasRemoteRoutingCharPrefix; @@ -82,16 +84,11 @@ public class RemoteStatePublicationIT extends RemoteStoreBaseIntegTestCase { public void setup() { asyncUploadMockFsRepo = false; isRemoteStateEnabled = true; - isRemotePublicationEnabled = "true"; + isRemotePublicationEnabled = true; hasRemoteStateCharPrefix = randomBoolean(); hasRemoteRoutingCharPrefix = randomBoolean(); } - @Override - protected Settings featureFlagSettings() { - return Settings.builder().put(super.featureFlagSettings()).put(REMOTE_PUBLICATION_EXPERIMENTAL, isRemotePublicationEnabled).build(); - } - @Override protected Settings nodeSettings(int nodeOrdinal) { String routingTableRepoName = "remote-routing-repo"; @@ -109,6 +106,7 @@ protected Settings nodeSettings(int nodeOrdinal) { return Settings.builder() .put(super.nodeSettings(nodeOrdinal)) .put(REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), isRemoteStateEnabled) + .put(REMOTE_PUBLICATION_SETTING_KEY, isRemotePublicationEnabled) .put("node.attr." + REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, routingTableRepoName) .put(routingTableRepoTypeAttributeKey, ReloadableFsRepository.TYPE) .put(routingTableRepoSettingsAttributeKeyPrefix + "location", segmentRepoPath) @@ -116,6 +114,7 @@ protected Settings nodeSettings(int nodeOrdinal) { RemoteClusterStateService.REMOTE_CLUSTER_STATE_CHECKSUM_VALIDATION_MODE_SETTING.getKey(), RemoteClusterStateService.RemoteClusterStateValidationMode.FAILURE ) + .put(REMOTE_PUBLICATION_SETTING_KEY, isRemotePublicationEnabled) .put( RemoteClusterStateService.CLUSTER_REMOTE_STORE_STATE_PATH_PREFIX.getKey(), hasRemoteStateCharPrefix ? 
REMOTE_STATE_PREFIX : "" @@ -235,6 +234,7 @@ public void testRemotePublicationDownloadStats() { assertDataNodeDownloadStats(nodesStatsResponseDataNode); } + @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/15767") public void testRemotePublicationDisabledByRollingRestart() throws Exception { prepareCluster(3, 2, INDEX_NAME, 1, 2); ensureStableCluster(5); @@ -248,7 +248,7 @@ public void testRemotePublicationDisabledByRollingRestart() throws Exception { @Override public Settings onNodeStopped(String nodeName) { restartedMasters.add(nodeName); - return Settings.builder().put(REMOTE_PUBLICATION_EXPERIMENTAL, false).build(); + return Settings.builder().put(REMOTE_PUBLICATION_SETTING_KEY, false).build(); } @Override @@ -287,9 +287,7 @@ public void doAfterNodes(int n, Client client) { .addMetric(SETTINGS.metricName()) .get(); // if masterRestarted is true Publication Setting should be false, and vice versa - assertTrue( - REMOTE_PUBLICATION_EXPERIMENTAL_SETTING.get(nodesInfoResponse.getNodes().get(0).getSettings()) != activeCMRestarted - ); + assertTrue(REMOTE_PUBLICATION_SETTING.get(nodesInfoResponse.getNodes().get(0).getSettings()) != activeCMRestarted); followingCMs.forEach(node -> { PersistedStateRegistry registry = internalCluster().getInstance(PersistedStateRegistry.class, node); @@ -336,7 +334,7 @@ public void doAfterNodes(int n, Client client) { .addMetric(SETTINGS.metricName()) .get(); // if masterRestarted is true Publication Setting should be false, and vice versa - assertFalse(REMOTE_PUBLICATION_EXPERIMENTAL_SETTING.get(nodesInfoResponse.getNodes().get(0).getSettings())); + assertFalse(REMOTE_PUBLICATION_SETTING.get(nodesInfoResponse.getNodes().get(0).getSettings())); followingCMs.forEach(node -> { PersistedStateRegistry registry = internalCluster().getInstance(PersistedStateRegistry.class, node); @@ -346,6 +344,59 @@ public void doAfterNodes(int n, Client client) { }); } + public void testMasterReElectionUsesIncrementalUpload() throws IOException { + prepareCluster(3, 2, INDEX_NAME, 1, 1); + PersistedStateRegistry persistedStateRegistry = internalCluster().getClusterManagerNodeInstance(PersistedStateRegistry.class); + GatewayMetaState.RemotePersistedState remotePersistedState = (GatewayMetaState.RemotePersistedState) persistedStateRegistry + .getPersistedState(PersistedStateRegistry.PersistedStateType.REMOTE); + ClusterMetadataManifest manifest = remotePersistedState.getLastAcceptedManifest(); + // force elected master to step down + internalCluster().stopCurrentClusterManagerNode(); + ensureStableCluster(4); + + persistedStateRegistry = internalCluster().getClusterManagerNodeInstance(PersistedStateRegistry.class); + CoordinationState.PersistedState persistedStateAfterElection = persistedStateRegistry.getPersistedState( + PersistedStateRegistry.PersistedStateType.REMOTE + ); + ClusterMetadataManifest manifestAfterElection = persistedStateAfterElection.getLastAcceptedManifest(); + + // coordination metadata is updated, it will be unequal + assertNotEquals(manifest.getCoordinationMetadata(), manifestAfterElection.getCoordinationMetadata()); + // all other attributes are not uploaded again and will be pointing to same files in manifest after new master is elected + assertEquals(manifest.getClusterUUID(), manifestAfterElection.getClusterUUID()); + assertEquals(manifest.getIndices(), manifestAfterElection.getIndices()); + assertEquals(manifest.getSettingsMetadata(), manifestAfterElection.getSettingsMetadata()); + assertEquals(manifest.getTemplatesMetadata(), 
manifestAfterElection.getTemplatesMetadata()); + assertEquals(manifest.getCustomMetadataMap(), manifestAfterElection.getCustomMetadataMap()); + assertEquals(manifest.getRoutingTableVersion(), manifest.getRoutingTableVersion()); + assertEquals(manifest.getIndicesRouting(), manifestAfterElection.getIndicesRouting()); + } + + public void testVotingConfigAreCommitted() throws ExecutionException, InterruptedException { + prepareCluster(3, 2, INDEX_NAME, 1, 2); + ensureStableCluster(5); + ensureGreen(INDEX_NAME); + // add two new nodes to the cluster, to update the voting config + internalCluster().startClusterManagerOnlyNodes(2, Settings.EMPTY); + ensureStableCluster(7); + + internalCluster().getInstances(PersistedStateRegistry.class).forEach(persistedStateRegistry -> { + CoordinationState.PersistedState localState = persistedStateRegistry.getPersistedState( + PersistedStateRegistry.PersistedStateType.LOCAL + ); + CoordinationState.PersistedState remoteState = persistedStateRegistry.getPersistedState( + PersistedStateRegistry.PersistedStateType.REMOTE + ); + if (remoteState != null) { + assertEquals( + localState.getLastAcceptedState().getLastCommittedConfiguration(), + remoteState.getLastAcceptedState().getLastCommittedConfiguration() + ); + assertEquals(5, remoteState.getLastAcceptedState().getLastCommittedConfiguration().getNodeIds().size()); + } + }); + } + private void assertDataNodeDownloadStats(NodesStatsResponse nodesStatsResponse) { // assert cluster state stats for data node DiscoveryStats dataNodeDiscoveryStats = nodesStatsResponse.getNodes().get(0).getDiscoveryStats(); diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java index 280fd13f0fdcf..e8df2c8686610 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java @@ -76,29 +76,4 @@ protected void verifyRestoredData(Map indexStats, String indexName protected void verifyRestoredData(Map indexStats, String indexName) throws Exception { verifyRestoredData(indexStats, indexName, true); } - - public void prepareCluster(int numClusterManagerNodes, int numDataOnlyNodes, String indices, int replicaCount, int shardCount) { - prepareCluster(numClusterManagerNodes, numDataOnlyNodes, indices, replicaCount, shardCount, Settings.EMPTY); - } - - public void prepareCluster( - int numClusterManagerNodes, - int numDataOnlyNodes, - String indices, - int replicaCount, - int shardCount, - Settings settings - ) { - prepareCluster(numClusterManagerNodes, numDataOnlyNodes, settings); - for (String index : indices.split(",")) { - createIndex(index, remoteStoreIndexSettings(replicaCount, shardCount)); - ensureYellowAndNoInitializingShards(index); - ensureGreen(index); - } - } - - public void prepareCluster(int numClusterManagerNodes, int numDataOnlyNodes, Settings settings) { - internalCluster().startClusterManagerOnlyNodes(numClusterManagerNodes, settings); - internalCluster().startDataOnlyNodes(numDataOnlyNodes, settings); - } } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java index ba06bb463e5a8..bcb0d54c0a25c 100644 --- 
a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java @@ -351,13 +351,7 @@ protected void restore(boolean restoreAllShards, String... indices) { } protected void prepareCluster(int numClusterManagerNodes, int numDataOnlyNodes, String indices, int replicaCount, int shardCount) { - internalCluster().startClusterManagerOnlyNodes(numClusterManagerNodes); - internalCluster().startDataOnlyNodes(numDataOnlyNodes); - for (String index : indices.split(",")) { - createIndex(index, remoteStoreIndexSettings(replicaCount, shardCount)); - ensureYellowAndNoInitializingShards(index); - ensureGreen(index); - } + prepareCluster(numClusterManagerNodes, numDataOnlyNodes, indices, replicaCount, shardCount, Settings.EMPTY); } protected void prepareCluster( @@ -368,11 +362,16 @@ protected void prepareCluster( int shardCount, Settings settings ) { - internalCluster().startClusterManagerOnlyNodes(numClusterManagerNodes, settings); - internalCluster().startDataOnlyNodes(numDataOnlyNodes, settings); + prepareCluster(numClusterManagerNodes, numDataOnlyNodes, settings); for (String index : indices.split(",")) { createIndex(index, remoteStoreIndexSettings(replicaCount, shardCount)); + ensureYellowAndNoInitializingShards(index); ensureGreen(index); } } + + protected void prepareCluster(int numClusterManagerNodes, int numDataOnlyNodes, Settings settings) { + internalCluster().startClusterManagerOnlyNodes(numClusterManagerNodes, settings); + internalCluster().startDataOnlyNodes(numDataOnlyNodes, settings); + } } diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/CloneSnapshotIT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/CloneSnapshotIT.java index 83f93ab9ff6b5..ddbe0520f0c1f 100644 --- a/server/src/internalClusterTest/java/org/opensearch/snapshots/CloneSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/CloneSnapshotIT.java @@ -64,7 +64,6 @@ import java.util.List; import java.util.concurrent.ExecutionException; -import static org.opensearch.remotestore.RemoteStoreBaseIntegTestCase.remoteStoreClusterSettings; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.hasSize; @@ -127,7 +126,7 @@ public void testShardClone() throws Exception { } public void testCloneSnapshotIndex() throws Exception { - internalCluster().startClusterManagerOnlyNode(); + internalCluster().startClusterManagerOnlyNode(LARGE_SNAPSHOT_POOL_SETTINGS); internalCluster().startDataOnlyNode(); final String repoName = "repo-name"; createRepository(repoName, "fs"); @@ -318,7 +317,7 @@ public void testClonePreventsSnapshotDelete() throws Exception { indexRandomDocs(indexName, randomIntBetween(20, 100)); final String targetSnapshot = "target-snapshot"; - blockNodeOnAnyFiles(repoName, clusterManagerName); + blockClusterManagerOnWriteIndexFile(repoName); final ActionFuture cloneFuture = startClone(repoName, sourceSnapshot, targetSnapshot, indexName); waitForBlock(clusterManagerName, repoName, TimeValue.timeValueSeconds(30L)); assertFalse(cloneFuture.isDone()); @@ -426,7 +425,7 @@ public void testLongRunningSnapshotAllowsConcurrentClone() throws Exception { } public void testDeletePreventsClone() throws Exception { - final String clusterManagerName = internalCluster().startClusterManagerOnlyNode(); + final String clusterManagerName 
= internalCluster().startClusterManagerOnlyNode(LARGE_SNAPSHOT_POOL_SETTINGS); internalCluster().startDataOnlyNode(); final String repoName = "repo-name"; createRepository(repoName, "mock"); @@ -439,7 +438,7 @@ public void testDeletePreventsClone() throws Exception { indexRandomDocs(indexName, randomIntBetween(20, 100)); final String targetSnapshot = "target-snapshot"; - blockNodeOnAnyFiles(repoName, clusterManagerName); + blockClusterManagerOnWriteIndexFile(repoName); final ActionFuture deleteFuture = startDeleteSnapshot(repoName, sourceSnapshot); waitForBlock(clusterManagerName, repoName, TimeValue.timeValueSeconds(30L)); assertFalse(deleteFuture.isDone()); @@ -591,7 +590,7 @@ public void testClusterManagerFailoverDuringCloneStep2() throws Exception { public void testExceptionDuringShardClone() throws Exception { // large snapshot pool so blocked snapshot threads from cloning don't prevent concurrent snapshot finalizations - internalCluster().startClusterManagerOnlyNodes(3, LARGE_SNAPSHOT_POOL_SETTINGS); + internalCluster().startClusterManagerOnlyNode(LARGE_SNAPSHOT_POOL_SETTINGS); internalCluster().startDataOnlyNode(); final String repoName = "test-repo"; createRepository(repoName, "mock"); @@ -602,7 +601,7 @@ public void testExceptionDuringShardClone() throws Exception { createFullSnapshot(repoName, sourceSnapshot); final String targetSnapshot = "target-snapshot"; - blockClusterManagerFromFinalizingSnapshotOnSnapFile(repoName); + blockClusterManagerFromFinalizingSnapshotOnIndexFile(repoName); final ActionFuture cloneFuture = startCloneFromDataNode(repoName, sourceSnapshot, targetSnapshot, testIndex); awaitNumberOfSnapshotsInProgress(1); final String clusterManagerNode = internalCluster().getClusterManagerName(); @@ -680,7 +679,7 @@ public void testStartSnapshotWithSuccessfulShardClonePendingFinalization() throw } public void testStartCloneWithSuccessfulShardClonePendingFinalization() throws Exception { - final String clusterManagerName = internalCluster().startClusterManagerOnlyNode(); + final String clusterManagerName = internalCluster().startClusterManagerOnlyNode(LARGE_SNAPSHOT_POOL_SETTINGS); internalCluster().startDataOnlyNode(); final String repoName = "test-repo"; createRepository(repoName, "mock"); diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/CloneSnapshotV2IT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/CloneSnapshotV2IT.java new file mode 100644 index 0000000000000..c6744ae62db60 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/CloneSnapshotV2IT.java @@ -0,0 +1,438 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +package org.opensearch.snapshots; + +import org.opensearch.action.ActionRunnable; +import org.opensearch.action.DocWriteResponse; +import org.opensearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; +import org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse; +import org.opensearch.action.delete.DeleteResponse; +import org.opensearch.action.support.PlainActionFuture; +import org.opensearch.action.support.master.AcknowledgedResponse; +import org.opensearch.client.Client; +import org.opensearch.common.settings.Settings; +import org.opensearch.core.common.unit.ByteSizeUnit; +import org.opensearch.core.rest.RestStatus; +import org.opensearch.indices.RemoteStoreSettings; +import org.opensearch.repositories.RepositoriesService; +import org.opensearch.repositories.RepositoryData; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.repositories.fs.FsRepository; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.nio.file.Path; + +import static org.opensearch.remotestore.RemoteStoreBaseIntegTestCase.remoteStoreClusterSettings; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +public class CloneSnapshotV2IT extends AbstractSnapshotIntegTestCase { + + public void testCloneShallowCopyV2() throws Exception { + disableRepoConsistencyCheck("Remote store repository is being used in the test"); + final Path remoteStoreRepoPath = randomRepoPath(); + internalCluster().startClusterManagerOnlyNode(snapshotV2Settings(remoteStoreRepoPath)); + internalCluster().startDataOnlyNode(snapshotV2Settings(remoteStoreRepoPath)); + internalCluster().startDataOnlyNode(snapshotV2Settings(remoteStoreRepoPath)); + + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + String indexName3 = "testindex3"; + String snapshotRepoName = "test-clone-snapshot-repo"; + String snapshotName1 = "test-create-snapshot1"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + Client client = client(); + + assertAcked( + client.admin() + .cluster() + .preparePutRepository(snapshotRepoName) + .setType(FsRepository.TYPE) + .setSettings( + Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true) + ) + ); + + createIndex(indexName1, getRemoteStoreBackedIndexSettings()); + createIndex(indexName2, getRemoteStoreBackedIndexSettings()); + + final int numDocsInIndex1 = 10; + final int numDocsInIndex2 = 20; + indexRandomDocs(indexName1, numDocsInIndex1); + indexRandomDocs(indexName2, numDocsInIndex2); + ensureGreen(indexName1, indexName2); + + CreateSnapshotResponse createSnapshotResponse = client().admin() + .cluster() + .prepareCreateSnapshot(snapshotRepoName, snapshotName1) + .setWaitForCompletion(true) + .get(); + SnapshotInfo 
sourceSnapshotInfo = createSnapshotResponse.getSnapshotInfo(); + assertThat(sourceSnapshotInfo.state(), equalTo(SnapshotState.SUCCESS)); + assertThat(sourceSnapshotInfo.successfulShards(), greaterThan(0)); + assertThat(sourceSnapshotInfo.successfulShards(), equalTo(sourceSnapshotInfo.totalShards())); + assertThat(sourceSnapshotInfo.snapshotId().getName(), equalTo(snapshotName1)); + + // Validate that the snapshot was created + final BlobStoreRepository repository = (BlobStoreRepository) internalCluster().getCurrentClusterManagerNodeInstance( + RepositoriesService.class + ).repository(snapshotRepoName); + PlainActionFuture repositoryDataPlainActionFuture = new PlainActionFuture<>(); + repository.getRepositoryData(repositoryDataPlainActionFuture); + + RepositoryData repositoryData = repositoryDataPlainActionFuture.get(); + + assertTrue(repositoryData.getSnapshotIds().contains(sourceSnapshotInfo.snapshotId())); + + createIndex(indexName3, getRemoteStoreBackedIndexSettings()); + indexRandomDocs(indexName3, 10); + ensureGreen(indexName3); + + AcknowledgedResponse response = client().admin() + .cluster() + .prepareCloneSnapshot(snapshotRepoName, snapshotName1, "test_clone_snapshot1") + .setIndices("*") + .get(); + assertTrue(response.isAcknowledged()); + awaitClusterManagerFinishRepoOperations(); + + // Validate that snapshot is present in repository data + PlainActionFuture repositoryDataPlainActionFutureClone = new PlainActionFuture<>(); + repository.getRepositoryData(repositoryDataPlainActionFutureClone); + + repositoryData = repositoryDataPlainActionFutureClone.get(); + assertEquals(repositoryData.getSnapshotIds().size(), 2); + boolean foundCloneInRepoData = false; + SnapshotId cloneSnapshotId = null; + for (SnapshotId snapshotId : repositoryData.getSnapshotIds()) { + if (snapshotId.getName().equals("test_clone_snapshot1")) { + foundCloneInRepoData = true; + cloneSnapshotId = snapshotId; + } + } + final SnapshotId cloneSnapshotIdFinal = cloneSnapshotId; + SnapshotInfo cloneSnapshotInfo = PlainActionFuture.get( + f -> repository.threadPool().generic().execute(ActionRunnable.supply(f, () -> repository.getSnapshotInfo(cloneSnapshotIdFinal))) + ); + + assertTrue(foundCloneInRepoData); + + assertThat(cloneSnapshotInfo.getPinnedTimestamp(), equalTo(sourceSnapshotInfo.getPinnedTimestamp())); + for (String index : sourceSnapshotInfo.indices()) { + assertTrue(cloneSnapshotInfo.indices().contains(index)); + + } + assertThat(cloneSnapshotInfo.totalShards(), equalTo(sourceSnapshotInfo.totalShards())); + } + + public void testCloneShallowCopyAfterDisablingV2() throws Exception { + disableRepoConsistencyCheck("Remote store repository is being used in the test"); + final Path remoteStoreRepoPath = randomRepoPath(); + internalCluster().startClusterManagerOnlyNode(snapshotV2Settings(remoteStoreRepoPath)); + internalCluster().startDataOnlyNode(snapshotV2Settings(remoteStoreRepoPath)); + internalCluster().startDataOnlyNode(snapshotV2Settings(remoteStoreRepoPath)); + + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + String indexName3 = "testindex3"; + String snapshotRepoName = "test-clone-snapshot-repo"; + String sourceSnapshotV2 = "test-source-snapshot-v2"; + String sourceSnapshotV1 = "test-source-snapshot-v1"; + String cloneSnapshotV2 = "test-clone-snapshot-v2"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + Client client = client(); + + assertAcked( + client.admin() + .cluster() + 
.preparePutRepository(snapshotRepoName) + .setType(FsRepository.TYPE) + .setSettings( + Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true) + ) + ); + + createIndex(indexName1, getRemoteStoreBackedIndexSettings()); + createIndex(indexName2, getRemoteStoreBackedIndexSettings()); + + final int numDocsInIndex1 = 10; + final int numDocsInIndex2 = 20; + indexRandomDocs(indexName1, numDocsInIndex1); + indexRandomDocs(indexName2, numDocsInIndex2); + ensureGreen(indexName1, indexName2); + + // create source snapshot which is v2 + CreateSnapshotResponse createSnapshotResponse = client().admin() + .cluster() + .prepareCreateSnapshot(snapshotRepoName, sourceSnapshotV2) + .setWaitForCompletion(true) + .get(); + SnapshotInfo sourceSnapshotInfo = createSnapshotResponse.getSnapshotInfo(); + assertThat(sourceSnapshotInfo.state(), equalTo(SnapshotState.SUCCESS)); + assertThat(sourceSnapshotInfo.successfulShards(), greaterThan(0)); + assertThat(sourceSnapshotInfo.successfulShards(), equalTo(sourceSnapshotInfo.totalShards())); + assertThat(sourceSnapshotInfo.snapshotId().getName(), equalTo(sourceSnapshotV2)); + + // Validate that the snapshot was created + final BlobStoreRepository repository = (BlobStoreRepository) internalCluster().getCurrentClusterManagerNodeInstance( + RepositoriesService.class + ).repository(snapshotRepoName); + PlainActionFuture repositoryDataPlainActionFuture = new PlainActionFuture<>(); + repository.getRepositoryData(repositoryDataPlainActionFuture); + + RepositoryData repositoryData = repositoryDataPlainActionFuture.get(); + + assertTrue(repositoryData.getSnapshotIds().contains(sourceSnapshotInfo.snapshotId())); + + createIndex(indexName3, getRemoteStoreBackedIndexSettings()); + indexRandomDocs(indexName3, 10); + ensureGreen(indexName3); + + // disable snapshot v2 in repo + assertAcked( + client.admin() + .cluster() + .preparePutRepository(snapshotRepoName) + .setType(FsRepository.TYPE) + .setSettings( + Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), false) + ) + ); + + // validate that the created snapshot is v1 + CreateSnapshotResponse createSnapshotResponseV1 = client().admin() + .cluster() + .prepareCreateSnapshot(snapshotRepoName, sourceSnapshotV1) + .setWaitForCompletion(true) + .get(); + SnapshotInfo sourceSnapshotInfoV1 = createSnapshotResponseV1.getSnapshotInfo(); + assertThat(sourceSnapshotInfoV1.state(), equalTo(SnapshotState.SUCCESS)); + assertThat(sourceSnapshotInfoV1.successfulShards(), greaterThan(0)); + assertThat(sourceSnapshotInfoV1.successfulShards(), equalTo(sourceSnapshotInfoV1.totalShards())); + assertThat(sourceSnapshotInfoV1.getPinnedTimestamp(), equalTo(0L)); + + // Validate that snapshot is present in repository data + PlainActionFuture repositoryDataV1PlainActionFuture = new PlainActionFuture<>(); + BlobStoreRepository repositoryV1 = (BlobStoreRepository) internalCluster().getCurrentClusterManagerNodeInstance( + 
RepositoriesService.class + ).repository(snapshotRepoName); + repositoryV1.getRepositoryData(repositoryDataV1PlainActionFuture); + + repositoryData = repositoryDataV1PlainActionFuture.get(); + + assertTrue(repositoryData.getSnapshotIds().contains(sourceSnapshotInfoV1.snapshotId())); + assertEquals(repositoryData.getSnapshotIds().size(), 2); + + // clone should get created for v2 snapshot + AcknowledgedResponse response = client().admin() + .cluster() + .prepareCloneSnapshot(snapshotRepoName, sourceSnapshotV2, cloneSnapshotV2) + .setIndices("*") + .get(); + assertTrue(response.isAcknowledged()); + awaitClusterManagerFinishRepoOperations(); + + // Validate that snapshot is present in repository data + PlainActionFuture repositoryDataCloneV2PlainActionFuture = new PlainActionFuture<>(); + BlobStoreRepository repositoryCloneV2 = (BlobStoreRepository) internalCluster().getCurrentClusterManagerNodeInstance( + RepositoriesService.class + ).repository(snapshotRepoName); + repositoryCloneV2.getRepositoryData(repositoryDataCloneV2PlainActionFuture); + + repositoryData = repositoryDataCloneV2PlainActionFuture.get(); + + assertEquals(repositoryData.getSnapshotIds().size(), 3); + boolean foundCloneInRepoData = false; + SnapshotId cloneSnapshotId = null; + for (SnapshotId snapshotId : repositoryData.getSnapshotIds()) { + if (snapshotId.getName().equals(cloneSnapshotV2)) { + foundCloneInRepoData = true; + cloneSnapshotId = snapshotId; + } + } + final SnapshotId cloneSnapshotIdFinal = cloneSnapshotId; + SnapshotInfo cloneSnapshotInfo = PlainActionFuture.get( + f -> repository.threadPool().generic().execute(ActionRunnable.supply(f, () -> repository.getSnapshotInfo(cloneSnapshotIdFinal))) + ); + + assertTrue(foundCloneInRepoData); + // pinned timestamp value in clone snapshot v2 matches source snapshot v2 + assertThat(cloneSnapshotInfo.getPinnedTimestamp(), equalTo(sourceSnapshotInfo.getPinnedTimestamp())); + for (String index : sourceSnapshotInfo.indices()) { + assertTrue(cloneSnapshotInfo.indices().contains(index)); + + } + assertThat(cloneSnapshotInfo.totalShards(), equalTo(sourceSnapshotInfo.totalShards())); + + } + + public void testRestoreFromClone() throws Exception { + disableRepoConsistencyCheck("Remote store repository is being used in the test"); + final Path remoteStoreRepoPath = randomRepoPath(); + + internalCluster().startClusterManagerOnlyNode(snapshotV2Settings(remoteStoreRepoPath)); + internalCluster().startDataOnlyNode(snapshotV2Settings(remoteStoreRepoPath)); + internalCluster().startDataOnlyNode(snapshotV2Settings(remoteStoreRepoPath)); + + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + + String snapshotRepoName = "test-clone-snapshot-repo"; + String sourceSnapshot = "test-source-snapshot"; + String cloneSnapshot = "test-clone-snapshot"; + + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + String restoredIndexName1 = indexName1 + "-restored"; + + Client client = client(); + + assertAcked( + client.admin() + .cluster() + .preparePutRepository(snapshotRepoName) + .setType(FsRepository.TYPE) + .setSettings( + Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true) + ) + ); + + 
createIndex(indexName1, getRemoteStoreBackedIndexSettings()); + createIndex(indexName2, getRemoteStoreBackedIndexSettings()); + + final int numDocsInIndex1 = 10; + final int numDocsInIndex2 = 20; + indexRandomDocs(indexName1, numDocsInIndex1); + indexRandomDocs(indexName2, numDocsInIndex2); + ensureGreen(indexName1, indexName2); + + logger.info("--> create source snapshot"); + + CreateSnapshotResponse createSnapshotResponse = client().admin() + .cluster() + .prepareCreateSnapshot(snapshotRepoName, sourceSnapshot) + .setWaitForCompletion(true) + .get(); + SnapshotInfo sourceSnapshotInfo = createSnapshotResponse.getSnapshotInfo(); + assertThat(sourceSnapshotInfo.state(), equalTo(SnapshotState.SUCCESS)); + assertThat(sourceSnapshotInfo.successfulShards(), greaterThan(0)); + assertThat(sourceSnapshotInfo.successfulShards(), equalTo(sourceSnapshotInfo.totalShards())); + assertThat(sourceSnapshotInfo.snapshotId().getName(), equalTo(sourceSnapshot)); + + AcknowledgedResponse response = client().admin() + .cluster() + .prepareCloneSnapshot(snapshotRepoName, sourceSnapshot, cloneSnapshot) + .setIndices("*") + .get(); + assertTrue(response.isAcknowledged()); + + DeleteResponse deleteResponse = client().prepareDelete(indexName1, "0").execute().actionGet(); + assertEquals(deleteResponse.getResult(), DocWriteResponse.Result.DELETED); + ensureGreen(indexName1); + + deleteResponse = client().prepareDelete(indexName1, "1").execute().actionGet(); + assertEquals(deleteResponse.getResult(), DocWriteResponse.Result.DELETED); + ensureGreen(indexName1); + + // delete the source snapshot + AcknowledgedResponse deleteSnapshotResponse = internalCluster().clusterManagerClient() + .admin() + .cluster() + .prepareDeleteSnapshot(snapshotRepoName, sourceSnapshot) + .get(); + assertAcked(deleteSnapshotResponse); + + deleteResponse = client().prepareDelete(indexName1, "2").execute().actionGet(); + assertEquals(deleteResponse.getResult(), DocWriteResponse.Result.DELETED); + ensureGreen(indexName1); + ensureGreen(indexName1); + + // restore from clone + RestoreSnapshotResponse restoreSnapshotResponse1 = client.admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepoName, cloneSnapshot) + .setWaitForCompletion(true) + .setIndices(indexName1) + .setRenamePattern(indexName1) + .setRenameReplacement(restoredIndexName1) + .get(); + + assertEquals(restoreSnapshotResponse1.status(), RestStatus.OK); + ensureGreen(restoredIndexName1, indexName2); + assertDocsPresentInIndex(client, restoredIndexName1, numDocsInIndex1); + assertDocsPresentInIndex(client, indexName2, numDocsInIndex2); + } + + private void assertDocsPresentInIndex(Client client, String indexName, int numOfDocs) { + for (int i = 0; i < numOfDocs; i++) { + String id = Integer.toString(i); + logger.info("checking for index " + indexName + " with docId" + id); + assertTrue("doc with id" + id + " is not present for index " + indexName, client.prepareGet(indexName, id).get().isExists()); + } + } + + private Settings snapshotV2Settings(Path remoteStoreRepoPath) { + String REMOTE_REPO_NAME = "remote-store-repo-name"; + Settings settings = Settings.builder() + .put(remoteStoreClusterSettings(REMOTE_REPO_NAME, remoteStoreRepoPath)) + .put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED.getKey(), true) + .build(); + return settings; + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/clone/TransportCloneSnapshotAction.java 
b/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/clone/TransportCloneSnapshotAction.java index 54ef372609390..6da26d9a2da45 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/clone/TransportCloneSnapshotAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/clone/TransportCloneSnapshotAction.java @@ -95,7 +95,7 @@ protected void clusterManagerOperation( ClusterState state, ActionListener listener ) { - snapshotsService.cloneSnapshot(request, ActionListener.map(listener, v -> new AcknowledgedResponse(true))); + snapshotsService.executeClone(request, ActionListener.map(listener, v -> new AcknowledgedResponse(true))); } @Override diff --git a/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java b/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java index c7820c2c9a365..9cffc7051d756 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java @@ -39,8 +39,8 @@ import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.common.settings.Settings; -import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.util.io.IOUtils; +import org.opensearch.gateway.remote.ClusterMetadataManifest; import java.io.Closeable; import java.io.IOException; @@ -53,7 +53,7 @@ import java.util.Set; import static org.opensearch.cluster.coordination.Coordinator.ZEN1_BWC_TERM; -import static org.opensearch.common.util.FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL; +import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_PUBLICATION_SETTING; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.isRemoteStoreClusterStateEnabled; /** @@ -81,7 +81,7 @@ public class CoordinationState { private VotingConfiguration lastPublishedConfiguration; private VoteCollection publishVotes; private final boolean isRemoteStateEnabled; - private final boolean isRemotePublicationEnabled; + private boolean isRemotePublicationEnabled; public CoordinationState( DiscoveryNode localNode, @@ -105,8 +105,9 @@ public CoordinationState( .getLastAcceptedConfiguration(); this.publishVotes = new VoteCollection(); this.isRemoteStateEnabled = isRemoteStoreClusterStateEnabled(settings); + // ToDo: revisit this check while making the setting dynamic this.isRemotePublicationEnabled = isRemoteStateEnabled - && FeatureFlags.isEnabled(REMOTE_PUBLICATION_EXPERIMENTAL) + && REMOTE_PUBLICATION_SETTING.get(settings) && localNode.isRemoteStatePublicationEnabled(); } @@ -460,6 +461,9 @@ public PublishResponse handlePublishRequest(PublishRequest publishRequest) { clusterState.term() ); persistedStateRegistry.getPersistedState(PersistedStateType.LOCAL).setLastAcceptedState(clusterState); + if (shouldUpdateRemotePersistedState(publishRequest)) { + updateRemotePersistedStateOnPublishRequest(publishRequest); + } assert getLastAcceptedState() == clusterState; return new PublishResponse(clusterState.term(), clusterState.version()); @@ -572,6 +576,9 @@ public void handleCommit(ApplyCommitRequest applyCommit) { ); persistedStateRegistry.getPersistedState(PersistedStateType.LOCAL).markLastAcceptedStateAsCommitted(); + if (shouldCommitRemotePersistedState()) { + persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).markLastAcceptedStateAsCommitted(); + } assert 
getLastCommittedConfiguration().equals(getLastAcceptedConfiguration()); } @@ -617,6 +624,33 @@ public void close() throws IOException { IOUtils.close(persistedStateRegistry); } + private boolean shouldUpdateRemotePersistedState(PublishRequest publishRequest) { + return persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE) != null + && publishRequest.getAcceptedState().getNodes().isLocalNodeElectedClusterManager() == false; + } + + private void updateRemotePersistedStateOnPublishRequest(PublishRequest publishRequest) { + if (publishRequest instanceof RemoteStatePublishRequest) { + persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).setLastAcceptedState(publishRequest.getAcceptedState()); + persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE) + .setLastAcceptedManifest(((RemoteStatePublishRequest) publishRequest).getAcceptedManifest()); + } else { + // We will end up here if PublishRequest was sent not using Remote Store even with remote persisted state on this node + persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).setLastAcceptedState(null); + persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).setLastAcceptedManifest(null); + } + } + + private boolean shouldCommitRemotePersistedState() { + return persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE) != null + && persistedStateRegistry.getPersistedState(PersistedStateType.LOCAL) + .getLastAcceptedState() + .getNodes() + .isLocalNodeElectedClusterManager() == false + && persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).getLastAcceptedState() != null + && persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).getLastAcceptedManifest() != null; + } + /** * Pluggable persistence layer for {@link CoordinationState}. * @@ -654,6 +688,22 @@ public interface PersistedState extends Closeable { */ PersistedStateStats getStats(); + /** + * Returns the last accepted {@link ClusterMetadataManifest}. + * + * @return The last accepted {@link ClusterMetadataManifest}, or null if no manifest + * has been accepted yet. + */ + default ClusterMetadataManifest getLastAcceptedManifest() { + // return null by default, this method needs to be overridden wherever required + return null; + } + + /** + * Sets the last accepted {@link ClusterMetadataManifest}. + */ + default void setLastAcceptedManifest(ClusterMetadataManifest manifest) {} + /** * Marks the last accepted cluster state as committed. * After a successful call to this method, {@link #getLastAcceptedState()} should return the last cluster state that was set, @@ -662,14 +712,7 @@ public interface PersistedState extends Closeable { */ default void markLastAcceptedStateAsCommitted() { final ClusterState lastAcceptedState = getLastAcceptedState(); - Metadata.Builder metadataBuilder = null; - if (lastAcceptedState.getLastAcceptedConfiguration().equals(lastAcceptedState.getLastCommittedConfiguration()) == false) { - final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder(lastAcceptedState.coordinationMetadata()) - .lastCommittedConfiguration(lastAcceptedState.getLastAcceptedConfiguration()) - .build(); - metadataBuilder = Metadata.builder(lastAcceptedState.metadata()); - metadataBuilder.coordinationMetadata(coordinationMetadata); - } + Metadata.Builder metadataBuilder = commitVotingConfiguration(lastAcceptedState); // if we receive a commit from a Zen1 cluster-manager that has not recovered its state yet, // the cluster uuid might not been known yet. 
assert lastAcceptedState.metadata().clusterUUID().equals(Metadata.UNKNOWN_CLUSTER_UUID) == false @@ -694,6 +737,18 @@ default void markLastAcceptedStateAsCommitted() { } } + default Metadata.Builder commitVotingConfiguration(ClusterState lastAcceptedState) { + Metadata.Builder metadataBuilder = null; + if (lastAcceptedState.getLastAcceptedConfiguration().equals(lastAcceptedState.getLastCommittedConfiguration()) == false) { + final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder(lastAcceptedState.coordinationMetadata()) + .lastCommittedConfiguration(lastAcceptedState.getLastAcceptedConfiguration()) + .build(); + metadataBuilder = Metadata.builder(lastAcceptedState.metadata()); + metadataBuilder.coordinationMetadata(coordinationMetadata); + } + return metadataBuilder; + } + default void close() throws IOException {} } diff --git a/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java b/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java index ca36011b3a0e9..cdf331b7bb577 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java @@ -199,7 +199,7 @@ private PublishWithJoinResponse handleIncomingPublishRequest(BytesTransportReque } fullClusterStateReceivedCount.incrementAndGet(); logger.debug("received full cluster state version [{}] with size [{}]", incomingState.version(), request.bytes().length()); - final PublishWithJoinResponse response = acceptState(incomingState); + final PublishWithJoinResponse response = acceptState(incomingState, null); lastSeenClusterState.set(incomingState); return response; } else { @@ -230,7 +230,7 @@ private PublishWithJoinResponse handleIncomingPublishRequest(BytesTransportReque incomingState.stateUUID(), request.bytes().length() ); - final PublishWithJoinResponse response = acceptState(incomingState); + final PublishWithJoinResponse response = acceptState(incomingState, null); lastSeenClusterState.compareAndSet(lastSeen, incomingState); return response; } @@ -281,7 +281,7 @@ PublishWithJoinResponse handleIncomingRemotePublishRequest(RemotePublishRequest true ); fullClusterStateReceivedCount.incrementAndGet(); - final PublishWithJoinResponse response = acceptState(clusterState); + final PublishWithJoinResponse response = acceptState(clusterState, manifest); lastSeenClusterState.set(clusterState); return response; } else { @@ -300,7 +300,7 @@ PublishWithJoinResponse handleIncomingRemotePublishRequest(RemotePublishRequest transportService.getLocalNode().getId() ); compatibleClusterStateDiffReceivedCount.incrementAndGet(); - final PublishWithJoinResponse response = acceptState(clusterState); + final PublishWithJoinResponse response = acceptState(clusterState, manifest); lastSeenClusterState.compareAndSet(lastSeen, clusterState); return response; } @@ -314,7 +314,7 @@ PublishWithJoinResponse handleIncomingRemotePublishRequest(RemotePublishRequest } } - private PublishWithJoinResponse acceptState(ClusterState incomingState) { + private PublishWithJoinResponse acceptState(ClusterState incomingState, ClusterMetadataManifest manifest) { // if the state is coming from the current node, use original request instead (see currentPublishRequestToSelf for explanation) if (transportService.getLocalNode().equals(incomingState.nodes().getClusterManagerNode())) { final PublishRequest publishRequest = currentPublishRequestToSelf.get(); @@ -324,6 
+324,9 @@ private PublishWithJoinResponse acceptState(ClusterState incomingState) { return handlePublishRequest.apply(publishRequest); } } + if (manifest != null) { + return handlePublishRequest.apply(new RemoteStatePublishRequest(incomingState, manifest)); + } return handlePublishRequest.apply(new PublishRequest(incomingState)); } @@ -539,7 +542,7 @@ public String executor() { } public void sendClusterState(DiscoveryNode destination, ActionListener listener) { - logger.info("sending cluster state over transport to node: {}", destination.getName()); + logger.debug("sending cluster state over transport to node: {}", destination.getName()); if (sendFullVersion || previousState.nodes().nodeExists(destination) == false) { logger.trace("sending full cluster state version [{}] to [{}]", newState.version(), destination); sendFullClusterState(destination, listener); @@ -639,7 +642,7 @@ public class RemotePublicationContext extends PublicationContext { @Override public void sendClusterState(final DiscoveryNode destination, final ActionListener listener) { try { - logger.info("sending remote cluster state to node: {}", destination.getName()); + logger.debug("sending remote cluster state to node: {}", destination.getName()); final String manifestFileName = ((RemotePersistedState) persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE)) .getLastUploadedManifestFile(); final RemotePublishRequest remotePublishRequest = new RemotePublishRequest( diff --git a/server/src/main/java/org/opensearch/cluster/coordination/RemoteStatePublishRequest.java b/server/src/main/java/org/opensearch/cluster/coordination/RemoteStatePublishRequest.java new file mode 100644 index 0000000000000..5667e6d67d062 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/coordination/RemoteStatePublishRequest.java @@ -0,0 +1,51 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.cluster.coordination; + +import org.opensearch.cluster.ClusterState; +import org.opensearch.gateway.remote.ClusterMetadataManifest; + +import java.util.Objects; + +/** + * PublishRequest created by downloading the accepted {@link ClusterState} from Remote Store, using the published {@link ClusterMetadataManifest} + * + * @opensearch.internal + */ +public class RemoteStatePublishRequest extends PublishRequest { + private final ClusterMetadataManifest manifest; + + public RemoteStatePublishRequest(ClusterState acceptedState, ClusterMetadataManifest acceptedManifest) { + super(acceptedState); + this.manifest = acceptedManifest; + } + + public ClusterMetadataManifest getAcceptedManifest() { + return manifest; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + if (!super.equals(o)) return false; + RemoteStatePublishRequest that = (RemoteStatePublishRequest) o; + return Objects.equals(manifest, that.manifest); + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), manifest); + } + + @Override + public String toString() { + return "RemoteStatePublishRequest{" + super.toString() + "manifest=" + manifest + "} "; + } +} diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index e35aa42563d4d..09832e2b41b6d 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -734,6 +734,7 @@ public void apply(Settings value, Settings current, Settings previous) { // Remote cluster state settings RemoteClusterStateCleanupManager.REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING, RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING, + RemoteClusterStateService.REMOTE_PUBLICATION_SETTING, INDEX_METADATA_UPLOAD_TIMEOUT_SETTING, GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING, METADATA_MANIFEST_UPLOAD_TIMEOUT_SETTING, diff --git a/server/src/main/java/org/opensearch/common/settings/FeatureFlagSettings.java b/server/src/main/java/org/opensearch/common/settings/FeatureFlagSettings.java index 9c7684923d06c..c8d00f65bda10 100644 --- a/server/src/main/java/org/opensearch/common/settings/FeatureFlagSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/FeatureFlagSettings.java @@ -37,7 +37,6 @@ protected FeatureFlagSettings( FeatureFlags.TIERED_REMOTE_INDEX_SETTING, FeatureFlags.REMOTE_STORE_MIGRATION_EXPERIMENTAL_SETTING, FeatureFlags.PLUGGABLE_CACHE_SETTING, - FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL_SETTING, FeatureFlags.STAR_TREE_INDEX_SETTING, FeatureFlags.APPLICATION_BASED_CONFIGURATION_TEMPLATES_SETTING, FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL_SETTING diff --git a/server/src/main/java/org/opensearch/common/util/FeatureFlags.java b/server/src/main/java/org/opensearch/common/util/FeatureFlags.java index 0fd5edde2b94c..49ecbb0a7069d 100644 --- a/server/src/main/java/org/opensearch/common/util/FeatureFlags.java +++ b/server/src/main/java/org/opensearch/common/util/FeatureFlags.java @@ -67,11 +67,6 @@ public class FeatureFlags { */ public static final String PLUGGABLE_CACHE = "opensearch.experimental.feature.pluggable.caching.enabled"; - /** - * Gates the functionality of remote routing table. 
- */ - public static final String REMOTE_PUBLICATION_EXPERIMENTAL = "opensearch.experimental.feature.remote_store.publication.enabled"; - /** * Gates the functionality of background task execution. */ @@ -101,12 +96,6 @@ public class FeatureFlags { public static final Setting PLUGGABLE_CACHE_SETTING = Setting.boolSetting(PLUGGABLE_CACHE, false, Property.NodeScope); - public static final Setting REMOTE_PUBLICATION_EXPERIMENTAL_SETTING = Setting.boolSetting( - REMOTE_PUBLICATION_EXPERIMENTAL, - false, - Property.NodeScope - ); - public static final Setting READER_WRITER_SPLIT_EXPERIMENTAL_SETTING = Setting.boolSetting( READER_WRITER_SPLIT_EXPERIMENTAL, false, @@ -148,7 +137,6 @@ public class FeatureFlags { DATETIME_FORMATTER_CACHING_SETTING, TIERED_REMOTE_INDEX_SETTING, PLUGGABLE_CACHE_SETTING, - REMOTE_PUBLICATION_EXPERIMENTAL_SETTING, STAR_TREE_INDEX_SETTING, APPLICATION_BASED_CONFIGURATION_TEMPLATES_SETTING, READER_WRITER_SPLIT_EXPERIMENTAL_SETTING diff --git a/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java b/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java index bd56c9e1757c6..b3836edcd7d6c 100644 --- a/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java +++ b/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java @@ -697,8 +697,18 @@ public String getLastUploadedManifestFile() { return lastUploadedManifestFile; } + @Override + public ClusterMetadataManifest getLastAcceptedManifest() { + return lastAcceptedManifest; + } + @Override public void setLastAcceptedState(ClusterState clusterState) { + // for non leader node, update the lastAcceptedClusterState + if (clusterState == null || clusterState.getNodes().isLocalNodeElectedClusterManager() == false) { + lastAcceptedState = clusterState; + return; + } try { final RemoteClusterStateManifestInfo manifestDetails; // Decide the codec version @@ -735,7 +745,7 @@ assert verifyManifestAndClusterState(lastAcceptedManifest, lastAcceptedState) == } assert verifyManifestAndClusterState(manifestDetails.getClusterMetadataManifest(), clusterState) == true : "Manifest and ClusterState are not in sync"; - lastAcceptedManifest = manifestDetails.getClusterMetadataManifest(); + setLastAcceptedManifest(manifestDetails.getClusterMetadataManifest()); lastAcceptedState = clusterState; lastUploadedManifestFile = manifestDetails.getManifestFileName(); } catch (Exception e) { @@ -744,6 +754,11 @@ assert verifyManifestAndClusterState(manifestDetails.getClusterMetadataManifest( } } + @Override + public void setLastAcceptedManifest(ClusterMetadataManifest manifest) { + this.lastAcceptedManifest = manifest; + } + @Override public PersistedStateStats getStats() { return remoteClusterStateService.getUploadStats(); @@ -767,7 +782,7 @@ private boolean shouldWriteFullClusterState(ClusterState clusterState, int codec assert lastAcceptedManifest == null || lastAcceptedManifest.getCodecVersion() <= codecVersion; if (lastAcceptedState == null || lastAcceptedManifest == null - || lastAcceptedState.term() != clusterState.term() + || (remoteClusterStateService.isRemotePublicationEnabled() == false && lastAcceptedState.term() != clusterState.term()) || lastAcceptedManifest.getOpensearchVersion() != Version.CURRENT || lastAcceptedManifest.getCodecVersion() != codecVersion) { return true; @@ -781,19 +796,32 @@ public void markLastAcceptedStateAsCommitted() { assert lastAcceptedState != null : "Last accepted state is not present"; assert lastAcceptedManifest != null : "Last accepted manifest is not present"; ClusterState 
clusterState = lastAcceptedState; - if (lastAcceptedState.metadata().clusterUUID().equals(Metadata.UNKNOWN_CLUSTER_UUID) == false - && lastAcceptedState.metadata().clusterUUIDCommitted() == false) { + boolean shouldCommitVotingConfig = shouldCommitVotingConfig(); + boolean isClusterUUIDUnknown = lastAcceptedState.metadata().clusterUUID().equals(Metadata.UNKNOWN_CLUSTER_UUID); + boolean isClusterUUIDCommitted = lastAcceptedState.metadata().clusterUUIDCommitted(); + if (shouldCommitVotingConfig || (isClusterUUIDUnknown == false && isClusterUUIDCommitted == false)) { Metadata.Builder metadataBuilder = Metadata.builder(lastAcceptedState.metadata()); - metadataBuilder.clusterUUIDCommitted(true); + if (shouldCommitVotingConfig) { + metadataBuilder = commitVotingConfiguration(lastAcceptedState); + } + if (isClusterUUIDUnknown == false && isClusterUUIDCommitted == false) { + metadataBuilder.clusterUUIDCommitted(true); + } clusterState = ClusterState.builder(lastAcceptedState).metadata(metadataBuilder).build(); } - final RemoteClusterStateManifestInfo committedManifestDetails = remoteClusterStateService.markLastStateAsCommitted( - clusterState, - lastAcceptedManifest - ); - lastAcceptedManifest = committedManifestDetails.getClusterMetadataManifest(); + if (clusterState.getNodes().isLocalNodeElectedClusterManager()) { + final RemoteClusterStateManifestInfo committedManifestDetails = remoteClusterStateService.markLastStateAsCommitted( + clusterState, + lastAcceptedManifest, + shouldCommitVotingConfig + ); + assert committedManifestDetails != null; + setLastAcceptedManifest(committedManifestDetails.getClusterMetadataManifest()); + lastUploadedManifestFile = committedManifestDetails.getManifestFileName(); + } else { + setLastAcceptedManifest(ClusterMetadataManifest.builder(lastAcceptedManifest).committed(true).build()); + } lastAcceptedState = clusterState; - lastUploadedManifestFile = committedManifestDetails.getManifestFileName(); } catch (Exception e) { handleExceptionOnWrite(e); } @@ -804,6 +832,10 @@ public void close() throws IOException { remoteClusterStateService.close(); } + private boolean shouldCommitVotingConfig() { + return !lastAcceptedState.getLastAcceptedConfiguration().equals(lastAcceptedState.getLastCommittedConfiguration()); + } + private void handleExceptionOnWrite(Exception e) { throw ExceptionsHelper.convertToRuntime(e); } diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index fe34b68702c41..e504c5abb46d3 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -40,7 +40,6 @@ import org.opensearch.common.settings.Setting.Property; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; -import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.util.io.IOUtils; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.io.stream.NamedWriteableRegistry; @@ -92,7 +91,6 @@ import static java.util.Collections.emptyList; import static java.util.Collections.emptyMap; import static org.opensearch.cluster.ClusterState.CUSTOM_VALUE_SERIALIZER; -import static org.opensearch.common.util.FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL; import static org.opensearch.gateway.PersistedClusterStateService.SLOW_WRITE_LOGGING_THRESHOLD; import static 
org.opensearch.gateway.remote.ClusterMetadataManifest.CODEC_V2; import static org.opensearch.gateway.remote.ClusterMetadataManifest.CODEC_V3; @@ -123,6 +121,18 @@ public class RemoteClusterStateService implements Closeable { private static final Logger logger = LogManager.getLogger(RemoteClusterStateService.class); + /** + * Gates the functionality of remote publication. + */ + public static final String REMOTE_PUBLICATION_SETTING_KEY = "cluster.remote_store.publication.enabled"; + + public static final Setting REMOTE_PUBLICATION_SETTING = Setting.boolSetting( + REMOTE_PUBLICATION_SETTING_KEY, + false, + Property.NodeScope, + Property.Final + ); + /** * Used to specify if cluster state metadata should be published to remote store */ @@ -260,7 +270,7 @@ public RemoteClusterStateService( this.remoteStateStats = new RemotePersistenceStats(); this.namedWriteableRegistry = namedWriteableRegistry; this.indexMetadataUploadListeners = indexMetadataUploadListeners; - this.isPublicationEnabled = FeatureFlags.isEnabled(REMOTE_PUBLICATION_EXPERIMENTAL) + this.isPublicationEnabled = REMOTE_PUBLICATION_SETTING.get(settings) && RemoteStoreNodeAttribute.isRemoteStoreClusterStateEnabled(settings) && RemoteStoreNodeAttribute.isRemoteRoutingTableEnabled(settings); this.remotePathPrefix = CLUSTER_REMOTE_STORE_STATE_PATH_PREFIX.get(settings); @@ -353,12 +363,20 @@ public RemoteClusterStateManifestInfo writeFullMetadata(ClusterState clusterStat * * @return {@link RemoteClusterStateManifestInfo} object containing uploaded manifest detail */ - @Nullable public RemoteClusterStateManifestInfo writeIncrementalMetadata( ClusterState previousClusterState, ClusterState clusterState, ClusterMetadataManifest previousManifest ) throws IOException { + if (previousClusterState == null) { + throw new IllegalArgumentException("previousClusterState cannot be null"); + } + if (clusterState == null) { + throw new IllegalArgumentException("clusterState cannot be null"); + } + if (previousManifest == null) { + throw new IllegalArgumentException("previousManifest cannot be null"); + } logger.trace("WRITING INCREMENTAL STATE"); final long startTimeNanos = relativeTimeNanosSupplier.getAsLong(); @@ -366,7 +384,6 @@ public RemoteClusterStateManifestInfo writeIncrementalMetadata( logger.error("Local node is not elected cluster manager. Exiting"); return null; } - assert previousClusterState.metadata().coordinationMetadata().term() == clusterState.metadata().coordinationMetadata().term(); boolean firstUploadForSplitGlobalMetadata = !previousManifest.hasMetadataAttributesFiles(); @@ -939,18 +956,41 @@ public RemoteClusterStateCleanupManager getCleanupManager() { } @Nullable - public RemoteClusterStateManifestInfo markLastStateAsCommitted(ClusterState clusterState, ClusterMetadataManifest previousManifest) - throws IOException { + public RemoteClusterStateManifestInfo markLastStateAsCommitted( + ClusterState clusterState, + ClusterMetadataManifest previousManifest, + boolean commitVotingConfig + ) throws IOException { assert clusterState != null : "Last accepted cluster state is not set"; if (clusterState.nodes().isLocalNodeElectedClusterManager() == false) { logger.error("Local node is not elected cluster manager. 
Exiting"); return null; } assert previousManifest != null : "Last cluster metadata manifest is not set"; + UploadedMetadataAttribute uploadedCoordinationMetadata = previousManifest.getCoordinationMetadata(); + if (commitVotingConfig) { + // update the coordination metadata if voting config is committed + uploadedCoordinationMetadata = writeMetadataInParallel( + clusterState, + emptyList(), + emptyMap(), + emptyMap(), + true, + false, + false, + false, + false, + false, + emptyMap(), + false, + emptyList(), + null + ).uploadedCoordinationMetadata; + } UploadedMetadataResults uploadedMetadataResults = new UploadedMetadataResults( previousManifest.getIndices(), previousManifest.getCustomMetadataMap(), - previousManifest.getCoordinationMetadata(), + uploadedCoordinationMetadata, previousManifest.getSettingsMetadata(), previousManifest.getTemplatesMetadata(), previousManifest.getTransientSettingsMetadata(), @@ -1752,6 +1792,10 @@ public String getLastKnownUUIDFromRemote(String clusterName) { } } + public boolean isRemotePublicationEnabled() { + return this.isPublicationEnabled; + } + public void setRemoteStateReadTimeout(TimeValue remoteStateReadTimeout) { this.remoteStateReadTimeout = remoteStateReadTimeout; } diff --git a/server/src/main/java/org/opensearch/index/query/AbstractGeometryQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/AbstractGeometryQueryBuilder.java index 9fb857e33bfee..3823b524df6fe 100644 --- a/server/src/main/java/org/opensearch/index/query/AbstractGeometryQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/AbstractGeometryQueryBuilder.java @@ -67,7 +67,9 @@ * * @opensearch.internal */ -public abstract class AbstractGeometryQueryBuilder> extends AbstractQueryBuilder { +public abstract class AbstractGeometryQueryBuilder> extends AbstractQueryBuilder + implements + WithFieldName { public static final String DEFAULT_SHAPE_INDEX_NAME = "shapes"; public static final String DEFAULT_SHAPE_FIELD_NAME = "shape"; @@ -218,6 +220,7 @@ protected void doWriteTo(StreamOutput out) throws IOException { /** * @return the name of the field that will be queried */ + @Override public String fieldName() { return fieldName; } diff --git a/server/src/main/java/org/opensearch/index/query/BaseTermQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/BaseTermQueryBuilder.java index c4d9437a60c75..deb36a26e4c86 100644 --- a/server/src/main/java/org/opensearch/index/query/BaseTermQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/BaseTermQueryBuilder.java @@ -47,7 +47,7 @@ * * @opensearch.internal */ -public abstract class BaseTermQueryBuilder> extends AbstractQueryBuilder { +public abstract class BaseTermQueryBuilder> extends AbstractQueryBuilder implements WithFieldName { public static final ParseField VALUE_FIELD = new ParseField("value"); @@ -153,6 +153,7 @@ protected void doWriteTo(StreamOutput out) throws IOException { } /** Returns the field name used in this query. 
*/ + @Override public String fieldName() { return this.fieldName; } diff --git a/server/src/main/java/org/opensearch/index/query/CommonTermsQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/CommonTermsQueryBuilder.java index 652cae86da0dc..24b10851cbe10 100644 --- a/server/src/main/java/org/opensearch/index/query/CommonTermsQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/CommonTermsQueryBuilder.java @@ -67,7 +67,7 @@ * @opensearch.internal */ @Deprecated -public class CommonTermsQueryBuilder extends AbstractQueryBuilder { +public class CommonTermsQueryBuilder extends AbstractQueryBuilder implements WithFieldName { public static final String COMMON_TERMS_QUERY_DEPRECATION_MSG = "[match] query which can efficiently " + "skip blocks of documents if the total number of hits is not tracked"; @@ -152,6 +152,7 @@ protected void doWriteTo(StreamOutput out) throws IOException { out.writeFloat(cutoffFrequency); } + @Override public String fieldName() { return this.fieldName; } diff --git a/server/src/main/java/org/opensearch/index/query/DistanceFeatureQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/DistanceFeatureQueryBuilder.java index 1d9f0479c6b17..e4f3d8556158a 100644 --- a/server/src/main/java/org/opensearch/index/query/DistanceFeatureQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/DistanceFeatureQueryBuilder.java @@ -57,7 +57,7 @@ * * @opensearch.internal */ -public class DistanceFeatureQueryBuilder extends AbstractQueryBuilder { +public class DistanceFeatureQueryBuilder extends AbstractQueryBuilder implements WithFieldName { public static final String NAME = "distance_feature"; private static final ParseField FIELD_FIELD = new ParseField("field"); @@ -136,7 +136,8 @@ protected Query doToQuery(QueryShardContext context) throws IOException { return fieldType.distanceFeatureQuery(origin.origin(), pivot, 1.0f, context); } - String fieldName() { + @Override + public String fieldName() { return field; } diff --git a/server/src/main/java/org/opensearch/index/query/ExistsQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/ExistsQueryBuilder.java index 6ae40fe1b1e64..f6b24e98f0f71 100644 --- a/server/src/main/java/org/opensearch/index/query/ExistsQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/ExistsQueryBuilder.java @@ -59,7 +59,7 @@ * * @opensearch.internal */ -public class ExistsQueryBuilder extends AbstractQueryBuilder { +public class ExistsQueryBuilder extends AbstractQueryBuilder implements WithFieldName { public static final String NAME = "exists"; public static final ParseField FIELD_FIELD = new ParseField("field"); @@ -89,6 +89,7 @@ protected void doWriteTo(StreamOutput out) throws IOException { /** * @return the field name that has to exist for this query to match */ + @Override public String fieldName() { return this.fieldName; } diff --git a/server/src/main/java/org/opensearch/index/query/FieldMaskingSpanQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/FieldMaskingSpanQueryBuilder.java index 4e73d87b07b7a..7846098b55071 100644 --- a/server/src/main/java/org/opensearch/index/query/FieldMaskingSpanQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/FieldMaskingSpanQueryBuilder.java @@ -53,7 +53,10 @@ * * @opensearch.internal */ -public class FieldMaskingSpanQueryBuilder extends AbstractQueryBuilder implements SpanQueryBuilder { +public class FieldMaskingSpanQueryBuilder extends AbstractQueryBuilder + implements + 
SpanQueryBuilder, + WithFieldName { public static final String NAME = "span_field_masking"; public static final ParseField SPAN_FIELD_MASKING_FIELD = new ParseField(NAME, "field_masking_span"); @@ -100,6 +103,7 @@ protected void doWriteTo(StreamOutput out) throws IOException { /** * @return the field name for this query */ + @Override public String fieldName() { return this.fieldName; } diff --git a/server/src/main/java/org/opensearch/index/query/GeoBoundingBoxQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/GeoBoundingBoxQueryBuilder.java index 1fade8601e2a6..52e96159cf06e 100644 --- a/server/src/main/java/org/opensearch/index/query/GeoBoundingBoxQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/GeoBoundingBoxQueryBuilder.java @@ -66,7 +66,7 @@ * * @opensearch.internal * */ -public class GeoBoundingBoxQueryBuilder extends AbstractQueryBuilder { +public class GeoBoundingBoxQueryBuilder extends AbstractQueryBuilder implements WithFieldName { public static final String NAME = "geo_bounding_box"; /** Default type for executing this query (memory as of this writing). */ @@ -263,6 +263,7 @@ public GeoExecType type() { } /** Returns the name of the field to base the bounding box computation on. */ + @Override public String fieldName() { return this.fieldName; } diff --git a/server/src/main/java/org/opensearch/index/query/GeoDistanceQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/GeoDistanceQueryBuilder.java index 8d126f19a204c..79377ba01701f 100644 --- a/server/src/main/java/org/opensearch/index/query/GeoDistanceQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/GeoDistanceQueryBuilder.java @@ -63,7 +63,7 @@ * * @opensearch.internal */ -public class GeoDistanceQueryBuilder extends AbstractQueryBuilder { +public class GeoDistanceQueryBuilder extends AbstractQueryBuilder implements WithFieldName { public static final String NAME = "geo_distance"; /** Default for distance unit computation. */ @@ -129,6 +129,7 @@ protected void doWriteTo(StreamOutput out) throws IOException { } /** Name of the field this query is operating on. 
*/ + @Override public String fieldName() { return this.fieldName; } diff --git a/server/src/main/java/org/opensearch/index/query/GeoPolygonQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/GeoPolygonQueryBuilder.java index 47eafa3893384..e120f04ed9351 100644 --- a/server/src/main/java/org/opensearch/index/query/GeoPolygonQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/GeoPolygonQueryBuilder.java @@ -61,7 +61,7 @@ * * @opensearch.internal */ -public class GeoPolygonQueryBuilder extends AbstractQueryBuilder { +public class GeoPolygonQueryBuilder extends AbstractQueryBuilder implements WithFieldName { public static final String NAME = "geo_polygon"; /** @@ -131,6 +131,7 @@ protected void doWriteTo(StreamOutput out) throws IOException { out.writeBoolean(ignoreUnmapped); } + @Override public String fieldName() { return fieldName; } diff --git a/server/src/main/java/org/opensearch/index/query/MatchBoolPrefixQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/MatchBoolPrefixQueryBuilder.java index 7ceb17203e837..2c2c2de943e2f 100644 --- a/server/src/main/java/org/opensearch/index/query/MatchBoolPrefixQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/MatchBoolPrefixQueryBuilder.java @@ -61,7 +61,7 @@ * * @opensearch.internal */ -public class MatchBoolPrefixQueryBuilder extends AbstractQueryBuilder { +public class MatchBoolPrefixQueryBuilder extends AbstractQueryBuilder implements WithFieldName { public static final String NAME = "match_bool_prefix"; @@ -127,6 +127,7 @@ protected void doWriteTo(StreamOutput out) throws IOException { } /** Returns the field name used in this query. */ + @Override public String fieldName() { return this.fieldName; } diff --git a/server/src/main/java/org/opensearch/index/query/MatchPhrasePrefixQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/MatchPhrasePrefixQueryBuilder.java index d61a5957627ea..3337a31658ca1 100644 --- a/server/src/main/java/org/opensearch/index/query/MatchPhrasePrefixQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/MatchPhrasePrefixQueryBuilder.java @@ -52,7 +52,7 @@ * * @opensearch.internal */ -public class MatchPhrasePrefixQueryBuilder extends AbstractQueryBuilder { +public class MatchPhrasePrefixQueryBuilder extends AbstractQueryBuilder implements WithFieldName { public static final String NAME = "match_phrase_prefix"; public static final ParseField MAX_EXPANSIONS_FIELD = new ParseField("max_expansions"); public static final ParseField ZERO_TERMS_QUERY_FIELD = new ParseField("zero_terms_query"); @@ -104,6 +104,7 @@ protected void doWriteTo(StreamOutput out) throws IOException { } /** Returns the field name used in this query. 
*/ + @Override public String fieldName() { return this.fieldName; } diff --git a/server/src/main/java/org/opensearch/index/query/MatchPhraseQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/MatchPhraseQueryBuilder.java index 6cdf6c6600304..97e03d53d38f1 100644 --- a/server/src/main/java/org/opensearch/index/query/MatchPhraseQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/MatchPhraseQueryBuilder.java @@ -52,7 +52,7 @@ * * @opensearch.internal */ -public class MatchPhraseQueryBuilder extends AbstractQueryBuilder { +public class MatchPhraseQueryBuilder extends AbstractQueryBuilder implements WithFieldName { public static final String NAME = "match_phrase"; public static final ParseField SLOP_FIELD = new ParseField("slop"); public static final ParseField ZERO_TERMS_QUERY_FIELD = new ParseField("zero_terms_query"); @@ -100,6 +100,7 @@ protected void doWriteTo(StreamOutput out) throws IOException { } /** Returns the field name used in this query. */ + @Override public String fieldName() { return this.fieldName; } diff --git a/server/src/main/java/org/opensearch/index/query/MatchQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/MatchQueryBuilder.java index 5e9e6a3660e76..593077d18951e 100644 --- a/server/src/main/java/org/opensearch/index/query/MatchQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/MatchQueryBuilder.java @@ -56,7 +56,7 @@ * * @opensearch.internal */ -public class MatchQueryBuilder extends AbstractQueryBuilder { +public class MatchQueryBuilder extends AbstractQueryBuilder implements WithFieldName { private static final String CUTOFF_FREQUENCY_DEPRECATION_MSG = "you can omit this option, " + "the [match] query can skip block of documents efficiently if the total number of hits is not tracked"; @@ -171,6 +171,7 @@ protected void doWriteTo(StreamOutput out) throws IOException { } /** Returns the field name used in this query. */ + @Override public String fieldName() { return this.fieldName; } diff --git a/server/src/main/java/org/opensearch/index/query/MultiTermQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/MultiTermQueryBuilder.java index b8e88da4741bb..f854df79f1ee8 100644 --- a/server/src/main/java/org/opensearch/index/query/MultiTermQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/MultiTermQueryBuilder.java @@ -36,9 +36,4 @@ * * @opensearch.internal */ -public interface MultiTermQueryBuilder extends QueryBuilder { - /** - * Get the field name for this query. - */ - String fieldName(); -} +public interface MultiTermQueryBuilder extends QueryBuilder, WithFieldName {} diff --git a/server/src/main/java/org/opensearch/index/query/SpanNearQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/SpanNearQueryBuilder.java index 30a1c29c29126..179673f500a92 100644 --- a/server/src/main/java/org/opensearch/index/query/SpanNearQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/SpanNearQueryBuilder.java @@ -322,7 +322,7 @@ public void visit(QueryBuilderVisitor visitor) { * * @opensearch.internal */ - public static class SpanGapQueryBuilder implements SpanQueryBuilder { + public static class SpanGapQueryBuilder implements SpanQueryBuilder, WithFieldName { public static final String NAME = "span_gap"; /** Name of field to match against. 
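The remaining query builders below are wired up the same way. As a usage illustration only (this visitor is not part of the change), the new WithFieldName interface lets generic code pull field names out of a query tree without knowing each concrete builder; a sketch using the existing QueryBuilderVisitor:

import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.search.BooleanClause;
import org.opensearch.index.query.QueryBuilder;
import org.opensearch.index.query.QueryBuilderVisitor;
import org.opensearch.index.query.WithFieldName;

final class FieldNameCollector implements QueryBuilderVisitor {
    final Set<String> fieldNames = new HashSet<>();

    @Override
    public void accept(QueryBuilder qb) {
        // Every builder updated in this change now advertises its field via WithFieldName.
        if (qb instanceof WithFieldName) {
            fieldNames.add(((WithFieldName) qb).fieldName());
        }
    }

    @Override
    public QueryBuilderVisitor getChildVisitor(BooleanClause.Occur occur) {
        return this; // reuse the same collector for nested clauses
    }
}

A caller would invoke queryBuilder.visit(new FieldNameCollector()) and read fieldNames afterwards.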
*/ @@ -358,6 +358,7 @@ public SpanGapQueryBuilder(StreamInput in) throws IOException { /** * @return fieldName The name of the field */ + @Override public String fieldName() { return fieldName; } diff --git a/server/src/main/java/org/opensearch/index/query/TermsQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/TermsQueryBuilder.java index 4b92d6a1f5460..dbd141e00f81c 100644 --- a/server/src/main/java/org/opensearch/index/query/TermsQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/TermsQueryBuilder.java @@ -79,7 +79,7 @@ * * @opensearch.internal */ -public class TermsQueryBuilder extends AbstractQueryBuilder { +public class TermsQueryBuilder extends AbstractQueryBuilder implements WithFieldName { public static final String NAME = "terms"; private final String fieldName; @@ -269,6 +269,7 @@ protected void doWriteTo(StreamOutput out) throws IOException { } } + @Override public String fieldName() { return this.fieldName; } diff --git a/server/src/main/java/org/opensearch/index/query/WithFieldName.java b/server/src/main/java/org/opensearch/index/query/WithFieldName.java new file mode 100644 index 0000000000000..adfd789ac3707 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/query/WithFieldName.java @@ -0,0 +1,21 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.query; + +/** + * Interface for classes with a fieldName method + * + * @opensearch.internal + */ +public interface WithFieldName { + /** + * Get the field name for this query. + */ + String fieldName(); +} diff --git a/server/src/main/java/org/opensearch/index/store/remote/utils/TransferManager.java b/server/src/main/java/org/opensearch/index/store/remote/utils/TransferManager.java index f07c4832d982c..94c25202ac90c 100644 --- a/server/src/main/java/org/opensearch/index/store/remote/utils/TransferManager.java +++ b/server/src/main/java/org/opensearch/index/store/remote/utils/TransferManager.java @@ -95,6 +95,19 @@ public IndexInput fetchBlob(BlobFetchRequest blobFetchRequest) throws IOExceptio @SuppressWarnings("removal") private static FileCachedIndexInput createIndexInput(FileCache fileCache, StreamReader streamReader, BlobFetchRequest request) { try { + // This local file cache is ref counted and may not strictly enforce configured capacity. + // If we find available capacity is exceeded, deny further BlobFetchRequests. 
+ if (fileCache.capacity() < fileCache.usage().usage()) { + fileCache.prune(); + throw new IOException( + "Local file cache capacity (" + + fileCache.capacity() + + ") exceeded (" + + fileCache.usage().usage() + + ") - BlobFetchRequest failed: " + + request.getFilePath() + ); + } if (Files.exists(request.getFilePath()) == false) { logger.trace("Fetching from Remote in createIndexInput of Transfer Manager"); try ( diff --git a/server/src/main/java/org/opensearch/indices/recovery/LocalStorePeerRecoverySourceHandler.java b/server/src/main/java/org/opensearch/indices/recovery/LocalStorePeerRecoverySourceHandler.java index ac6b2e6b77d18..234e9ba66f340 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/LocalStorePeerRecoverySourceHandler.java +++ b/server/src/main/java/org/opensearch/indices/recovery/LocalStorePeerRecoverySourceHandler.java @@ -12,15 +12,12 @@ import org.opensearch.action.StepListener; import org.opensearch.action.support.ThreadedActionListener; import org.opensearch.action.support.replication.ReplicationResponse; -import org.opensearch.cluster.routing.IndexShardRoutingTable; -import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.common.SetOnce; import org.opensearch.common.concurrent.GatedCloseable; import org.opensearch.common.lease.Releasable; import org.opensearch.common.unit.TimeValue; import org.opensearch.core.action.ActionListener; import org.opensearch.index.engine.RecoveryEngineException; -import org.opensearch.index.seqno.ReplicationTracker; import org.opensearch.index.seqno.RetentionLease; import org.opensearch.index.seqno.RetentionLeaseNotFoundException; import org.opensearch.index.seqno.RetentionLeases; @@ -58,21 +55,7 @@ public LocalStorePeerRecoverySourceHandler( @Override protected void innerRecoveryToTarget(ActionListener listener, Consumer onFailure) throws IOException { final SetOnce retentionLeaseRef = new SetOnce<>(); - - RunUnderPrimaryPermit.run(() -> { - final IndexShardRoutingTable routingTable = shard.getReplicationGroup().getRoutingTable(); - ShardRouting targetShardRouting = routingTable.getByAllocationId(request.targetAllocationId()); - if (targetShardRouting == null) { - logger.debug( - "delaying recovery of {} as it is not listed as assigned to target node {}", - request.shardId(), - request.targetNode() - ); - throw new DelayRecoveryException("source node does not have the shard listed in its state as allocated on the node"); - } - assert targetShardRouting.initializing() : "expected recovery target to be initializing but was " + targetShardRouting; - retentionLeaseRef.set(shard.getRetentionLeases().get(ReplicationTracker.getPeerRecoveryRetentionLeaseId(targetShardRouting))); - }, shardId + " validating recovery target [" + request.targetAllocationId() + "] registered ", shard, cancellableThreads, logger); + waitForAssignmentPropagate(retentionLeaseRef); final Closeable retentionLock = shard.acquireHistoryRetentionLock(); resources.add(retentionLock); final long startingSeqNo; diff --git a/server/src/main/java/org/opensearch/indices/recovery/RecoverySourceHandler.java b/server/src/main/java/org/opensearch/indices/recovery/RecoverySourceHandler.java index 8966516d3ef7f..7aa90d2f9fa3e 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RecoverySourceHandler.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RecoverySourceHandler.java @@ -41,10 +41,14 @@ import org.apache.lucene.util.ArrayUtil; import org.opensearch.action.ActionRunnable; import org.opensearch.action.StepListener; +import 
org.opensearch.action.bulk.BackoffPolicy; import org.opensearch.action.support.PlainActionFuture; import org.opensearch.action.support.ThreadedActionListener; import org.opensearch.action.support.replication.ReplicationResponse; +import org.opensearch.cluster.routing.IndexShardRoutingTable; +import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.common.CheckedRunnable; +import org.opensearch.common.SetOnce; import org.opensearch.common.StopWatch; import org.opensearch.common.concurrent.GatedCloseable; import org.opensearch.common.lease.Releasable; @@ -59,6 +63,7 @@ import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.unit.ByteSizeValue; import org.opensearch.index.engine.RecoveryEngineException; +import org.opensearch.index.seqno.ReplicationTracker; import org.opensearch.index.seqno.RetentionLease; import org.opensearch.index.seqno.RetentionLeaseNotFoundException; import org.opensearch.index.seqno.RetentionLeases; @@ -79,12 +84,14 @@ import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; +import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReference; import java.util.function.Consumer; import java.util.function.IntSupplier; import java.util.stream.StreamSupport; @@ -191,6 +198,50 @@ public void recoverToTarget(ActionListener listener) { protected abstract void innerRecoveryToTarget(ActionListener listener, Consumer onFailure) throws IOException; + /* + Waits for cluster state propagation of assignment of replica on the target node + */ + void waitForAssignmentPropagate(SetOnce retentionLeaseRef) { + BackoffPolicy EXPONENTIAL_BACKOFF_POLICY = BackoffPolicy.exponentialBackoff(TimeValue.timeValueMillis(1000), 5); + AtomicReference targetShardRouting = new AtomicReference<>(); + Iterator backoffDelayIterator = EXPONENTIAL_BACKOFF_POLICY.iterator(); + while (backoffDelayIterator.hasNext()) { + RunUnderPrimaryPermit.run(() -> { + final IndexShardRoutingTable routingTable = shard.getReplicationGroup().getRoutingTable(); + targetShardRouting.set(routingTable.getByAllocationId(request.targetAllocationId())); + if (targetShardRouting.get() == null) { + logger.info( + "delaying recovery of {} as it is not listed as assigned to target node {}", + request.shardId(), + request.targetNode() + ); + Thread.sleep(backoffDelayIterator.next().millis()); + } + if (targetShardRouting.get() != null) { + assert targetShardRouting.get().initializing() : "expected recovery target to be initializing but was " + + targetShardRouting; + retentionLeaseRef.set( + shard.getRetentionLeases().get(ReplicationTracker.getPeerRecoveryRetentionLeaseId(targetShardRouting.get())) + ); + } + + }, + shardId + " validating recovery target [" + request.targetAllocationId() + "] registered ", + shard, + cancellableThreads, + logger + ); + + if (targetShardRouting.get() != null) { + return; + } + } + if (targetShardRouting.get() != null) { + return; + } + throw new DelayRecoveryException("source node does not have the shard listed in its state as allocated on the node"); + } + protected void finalizeStepAndCompleteFuture( long startingSeqNo, StepListener sendSnapshotStep, diff --git a/server/src/main/java/org/opensearch/indices/recovery/RemoteStorePeerRecoverySourceHandler.java 
b/server/src/main/java/org/opensearch/indices/recovery/RemoteStorePeerRecoverySourceHandler.java index 66c7a3b48f28f..383ed92314165 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RemoteStorePeerRecoverySourceHandler.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RemoteStorePeerRecoverySourceHandler.java @@ -10,11 +10,13 @@ import org.apache.lucene.index.IndexCommit; import org.opensearch.action.StepListener; +import org.opensearch.common.SetOnce; import org.opensearch.common.concurrent.GatedCloseable; import org.opensearch.common.lease.Releasable; import org.opensearch.common.unit.TimeValue; import org.opensearch.core.action.ActionListener; import org.opensearch.index.engine.RecoveryEngineException; +import org.opensearch.index.seqno.RetentionLease; import org.opensearch.index.seqno.SequenceNumbers; import org.opensearch.index.shard.IndexShard; import org.opensearch.indices.RunUnderPrimaryPermit; @@ -48,7 +50,8 @@ protected void innerRecoveryToTarget(ActionListener listener, // A replica of an index with remote translog does not require the translogs locally and keeps receiving the // updated segments file on refresh, flushes, and merges. In recovery, here, only file-based recovery is performed // and there is no translog replay done. - + final SetOnce retentionLeaseRef = new SetOnce<>(); + waitForAssignmentPropagate(retentionLeaseRef); final StepListener sendFileStep = new StepListener<>(); final StepListener prepareEngineStep = new StepListener<>(); final StepListener sendSnapshotStep = new StepListener<>(); @@ -102,4 +105,5 @@ protected void innerRecoveryToTarget(ActionListener listener, finalizeStepAndCompleteFuture(startingSeqNo, sendSnapshotStep, sendFileStep, prepareEngineStep, onFailure); } + } diff --git a/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java b/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java index a2364d96f2098..f6e550525a3e5 100644 --- a/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java @@ -631,6 +631,34 @@ public void onFailure(Exception e) { ); } + private void cloneSnapshotPinnedTimestamp( + RepositoryData repositoryData, + SnapshotId sourceSnapshot, + Snapshot snapshot, + long timestampToPin, + ActionListener listener + ) { + remoteStorePinnedTimestampService.cloneTimestamp( + timestampToPin, + snapshot.getRepository() + SNAPSHOT_PINNED_TIMESTAMP_DELIMITER + sourceSnapshot.getUUID(), + snapshot.getRepository() + SNAPSHOT_PINNED_TIMESTAMP_DELIMITER + snapshot.getSnapshotId().getUUID(), + new ActionListener() { + @Override + public void onResponse(Void unused) { + logger.debug("Timestamp pinned successfully for clone snapshot {}", snapshot.getSnapshotId().getName()); + listener.onResponse(repositoryData); + } + + @Override + public void onFailure(Exception e) { + logger.error("Failed to pin timestamp for clone snapshot {} with exception {}", snapshot.getSnapshotId().getName(), e); + listener.onFailure(e); + + } + } + ); + } + private static void ensureSnapshotNameNotRunning( List runningSnapshots, String repositoryName, @@ -652,9 +680,14 @@ private static Map getInFlightIndexIds(List listener) { + /** + * This method does some pre-validation, checks for the presence of source snapshot in repository data. + * For shallow snapshot v2 clone, it checks the pinned timestamp to be greater than zero in the source snapshot. 
+ * + * @param request snapshot request + * @param listener snapshot completion listener + */ + public void executeClone(CloneSnapshotRequest request, ActionListener listener) { final String repositoryName = request.repository(); Repository repository = repositoriesService.repository(repositoryName); if (repository.isReadOnly()) { @@ -663,10 +696,230 @@ public void cloneSnapshot(CloneSnapshotRequest request, ActionListener lis } final String snapshotName = indexNameExpressionResolver.resolveDateMathExpression(request.target()); validate(repositoryName, snapshotName); - // TODO: create snapshot UUID in CloneSnapshotRequest and make this operation idempotent to cleanly deal with transport layer - // retries final SnapshotId snapshotId = new SnapshotId(snapshotName, UUIDs.randomBase64UUID()); final Snapshot snapshot = new Snapshot(repositoryName, snapshotId); + try { + final StepListener repositoryDataListener = new StepListener<>(); + repositoriesService.getRepositoryData(repositoryName, repositoryDataListener); + repositoryDataListener.whenComplete(repositoryData -> { + final SnapshotId sourceSnapshotId = repositoryData.getSnapshotIds() + .stream() + .filter(src -> src.getName().equals(request.source())) + .findAny() + .orElseThrow(() -> new SnapshotMissingException(repositoryName, request.source())); + final StepListener snapshotInfoListener = new StepListener<>(); + final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT); + + executor.execute(ActionRunnable.supply(snapshotInfoListener, () -> repository.getSnapshotInfo(sourceSnapshotId))); + + snapshotInfoListener.whenComplete(sourceSnapshotInfo -> { + if (sourceSnapshotInfo.getPinnedTimestamp() > 0) { + if (hasWildCardPatterForCloneSnapshotV2(request.indices()) == false) { + throw new SnapshotException( + repositoryName, + snapshotName, + "Aborting clone for Snapshot-v2, only wildcard pattern '*' is supported for indices" + ); + } + cloneSnapshotV2(request, snapshot, repositoryName, repository, listener); + } else { + cloneSnapshot(request, snapshot, repositoryName, repository, listener); + } + }, e -> listener.onFailure(e)); + }, e -> listener.onFailure(e)); + + } catch (Exception e) { + assert false : new AssertionError(e); + logger.error("Snapshot {} clone failed with exception {}", snapshot.getSnapshotId().getName(), e); + listener.onFailure(e); + } + } + + /** + * This method is responsible for creating a clone of the shallow snapshot v2. + * It pins the same timestamp that is pinned by the source snapshot. + * + * Unlike traditional snapshot operations, this method performs a synchronous clone execution and doesn't + * upload any shard metadata to the snapshot repository. + * The pinned timestamp is later reconciled with remote store segment and translog metadata files during the restore + * operation. 
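Usage sketch (repository and snapshot names are placeholders, not from this change): a clone whose source is a shallow snapshot-v2 passes the pre-validation below only when the request uses the wildcard index pattern, matching the hasWildCardPatterForCloneSnapshotV2 check added later in this file.

import org.opensearch.action.admin.cluster.snapshots.clone.CloneSnapshotRequest;

final class CloneSnapshotV2RequestExample {
    private CloneSnapshotV2RequestExample() {}

    // Builds a clone request that satisfies the v2 pre-validation: when the source
    // snapshot has a pinned timestamp, only the "*" index pattern is accepted.
    static CloneSnapshotRequest wildcardClone() {
        return new CloneSnapshotRequest(
            "my-repo",          // repository name (placeholder)
            "source-snapshot",  // source snapshot (placeholder)
            "cloned-snapshot",  // target snapshot (placeholder)
            new String[] { "*" }
        );
    }
}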
+ * + * @param request snapshot request + * @param snapshot clone snapshot + * @param repositoryName snapshot repository name + * @param repository snapshot repository + * @param listener completion listener + */ + public void cloneSnapshotV2( + CloneSnapshotRequest request, + Snapshot snapshot, + String repositoryName, + Repository repository, + ActionListener listener + ) { + + long startTime = System.currentTimeMillis(); + ClusterState currentState = clusterService.state(); + String snapshotName = snapshot.getSnapshotId().getName(); + repository.executeConsistentStateUpdate(repositoryData -> new ClusterStateUpdateTask(Priority.URGENT) { + private SnapshotsInProgress.Entry newEntry; + private SnapshotId sourceSnapshotId; + private List indicesForSnapshot; + + @Override + public ClusterState execute(ClusterState currentState) { + createSnapshotPreValidations(currentState, repositoryData, repositoryName, snapshotName); + final SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY); + final List runningSnapshots = snapshots.entries(); + + sourceSnapshotId = repositoryData.getSnapshotIds() + .stream() + .filter(src -> src.getName().equals(request.source())) + .findAny() + .orElseThrow(() -> new SnapshotMissingException(repositoryName, request.source())); + + final SnapshotDeletionsInProgress deletionsInProgress = currentState.custom( + SnapshotDeletionsInProgress.TYPE, + SnapshotDeletionsInProgress.EMPTY + ); + if (deletionsInProgress.getEntries().stream().anyMatch(entry -> entry.getSnapshots().contains(sourceSnapshotId))) { + throw new ConcurrentSnapshotExecutionException( + repositoryName, + sourceSnapshotId.getName(), + "cannot clone from snapshot that is being deleted" + ); + } + indicesForSnapshot = new ArrayList<>(); + for (IndexId indexId : repositoryData.getIndices().values()) { + if (repositoryData.getSnapshots(indexId).contains(sourceSnapshotId)) { + indicesForSnapshot.add(indexId.getName()); + } + } + + newEntry = SnapshotsInProgress.startClone( + snapshot, + sourceSnapshotId, + repositoryData.resolveIndices(indicesForSnapshot), + threadPool.absoluteTimeInMillis(), + repositoryData.getGenId(), + minCompatibleVersion(currentState.nodes().getMinNodeVersion(), repositoryData, null) + ); + final List newEntries = new ArrayList<>(runningSnapshots); + newEntries.add(newEntry); + return ClusterState.builder(currentState).putCustom(SnapshotsInProgress.TYPE, SnapshotsInProgress.of(newEntries)).build(); + } + + @Override + public void onFailure(String source, Exception e) { + logger.warn(() -> new ParameterizedMessage("[{}][{}] failed to clone snapshot-v2", repositoryName, snapshotName), e); + listener.onFailure(e); + } + + @Override + public void clusterStateProcessed(String source, ClusterState oldState, final ClusterState newState) { + logger.info("snapshot-v2 clone [{}] started", snapshot); + final StepListener snapshotInfoListener = new StepListener<>(); + final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT); + + executor.execute(ActionRunnable.supply(snapshotInfoListener, () -> repository.getSnapshotInfo(sourceSnapshotId))); + final ShardGenerations shardGenerations = repositoryData.shardGenerations(); + + snapshotInfoListener.whenComplete(snapshotInfo -> { + final SnapshotInfo cloneSnapshotInfo = new SnapshotInfo( + snapshot.getSnapshotId(), + indicesForSnapshot, + newEntry.dataStreams(), + startTime, + null, + System.currentTimeMillis(), + snapshotInfo.totalShards(), + Collections.emptyList(), + 
newEntry.includeGlobalState(), + newEntry.userMetadata(), + true, + snapshotInfo.getPinnedTimestamp() + ); + if (!clusterService.state().nodes().isLocalNodeElectedClusterManager()) { + throw new SnapshotException(repositoryName, snapshotName, "Aborting snapshot-v2 clone, no longer cluster manager"); + } + final StepListener pinnedTimestampListener = new StepListener<>(); + pinnedTimestampListener.whenComplete(repoData -> { + logger.info("snapshot-v2 clone [{}] completed successfully", snapshot); + listener.onResponse(null); + }, listener::onFailure); + repository.finalizeSnapshot( + shardGenerations, + repositoryData.getGenId(), + metadataForSnapshot( + currentState.metadata(), + newEntry.includeGlobalState(), + false, + newEntry.dataStreams(), + newEntry.indices() + ), + cloneSnapshotInfo, + repositoryData.getVersion(sourceSnapshotId), + state -> stateWithoutSnapshot(state, snapshot), + Priority.IMMEDIATE, + new ActionListener() { + @Override + public void onResponse(RepositoryData repositoryData) { + if (!clusterService.state().nodes().isLocalNodeElectedClusterManager()) { + failSnapshotCompletionListeners( + snapshot, + new SnapshotException(snapshot, "Aborting Snapshot-v2 clone, no longer cluster manager") + ); + listener.onFailure( + new SnapshotException( + repositoryName, + snapshotName, + "Aborting Snapshot-v2 clone, no longer cluster manager" + ) + ); + return; + } + cloneSnapshotPinnedTimestamp( + repositoryData, + sourceSnapshotId, + snapshot, + snapshotInfo.getPinnedTimestamp(), + pinnedTimestampListener + ); + } + + @Override + public void onFailure(Exception e) { + logger.error( + "Failed to upload files to snapshot repo {} for clone snapshot-v2 {} ", + repositoryName, + snapshotName + ); + listener.onFailure(e); + } + } + ); + + }, listener::onFailure); + } + + @Override + public TimeValue timeout() { + return request.clusterManagerNodeTimeout(); + } + }, "clone_snapshot_v2 [" + request.source() + "][" + snapshotName + ']', listener::onFailure); + } + + // TODO: It is worth revisiting the design choice of creating a placeholder entry in snapshots-in-progress here once we have a cache + // for repository metadata and loading it has predictable performance + public void cloneSnapshot( + CloneSnapshotRequest request, + Snapshot snapshot, + String repositoryName, + Repository repository, + ActionListener listener + ) { + String snapshotName = snapshot.getSnapshotId().getName(); + initializingClones.add(snapshot); repository.executeConsistentStateUpdate(repositoryData -> new ClusterStateUpdateTask() { @@ -3499,6 +3752,15 @@ private void startExecutableClones(SnapshotsInProgress snapshotsInProgress, @Nul } } + private boolean hasWildCardPatterForCloneSnapshotV2(String[] indices) { + for (String index : indices) { + if ("*".equals(index)) { + return true; + } + } + return false; + } + private class UpdateSnapshotStatusAction extends TransportClusterManagerNodeAction< UpdateIndexShardSnapshotStatusRequest, UpdateIndexShardSnapshotStatusResponse> { diff --git a/server/src/test/java/org/opensearch/cluster/coordination/CoordinationStateTests.java b/server/src/test/java/org/opensearch/cluster/coordination/CoordinationStateTests.java index ee9a2951ec541..32cb95e0c04f6 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/CoordinationStateTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/CoordinationStateTests.java @@ -57,6 +57,7 @@ import java.io.IOException; import java.util.Collections; import java.util.Locale; +import java.util.Map; import 
java.util.Optional; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -66,26 +67,34 @@ import static java.util.Collections.emptyMap; import static java.util.Collections.emptySet; -import static org.opensearch.common.util.FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL; import static org.opensearch.gateway.remote.ClusterMetadataManifest.MANIFEST_CURRENT_CODEC_VERSION; import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING; +import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_PUBLICATION_SETTING_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.nullValue; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoInteractions; +import static org.mockito.Mockito.when; public class CoordinationStateTests extends OpenSearchTestCase { private DiscoveryNode node1; private DiscoveryNode node2; private DiscoveryNode node3; + private DiscoveryNode nodeWithPub; private ClusterState initialStateNode1; + private ClusterState initialStateNode2; private PersistedState ps1; private PersistedStateRegistry psr1; @@ -99,16 +108,18 @@ public void setupNodes() { node1 = createNode("node1"); node2 = createNode("node2"); node3 = createNode("node3"); + nodeWithPub = createNode( + "nodeWithPub", + Map.of( + REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, + "", + REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, + "" + ) + ); initialStateNode1 = clusterState(0L, 0L, node1, VotingConfiguration.EMPTY_CONFIG, VotingConfiguration.EMPTY_CONFIG, 42L); - ClusterState initialStateNode2 = clusterState( - 0L, - 0L, - node2, - VotingConfiguration.EMPTY_CONFIG, - VotingConfiguration.EMPTY_CONFIG, - 42L - ); + initialStateNode2 = clusterState(0L, 0L, node2, VotingConfiguration.EMPTY_CONFIG, VotingConfiguration.EMPTY_CONFIG, 42L); ClusterState initialStateNode3 = clusterState( 0L, 0L, @@ -128,6 +139,10 @@ public void setupNodes() { } public static DiscoveryNode createNode(String id) { + return createNode(id, Collections.emptyMap()); + } + + public static DiscoveryNode createNode(String id, Map attributes) { final TransportAddress address = buildNewFakeTransportAddress(); return new DiscoveryNode( "", @@ -136,7 +151,7 @@ public static DiscoveryNode createNode(String id) { address.address().getHostString(), address.getAddress(), address, - Collections.emptyMap(), + attributes, DiscoveryNodeRole.BUILT_IN_ROLES, Version.CURRENT ); @@ -913,7 +928,7 @@ public void testSafety() { public void testHandlePrePublishAndCommitWhenRemoteStateDisabled() { final PersistedStateRegistry persistedStateRegistry = persistedStateRegistry(); persistedStateRegistry.addPersistedState(PersistedStateType.LOCAL, ps1); - 
final PersistedStateRegistry persistedStateRegistrySpy = Mockito.spy(persistedStateRegistry); + final PersistedStateRegistry persistedStateRegistrySpy = spy(persistedStateRegistry); final CoordinationState coordinationState = createCoordinationState(persistedStateRegistrySpy, node1, Settings.EMPTY); final VotingConfiguration initialConfig = VotingConfiguration.of(node1); final ClusterState clusterState = clusterState(0L, 0L, node1, initialConfig, initialConfig, 42L); @@ -932,7 +947,7 @@ public void testHandlePrePublishAndCommitWhenRemoteStateDisabled() { public void testHandlePrePublishAndCommitWhenRemoteStateEnabled() throws IOException { final RemoteClusterStateService remoteClusterStateService = Mockito.mock(RemoteClusterStateService.class); final VotingConfiguration initialConfig = VotingConfiguration.of(node1); - final ClusterState clusterState = clusterState(0L, 0L, node1, initialConfig, initialConfig, 42L); + final ClusterState clusterState = clusterStateWithClusterManager(0L, 0L, node1, node1, initialConfig, initialConfig, 42L); final String previousClusterUUID = "prev-cluster-uuid"; final ClusterMetadataManifest manifest = ClusterMetadataManifest.builder() .clusterTerm(0L) @@ -948,8 +963,9 @@ public void testHandlePrePublishAndCommitWhenRemoteStateEnabled() throws IOExcep .previousClusterUUID(randomAlphaOfLength(10)) .clusterUUIDCommitted(true) .build(); - Mockito.when(remoteClusterStateService.writeFullMetadata(clusterState, previousClusterUUID, MANIFEST_CURRENT_CODEC_VERSION)) - .thenReturn(new RemoteClusterStateManifestInfo(manifest, "path/to/manifest")); + when(remoteClusterStateService.writeFullMetadata(clusterState, previousClusterUUID, MANIFEST_CURRENT_CODEC_VERSION)).thenReturn( + new RemoteClusterStateManifestInfo(manifest, "path/to/manifest") + ); final PersistedStateRegistry persistedStateRegistry = persistedStateRegistry(); persistedStateRegistry.addPersistedState(PersistedStateType.LOCAL, ps1); @@ -958,40 +974,21 @@ public void testHandlePrePublishAndCommitWhenRemoteStateEnabled() throws IOExcep new RemotePersistedState(remoteClusterStateService, previousClusterUUID) ); - String randomRepoName = "randomRepoName"; - String stateRepoTypeAttributeKey = String.format( - Locale.getDefault(), - "node.attr." + REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT, - randomRepoName - ); - String stateRepoSettingsAttributeKeyPrefix = String.format( - Locale.getDefault(), - "node.attr." + REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX, - randomRepoName - ); - - Settings settings = Settings.builder() - .put("node.attr." 
+ REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, randomRepoName) - .put(stateRepoTypeAttributeKey, FsRepository.TYPE) - .put(stateRepoSettingsAttributeKeyPrefix + "location", "randomRepoPath") - .put(REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true) - .build(); - - final CoordinationState coordinationState = createCoordinationState(persistedStateRegistry, node1, settings); + final CoordinationState coordinationState = createCoordinationState(persistedStateRegistry, node1, remoteStateSettings()); coordinationState.handlePrePublish(clusterState); Mockito.verify(remoteClusterStateService, Mockito.times(1)) .writeFullMetadata(clusterState, previousClusterUUID, MANIFEST_CURRENT_CODEC_VERSION); assertThat(persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).getLastAcceptedState(), equalTo(clusterState)); - Mockito.when(remoteClusterStateService.markLastStateAsCommitted(any(), any())) - .thenReturn(new RemoteClusterStateManifestInfo(manifest, "path/to/manifest")); + when(remoteClusterStateService.markLastStateAsCommitted(any(), any(), eq(false))).thenReturn( + new RemoteClusterStateManifestInfo(manifest, "path/to/manifest") + ); coordinationState.handlePreCommit(); ClusterState committedClusterState = ClusterState.builder(clusterState) .metadata(Metadata.builder(clusterState.metadata()).clusterUUIDCommitted(true).build()) .build(); - // Mockito.verify(remoteClusterStateService, Mockito.times(1)).markLastStateAsCommitted(committedClusterState, manifest); ArgumentCaptor clusterStateCaptor = ArgumentCaptor.forClass(ClusterState.class); - verify(remoteClusterStateService, times(1)).markLastStateAsCommitted(clusterStateCaptor.capture(), any()); + verify(remoteClusterStateService, times(1)).markLastStateAsCommitted(clusterStateCaptor.capture(), any(), eq(false)); assertThat(clusterStateCaptor.getValue().metadata().indices(), equalTo(committedClusterState.metadata().indices())); assertThat(clusterStateCaptor.getValue().metadata().clusterUUID(), equalTo(committedClusterState.metadata().clusterUUID())); assertThat(clusterStateCaptor.getValue().stateUUID(), equalTo(committedClusterState.stateUUID())); @@ -1006,11 +1003,276 @@ public void testHandlePrePublishAndCommitWhenRemoteStateEnabled() throws IOExcep ); } + public void testHandlePublishRequestOnFollowerWhenRemotePublicationEnabled() { + final RemoteClusterStateService remoteClusterStateService = Mockito.mock(RemoteClusterStateService.class); + // cluster manager is node1 and node2 is a follower node + VotingConfiguration initialConfig = VotingConfiguration.of(node1); + ClusterState state1 = clusterState( + 0L, + 0L, + DiscoveryNodes.builder() + .add(node1) + .add(nodeWithPub) + .clusterManagerNodeId(node1.getId()) + .localNodeId(nodeWithPub.getId()) + .build(), + initialConfig, + initialConfig, + 42L + ); + final PersistedStateRegistry persistedStateRegistry = persistedStateRegistry(); + persistedStateRegistry.addPersistedState(PersistedStateType.LOCAL, new InMemoryPersistedState(0L, initialStateNode2)); + persistedStateRegistry.addPersistedState( + PersistedStateType.REMOTE, + new RemotePersistedState(remoteClusterStateService, state1.metadata().clusterUUID()) + ); + + final CoordinationState coordinationState = createCoordinationState( + persistedStateRegistry, + nodeWithPub, + remotePublicationSettings() + ); + coordinationState.setInitialState(state1); + long newTerm = randomLongBetween(1, 10); + StartJoinRequest startJoinRequest = new StartJoinRequest(nodeWithPub, newTerm); + + 
coordinationState.handleStartJoin(startJoinRequest); + + ClusterState state2 = setValue( + ClusterState.builder(state1) + .metadata( + Metadata.builder(state1.metadata()) + .coordinationMetadata(CoordinationMetadata.builder(state1.coordinationMetadata()).term(newTerm).build()) + .build() + ) + .version(randomLongBetween(1, 10)) + .build(), + 43L + ); + final ClusterMetadataManifest manifest = ClusterMetadataManifest.builder() + .clusterTerm(1L) + .stateVersion(state2.version()) + .clusterUUID(state2.metadata().clusterUUID()) + .nodeId(node1.getId()) + .stateUUID(randomAlphaOfLength(10)) + .opensearchVersion(Version.CURRENT) + .committed(false) + .codecVersion(1) + .globalMetadataFileName(randomAlphaOfLength(10)) + .indices(Collections.emptyList()) + .previousClusterUUID(randomAlphaOfLength(10)) + .clusterUUIDCommitted(true) + .build(); + + PublishResponse publishResponse = coordinationState.handlePublishRequest(new RemoteStatePublishRequest(state2, manifest)); + assertEquals(state2.term(), publishResponse.getTerm()); + assertEquals(state2.version(), publishResponse.getVersion()); + verifyNoInteractions(remoteClusterStateService); + assertEquals(state2, persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).getLastAcceptedState()); + assertEquals(manifest, persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).getLastAcceptedManifest()); + } + + public void testHandleCommitOnFollowerNodeWhenRemotePublicationEnabled() { + RemoteClusterStateService remoteClusterStateService = mock(RemoteClusterStateService.class); + VotingConfiguration initialConfig = VotingConfiguration.of(node1, nodeWithPub); + ClusterState state1 = clusterState( + 0L, + 0L, + DiscoveryNodes.builder() + .add(node1) + .add(nodeWithPub) + .clusterManagerNodeId(node1.getId()) + .localNodeId(nodeWithPub.getId()) + .build(), + initialConfig, + initialConfig, + 42L + ); + final PersistedStateRegistry persistedStateRegistry = persistedStateRegistry(); + persistedStateRegistry.addPersistedState(PersistedStateType.LOCAL, new InMemoryPersistedState(0L, initialStateNode2)); + persistedStateRegistry.addPersistedState( + PersistedStateType.REMOTE, + new RemotePersistedState(remoteClusterStateService, state1.metadata().clusterUUID()) + ); + + final CoordinationState coordinationState = createCoordinationState( + persistedStateRegistry, + nodeWithPub, + remotePublicationSettings() + ); + coordinationState.setInitialState(state1); + long newTerm = randomLongBetween(1, 10); + StartJoinRequest startJoinRequest = new StartJoinRequest(nodeWithPub, newTerm); + + Join v1 = cs1.handleStartJoin(startJoinRequest); + Join v2 = coordinationState.handleStartJoin(startJoinRequest); + assertTrue(coordinationState.handleJoin(v1)); + assertTrue(coordinationState.handleJoin(v2)); + assertTrue(coordinationState.electionWon()); + VotingConfiguration newConfig = VotingConfiguration.of(node1, nodeWithPub, node3); + ClusterState state2 = clusterState( + startJoinRequest.getTerm(), + 2L, + DiscoveryNodes.builder() + .add(node1) + .add(nodeWithPub) + .clusterManagerNodeId(node1.getId()) + .localNodeId(nodeWithPub.getId()) + .build(), + initialConfig, + newConfig, + 7L + ); + final ClusterMetadataManifest manifest = ClusterMetadataManifest.builder() + .clusterTerm(1L) + .stateVersion(state2.version()) + .clusterUUID(state2.metadata().clusterUUID()) + .nodeId(node1.getId()) + .stateUUID(randomAlphaOfLength(10)) + .opensearchVersion(Version.CURRENT) + .committed(false) + .codecVersion(1) + .globalMetadataFileName(randomAlphaOfLength(10)) + 
.indices(Collections.emptyList()) + .previousClusterUUID(randomAlphaOfLength(10)) + .clusterUUIDCommitted(true) + .build(); + + PublishRequest publishRequest = coordinationState.handleClientValue(state2); + coordinationState.handlePublishRequest(new RemoteStatePublishRequest(publishRequest.getAcceptedState(), manifest)); + ApplyCommitRequest applyCommitRequest = new ApplyCommitRequest(node2, state2.term(), state2.version()); + coordinationState.handleCommit(applyCommitRequest); + verifyNoInteractions(remoteClusterStateService); + assertTrue( + persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).getLastAcceptedState().metadata().clusterUUIDCommitted() + ); + assertTrue(persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).getLastAcceptedManifest().isCommitted()); + assertEquals(coordinationState.getLastCommittedConfiguration(), newConfig); + } + + public void testRemotePersistedStateResetsForPublicationEnabledAfterLocalPublication() { + final RemoteClusterStateService remoteClusterStateService = Mockito.mock(RemoteClusterStateService.class); + // cluster manager is node1 and nodeWithPub is a follower node + VotingConfiguration initialConfig = VotingConfiguration.of(node1); + ClusterState state1 = clusterState( + 0L, + 0L, + DiscoveryNodes.builder() + .add(node1) + .add(nodeWithPub) + .clusterManagerNodeId(node1.getId()) + .localNodeId(nodeWithPub.getId()) + .build(), + initialConfig, + initialConfig, + 42L + ); + final PersistedStateRegistry persistedStateRegistry = persistedStateRegistry(); + persistedStateRegistry.addPersistedState(PersistedStateType.LOCAL, new InMemoryPersistedState(0L, initialStateNode2)); + persistedStateRegistry.addPersistedState( + PersistedStateType.REMOTE, + new RemotePersistedState(remoteClusterStateService, state1.metadata().clusterUUID()) + ); + + final CoordinationState coordinationState = createCoordinationState( + persistedStateRegistry, + nodeWithPub, + remotePublicationSettings() + ); + coordinationState.setInitialState(state1); + long newTerm = randomLongBetween(1, 10); + StartJoinRequest startJoinRequest = new StartJoinRequest(nodeWithPub, newTerm); + + coordinationState.handleStartJoin(startJoinRequest); + + ClusterState state2 = setValue( + ClusterState.builder(state1) + .metadata( + Metadata.builder(state1.metadata()) + .coordinationMetadata(CoordinationMetadata.builder(state1.coordinationMetadata()).term(newTerm).build()) + .build() + ) + .version(randomLongBetween(1, 10)) + .build(), + 43L + ); + + PublishResponse publishResponse = coordinationState.handlePublishRequest(new PublishRequest(state2)); + assertEquals(state2.term(), publishResponse.getTerm()); + assertEquals(state2.version(), publishResponse.getVersion()); + verifyNoInteractions(remoteClusterStateService); + assertNull(persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).getLastAcceptedState()); + assertNull(persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).getLastAcceptedManifest()); + } + + public void testHandleCommitOnFollowerNodeWhenRemotePublicationEnabledWithNullRemotePersistedState() { + RemoteClusterStateService remoteClusterStateService = mock(RemoteClusterStateService.class); + VotingConfiguration initialConfig = VotingConfiguration.of(node1, nodeWithPub); + ClusterState state1 = clusterState( + 0L, + 0L, + DiscoveryNodes.builder() + .add(node1) + .add(nodeWithPub) + .clusterManagerNodeId(node1.getId()) + .localNodeId(nodeWithPub.getId()) + .build(), + initialConfig, + initialConfig, + 42L + ); + final 
PersistedStateRegistry persistedStateRegistry = persistedStateRegistry(); + persistedStateRegistry.addPersistedState(PersistedStateType.LOCAL, new InMemoryPersistedState(0L, initialStateNode2)); + persistedStateRegistry.addPersistedState( + PersistedStateType.REMOTE, + new RemotePersistedState(remoteClusterStateService, state1.metadata().clusterUUID()) + ); + + final CoordinationState coordinationState = createCoordinationState( + persistedStateRegistry, + nodeWithPub, + remotePublicationSettings() + ); + coordinationState.setInitialState(state1); + long newTerm = randomLongBetween(1, 10); + StartJoinRequest startJoinRequest = new StartJoinRequest(nodeWithPub, newTerm); + + Join v1 = cs1.handleStartJoin(startJoinRequest); + Join v2 = coordinationState.handleStartJoin(startJoinRequest); + assertTrue(coordinationState.handleJoin(v1)); + assertTrue(coordinationState.handleJoin(v2)); + assertTrue(coordinationState.electionWon()); + VotingConfiguration newConfig = VotingConfiguration.of(node1, nodeWithPub, node3); + ClusterState state2 = clusterState( + startJoinRequest.getTerm(), + 2L, + DiscoveryNodes.builder() + .add(node1) + .add(nodeWithPub) + .clusterManagerNodeId(node1.getId()) + .localNodeId(nodeWithPub.getId()) + .build(), + initialConfig, + newConfig, + 7L + ); + + PublishRequest publishRequest = coordinationState.handleClientValue(state2); + coordinationState.handlePublishRequest(new PublishRequest(publishRequest.getAcceptedState())); + assertNull(persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).getLastAcceptedState()); + assertNull(persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).getLastAcceptedManifest()); + ApplyCommitRequest applyCommitRequest = new ApplyCommitRequest(node2, state2.term(), state2.version()); + PersistedState spyRPS = spy(persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE)); + coordinationState.handleCommit(applyCommitRequest); + verifyNoInteractions(spyRPS); + verifyNoInteractions(remoteClusterStateService); + } + public void testIsRemotePublicationEnabled_WithInconsistentSettings() { // create settings with remote state disabled but publication enabled Settings settings = Settings.builder() .put(REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), false) - .put(REMOTE_PUBLICATION_EXPERIMENTAL, true) + .put(REMOTE_PUBLICATION_SETTING_KEY, true) .build(); CoordinationState coordinationState = createCoordinationState(psr1, node1, settings); assertFalse(coordinationState.isRemotePublicationEnabled()); @@ -1042,6 +1304,30 @@ public static ClusterState clusterState( ); } + public static ClusterState clusterStateWithClusterManager( + long term, + long version, + DiscoveryNode localNode, + DiscoveryNode clusterManagerNode, + VotingConfiguration lastCommittedConfig, + VotingConfiguration lastAcceptedConfig, + long value + ) { + return clusterState( + term, + version, + DiscoveryNodes.builder() + .add(localNode) + .add(clusterManagerNode) + .localNodeId(localNode.getId()) + .clusterManagerNodeId(clusterManagerNode.getId()) + .build(), + lastCommittedConfig, + lastAcceptedConfig, + value + ); + } + public static ClusterState clusterState( long term, long version, @@ -1090,4 +1376,30 @@ private static PersistedStateRegistry createPersistedStateRegistry(ClusterState persistedStateRegistry.addPersistedState(PersistedStateType.LOCAL, new InMemoryPersistedState(0L, clusterState)); return persistedStateRegistry; } + + private static Settings remoteStateSettings() { + String randomRepoName = "randomRepoName"; + String 
stateRepoTypeAttributeKey = String.format( + Locale.getDefault(), + "node.attr." + REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT, + randomRepoName + ); + String stateRepoSettingsAttributeKeyPrefix = String.format( + Locale.getDefault(), + "node.attr." + REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX, + randomRepoName + ); + + Settings settings = Settings.builder() + .put("node.attr." + REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, randomRepoName) + .put(stateRepoTypeAttributeKey, FsRepository.TYPE) + .put(stateRepoSettingsAttributeKeyPrefix + "location", "randomRepoPath") + .put(RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true) + .build(); + return settings; + } + + private static Settings remotePublicationSettings() { + return Settings.builder().put(remoteStateSettings()).put(REMOTE_PUBLICATION_SETTING_KEY, true).build(); + } } diff --git a/server/src/test/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableServiceFactoryTests.java b/server/src/test/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableServiceFactoryTests.java index 86f4b9502d6ab..683942fd34a37 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableServiceFactoryTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableServiceFactoryTests.java @@ -10,7 +10,6 @@ import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; -import org.opensearch.common.util.FeatureFlags; import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.fs.FsRepository; import org.opensearch.test.OpenSearchTestCase; @@ -20,7 +19,7 @@ import java.util.function.Supplier; -import static org.opensearch.common.util.FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL; +import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_PUBLICATION_SETTING_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY; public class RemoteRoutingTableServiceFactoryTests extends OpenSearchTestCase { @@ -50,9 +49,8 @@ public void testGetServiceWhenRemoteRoutingEnabled() { Settings settings = Settings.builder() .put("node.attr." 
+ REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, "routing_repository") .put(FsRepository.REPOSITORIES_COMPRESS_SETTING.getKey(), false) + .put(REMOTE_PUBLICATION_SETTING_KEY, "true") .build(); - Settings nodeSettings = Settings.builder().put(REMOTE_PUBLICATION_EXPERIMENTAL, "true").build(); - FeatureFlags.initializeFeatureFlags(nodeSettings); RemoteRoutingTableService service = RemoteRoutingTableServiceFactory.getService( repositoriesService, settings, diff --git a/server/src/test/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableServiceTests.java b/server/src/test/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableServiceTests.java index 5061de9161ab4..63501f878d55d 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableServiceTests.java @@ -28,7 +28,6 @@ import org.opensearch.common.compress.DeflateCompressor; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; -import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.util.TestCapturingListener; import org.opensearch.core.action.ActionListener; import org.opensearch.core.compress.Compressor; @@ -63,8 +62,8 @@ import org.mockito.Mockito; -import static org.opensearch.common.util.FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL; import static org.opensearch.gateway.remote.ClusterMetadataManifestTests.randomUploadedIndexMetadataList; +import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_PUBLICATION_SETTING_KEY; import static org.opensearch.gateway.remote.RemoteClusterStateServiceTests.generateClusterStateWithOneIndex; import static org.opensearch.gateway.remote.RemoteClusterStateUtils.CLUSTER_STATE_PATH_TOKEN; import static org.opensearch.gateway.remote.RemoteClusterStateUtils.DELIMITER; @@ -114,6 +113,7 @@ public void setup() { Settings settings = Settings.builder() .put("node.attr." 
+ REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, "routing_repository") + .put(REMOTE_PUBLICATION_SETTING_KEY, "true") .build(); clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); clusterService = mock(ClusterService.class); @@ -126,8 +126,6 @@ public void setup() { when(repositoriesService.repository("routing_repository")).thenReturn(blobStoreRepository); when(blobStoreRepository.blobStore()).thenReturn(blobStore); when(blobStore.blobContainer(any())).thenReturn(blobContainer); - Settings nodeSettings = Settings.builder().put(REMOTE_PUBLICATION_EXPERIMENTAL, "true").build(); - FeatureFlags.initializeFeatureFlags(nodeSettings); compressor = new NoneCompressor(); basePath = BlobPath.cleanPath().add("base-path"); when(blobStoreRepository.basePath()).thenReturn(basePath); diff --git a/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java b/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java index 9972bbfff5d66..5ea5241762753 100644 --- a/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java +++ b/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java @@ -115,6 +115,7 @@ import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.verifyNoInteractions; @@ -210,11 +211,15 @@ public void testSetCurrentTerm() throws IOException { } private ClusterState createClusterState(long version, Metadata metadata) { - return ClusterState.builder(clusterName) - .nodes(DiscoveryNodes.builder().add(localNode).localNodeId(localNode.getId()).build()) - .version(version) - .metadata(metadata) - .build(); + return createClusterState(version, metadata, false); + } + + private ClusterState createClusterState(long version, Metadata metadata, boolean isClusterManagerNode) { + DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder().add(localNode).localNodeId(localNode.getId()); + if (isClusterManagerNode) { + nodesBuilder.clusterManagerNodeId(localNode.getId()); + } + return ClusterState.builder(clusterName).nodes(nodesBuilder.build()).version(version).metadata(metadata).build(); } private ClusterState createClusterStateWithNodes(long version, Metadata metadata) { @@ -225,7 +230,12 @@ private ClusterState createClusterStateWithNodes(long version, Metadata metadata Sets.newHashSet(DiscoveryNodeRole.CLUSTER_MANAGER_ROLE), Version.V_2_13_0 ); - DiscoveryNodes discoveryNodes = DiscoveryNodes.builder().add(localNode).localNodeId(localNode.getId()).add(oldNode).build(); + DiscoveryNodes discoveryNodes = DiscoveryNodes.builder() + .add(localNode) + .localNodeId(localNode.getId()) + .clusterManagerNodeId(localNode.getId()) + .add(oldNode) + .build(); return ClusterState.builder(clusterName).nodes(discoveryNodes).version(version).metadata(metadata).build(); } @@ -762,7 +772,8 @@ public void testRemotePersistedState() throws IOException { final long clusterTerm = randomNonNegativeLong(); final ClusterState clusterState = createClusterState( randomNonNegativeLong(), - Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(clusterTerm).build()).build() + Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(clusterTerm).build()).build(), + true ); 
remotePersistedState.setLastAcceptedState(clusterState); @@ -773,7 +784,8 @@ public void testRemotePersistedState() throws IOException { final ClusterState secondClusterState = createClusterState( randomNonNegativeLong(), - Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(clusterTerm).build()).build() + Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(clusterTerm).build()).build(), + true ); remotePersistedState.setLastAcceptedState(secondClusterState); @@ -783,11 +795,11 @@ public void testRemotePersistedState() throws IOException { assertThat(remotePersistedState.getLastAcceptedState(), equalTo(secondClusterState)); assertThat(remotePersistedState.getCurrentTerm(), equalTo(clusterTerm)); - when(remoteClusterStateService.markLastStateAsCommitted(Mockito.any(), Mockito.any())).thenReturn( + when(remoteClusterStateService.markLastStateAsCommitted(Mockito.any(), Mockito.any(), eq(false))).thenReturn( new RemoteClusterStateManifestInfo(manifest, "path/to/manifest") ); remotePersistedState.markLastAcceptedStateAsCommitted(); - Mockito.verify(remoteClusterStateService, times(1)).markLastStateAsCommitted(Mockito.any(), Mockito.any()); + Mockito.verify(remoteClusterStateService, times(1)).markLastStateAsCommitted(Mockito.any(), Mockito.any(), eq(false)); assertThat(remotePersistedState.getLastAcceptedState(), equalTo(secondClusterState)); assertThat(remotePersistedState.getCurrentTerm(), equalTo(clusterTerm)); @@ -825,7 +837,8 @@ public void testRemotePersistedStateWithDifferentNodeConfiguration() throws IOEx ClusterState clusterState2 = createClusterState( randomNonNegativeLong(), - Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(1L).build()).build() + Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(1L).build()).build(), + true ); final ClusterMetadataManifest manifest2 = ClusterMetadataManifest.builder() .clusterTerm(1L) @@ -840,7 +853,8 @@ public void testRemotePersistedStateWithDifferentNodeConfiguration() throws IOEx ClusterState clusterState3 = createClusterState( randomNonNegativeLong(), - Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(1L).build()).build() + Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(1L).build()).build(), + true ); Mockito.when(remoteClusterStateService.writeIncrementalMetadata(Mockito.any(), Mockito.any(), Mockito.any())) .thenReturn(new RemoteClusterStateManifestInfo(manifest2, "path/to/manifest3")); @@ -849,6 +863,73 @@ public void testRemotePersistedStateWithDifferentNodeConfiguration() throws IOEx } + public void testRemotePersistentState_FollowerNode() throws IOException { + final RemoteClusterStateService remoteClusterStateService = Mockito.mock(RemoteClusterStateService.class); + final ClusterMetadataManifest manifest = ClusterMetadataManifest.builder() + .clusterTerm(1L) + .stateVersion(5L) + .committed(false) + .build(); + final String previousClusterUUID = "prev-cluster-uuid"; + RemotePersistedState remotePersistedState = new RemotePersistedState(remoteClusterStateService, previousClusterUUID); + + assertNull(remotePersistedState.getLastAcceptedState()); + assertNull(remotePersistedState.getLastAcceptedManifest()); + assertEquals(0, remotePersistedState.getCurrentTerm()); + + final long clusterTerm = randomNonNegativeLong(); + final ClusterState clusterState = createClusterState( + randomNonNegativeLong(), + Metadata.builder() + 
.coordinationMetadata(CoordinationMetadata.builder().term(clusterTerm).build()) + .clusterUUIDCommitted(true) + .build(), + false + ); + + remotePersistedState.setLastAcceptedState(clusterState); + remotePersistedState.setLastAcceptedManifest(manifest); + Mockito.verify(remoteClusterStateService, never()) + .writeFullMetadata(clusterState, previousClusterUUID, MANIFEST_CURRENT_CODEC_VERSION); + + assertEquals(clusterState, remotePersistedState.getLastAcceptedState()); + assertEquals(clusterTerm, remotePersistedState.getCurrentTerm()); + assertEquals(manifest, remotePersistedState.getLastAcceptedManifest()); + + final ClusterState secondClusterState = createClusterState( + randomNonNegativeLong(), + Metadata.builder() + .coordinationMetadata(CoordinationMetadata.builder().term(clusterTerm).build()) + .clusterUUIDCommitted(false) + .build(), + false + ); + + remotePersistedState.setLastAcceptedState(secondClusterState); + Mockito.verify(remoteClusterStateService, never()) + .writeFullMetadata(secondClusterState, previousClusterUUID, MANIFEST_CURRENT_CODEC_VERSION); + + assertEquals(secondClusterState, remotePersistedState.getLastAcceptedState()); + assertEquals(clusterTerm, remotePersistedState.getCurrentTerm()); + assertFalse(remotePersistedState.getLastAcceptedManifest().isCommitted()); + + remotePersistedState.markLastAcceptedStateAsCommitted(); + Mockito.verify(remoteClusterStateService, never()).markLastStateAsCommitted(Mockito.any(), Mockito.any(), eq(false)); + + assertEquals(secondClusterState, remotePersistedState.getLastAcceptedState()); + assertEquals(clusterTerm, remotePersistedState.getCurrentTerm()); + assertFalse(remotePersistedState.getLastAcceptedState().metadata().clusterUUIDCommitted()); + assertTrue(remotePersistedState.getLastAcceptedManifest().isCommitted()); + + final ClusterState thirdClusterState = ClusterState.builder(secondClusterState) + .metadata(Metadata.builder(secondClusterState.getMetadata()).clusterUUID(randomAlphaOfLength(10)).build()) + .build(); + remotePersistedState.setLastAcceptedState(thirdClusterState); + remotePersistedState.markLastAcceptedStateAsCommitted(); + assertTrue(remotePersistedState.getLastAcceptedState().metadata().clusterUUIDCommitted()); + assertTrue(remotePersistedState.getLastAcceptedManifest().isCommitted()); + } + public void testRemotePersistedStateNotCommitted() throws IOException { final RemoteClusterStateService remoteClusterStateService = Mockito.mock(RemoteClusterStateService.class); final String previousClusterUUID = "prev-cluster-uuid"; @@ -875,7 +956,8 @@ public void testRemotePersistedStateNotCommitted() throws IOException { final long clusterTerm = randomNonNegativeLong(); ClusterState clusterState = createClusterState( randomNonNegativeLong(), - Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(clusterTerm).build()).build() + Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(clusterTerm).build()).build(), + true ); clusterState = ClusterState.builder(clusterState) .metadata(Metadata.builder(clusterState.getMetadata()).clusterUUID(randomAlphaOfLength(10)).clusterUUIDCommitted(false).build()) @@ -901,7 +983,8 @@ public void testRemotePersistedStateExceptionOnFullStateUpload() throws IOExcept final long clusterTerm = randomNonNegativeLong(); final ClusterState clusterState = createClusterState( randomNonNegativeLong(), - Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(clusterTerm).build()).build() + 
Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(clusterTerm).build()).build(), + true ); assertThrows(OpenSearchException.class, () -> remotePersistedState.setLastAcceptedState(clusterState)); @@ -924,7 +1007,8 @@ public void testRemotePersistedStateFailureStats() throws IOException { final long clusterTerm = randomNonNegativeLong(); final ClusterState clusterState = createClusterState( randomNonNegativeLong(), - Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(clusterTerm).build()).build() + Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(clusterTerm).build()).build(), + true ); assertThrows(OpenSearchException.class, () -> remotePersistedState.setLastAcceptedState(clusterState)); @@ -1025,7 +1109,8 @@ public void testGatewayForRemoteStateForNodeReplacement() throws IOException { false ) .clusterUUID(randomAlphaOfLength(10)) - .build() + .build(), + false ); when(remoteClusterStateService.getLastKnownUUIDFromRemote(clusterName.value())).thenReturn( previousState.metadata().clusterUUID() @@ -1071,7 +1156,8 @@ public void testGatewayForRemoteStateForNodeReboot() throws IOException { .coordinationMetadata(CoordinationMetadata.builder().term(randomLong()).build()) .put(indexMetadata, false) .clusterUUID(randomAlphaOfLength(10)) - .build() + .build(), + false ); gateway = newGatewayForRemoteState( remoteClusterStateService, @@ -1117,7 +1203,8 @@ public void testGatewayForRemoteStateForInitialBootstrapBlocksApplied() throws I .put(indexMetadata, false) .clusterUUID(ClusterState.UNKNOWN_UUID) .persistentSettings(Settings.builder().put(Metadata.SETTING_READ_ONLY_SETTING.getKey(), true).build()) - .build() + .build(), + false ) ).nodes(DiscoveryNodes.EMPTY_NODES).build(); diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index 21b88e5bd66b9..e875b1c5dc64e 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -107,12 +107,12 @@ import static java.util.Collections.emptyList; import static java.util.Collections.emptyMap; import static java.util.stream.Collectors.toList; -import static org.opensearch.common.util.FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL; import static org.opensearch.gateway.remote.ClusterMetadataManifest.CODEC_V1; import static org.opensearch.gateway.remote.ClusterMetadataManifest.CODEC_V2; import static org.opensearch.gateway.remote.ClusterMetadataManifest.MANIFEST_CURRENT_CODEC_VERSION; import static org.opensearch.gateway.remote.RemoteClusterStateAttributesManager.CLUSTER_BLOCKS; import static org.opensearch.gateway.remote.RemoteClusterStateAttributesManager.CLUSTER_STATE_ATTRIBUTE; +import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_PUBLICATION_SETTING_KEY; import static org.opensearch.gateway.remote.RemoteClusterStateTestUtils.CustomMetadata1; import static org.opensearch.gateway.remote.RemoteClusterStateTestUtils.CustomMetadata2; import static org.opensearch.gateway.remote.RemoteClusterStateTestUtils.CustomMetadata3; @@ -273,8 +273,6 @@ public void teardown() throws Exception { super.tearDown(); remoteClusterStateService.close(); publicationEnabled = false; - Settings nodeSettings = Settings.builder().build(); - FeatureFlags.initializeFeatureFlags(nodeSettings); threadPool.shutdown(); } @@ 
-371,8 +369,7 @@ public void testWriteFullMetadataSuccess() throws IOException { public void testWriteFullMetadataSuccessPublicationEnabled() throws IOException { // TODO Make the publication flag parameterized publicationEnabled = true; - Settings nodeSettings = Settings.builder().put(REMOTE_PUBLICATION_EXPERIMENTAL, publicationEnabled).build(); - FeatureFlags.initializeFeatureFlags(nodeSettings); + settings = Settings.builder().put(settings).put(REMOTE_PUBLICATION_SETTING_KEY, publicationEnabled).build(); remoteClusterStateService = new RemoteClusterStateService( "test-node-id", repositoriesServiceSupplier, @@ -611,20 +608,20 @@ public void testFailWriteIncrementalMetadataNonClusterManagerNode() throws IOExc final RemoteClusterStateManifestInfo manifestDetails = remoteClusterStateService.writeIncrementalMetadata( clusterState, clusterState, - null + ClusterMetadataManifest.builder().build() ); Assert.assertThat(manifestDetails, nullValue()); assertEquals(0, remoteClusterStateService.getUploadStats().getSuccessCount()); } - public void testFailWriteIncrementalMetadataWhenTermChanged() { + public void testFailWriteIncrementalMetadataWhenManifestNull() { final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder().term(2L).build(); final ClusterState previousClusterState = ClusterState.builder(ClusterName.DEFAULT) .metadata(Metadata.builder().coordinationMetadata(coordinationMetadata)) .build(); assertThrows( - AssertionError.class, + IllegalArgumentException.class, () -> remoteClusterStateService.writeIncrementalMetadata(previousClusterState, clusterState, null) ); } @@ -749,8 +746,7 @@ public void testWriteIncrementalMetadataSuccess() throws IOException { public void testWriteIncrementalMetadataSuccessWhenPublicationEnabled() throws IOException { publicationEnabled = true; - Settings nodeSettings = Settings.builder().put(REMOTE_PUBLICATION_EXPERIMENTAL, publicationEnabled).build(); - FeatureFlags.initializeFeatureFlags(nodeSettings); + settings = Settings.builder().put(settings).put(REMOTE_PUBLICATION_SETTING_KEY, true).build(); remoteClusterStateService = new RemoteClusterStateService( "test-node-id", repositoriesServiceSupplier, @@ -2524,7 +2520,7 @@ public void testMarkLastStateAsCommittedSuccess() throws IOException { List indices = List.of(uploadedIndexMetadata); final ClusterMetadataManifest previousManifest = ClusterMetadataManifest.builder().indices(indices).build(); - final ClusterMetadataManifest manifest = remoteClusterStateService.markLastStateAsCommitted(clusterState, previousManifest) + final ClusterMetadataManifest manifest = remoteClusterStateService.markLastStateAsCommitted(clusterState, previousManifest, false) .getClusterMetadataManifest(); final ClusterMetadataManifest expectedManifest = ClusterMetadataManifest.builder() @@ -2658,7 +2654,7 @@ public void testRemoteRoutingTableInitializedWhenEnabled() { .build(); clusterSettings.applySettings(newSettings); - Settings nodeSettings = Settings.builder().put(REMOTE_PUBLICATION_EXPERIMENTAL, "true").build(); + Settings nodeSettings = Settings.builder().put(REMOTE_PUBLICATION_SETTING_KEY, "true").build(); FeatureFlags.initializeFeatureFlags(nodeSettings); remoteClusterStateService = new RemoteClusterStateService( @@ -2935,11 +2931,10 @@ private void initializeRoutingTable() { .put("node.attr." 
+ REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, "routing_repository") .put("node.attr." + REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, "remote_store_repository") .put(RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true) + .put(REMOTE_PUBLICATION_SETTING_KEY, "true") .build(); clusterSettings.applySettings(newSettings); - Settings nodeSettings = Settings.builder().put(REMOTE_PUBLICATION_EXPERIMENTAL, "true").build(); - FeatureFlags.initializeFeatureFlags(nodeSettings); remoteClusterStateService = new RemoteClusterStateService( "test-node-id", repositoriesServiceSupplier, @@ -2966,11 +2961,10 @@ private void initializeWithChecksumEnabled(RemoteClusterStateService.RemoteClust .put("node.attr." + REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, "remote_store_repository") .put(RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true) .put(RemoteClusterStateService.REMOTE_CLUSTER_STATE_CHECKSUM_VALIDATION_MODE_SETTING.getKey(), mode.name()) + .put(REMOTE_PUBLICATION_SETTING_KEY, true) .build(); clusterSettings.applySettings(newSettings); - Settings nodeSettings = Settings.builder().put(REMOTE_PUBLICATION_EXPERIMENTAL, "true").build(); - FeatureFlags.initializeFeatureFlags(nodeSettings); remoteClusterStateService = new RemoteClusterStateService( "test-node-id", repositoriesServiceSupplier, diff --git a/server/src/test/java/org/opensearch/index/store/remote/utils/TransferManagerTestCase.java b/server/src/test/java/org/opensearch/index/store/remote/utils/TransferManagerTestCase.java index 810a4c336fdf7..1eae5119ab462 100644 --- a/server/src/test/java/org/opensearch/index/store/remote/utils/TransferManagerTestCase.java +++ b/server/src/test/java/org/opensearch/index/store/remote/utils/TransferManagerTestCase.java @@ -99,7 +99,7 @@ public void testConcurrentAccess() throws Exception { } } - public void testFetchBlobWithConcurrentCacheEvictions() throws Exception { + public void testFetchBlobWithConcurrentCacheEvictions() { // Submit 256 tasks to an executor with 16 threads that will each randomly // request one of eight blobs. Given that the cache can only hold two // blobs this will lead to a huge amount of contention and thrashing. @@ -114,41 +114,34 @@ public void testFetchBlobWithConcurrentCacheEvictions() throws Exception { try (IndexInput indexInput = fetchBlobWithName(blobname)) { assertIndexInputIsFunctional(indexInput); } + } catch (IOException ignored) { // fetchBlobWithName may fail due to fixed capacity } catch (Exception e) { throw new AssertionError(e); } })); } // Wait for all threads to complete - for (Future future : futures) { - future.get(10, TimeUnit.SECONDS); + try { + for (Future future : futures) { + future.get(10, TimeUnit.SECONDS); + } + } catch (java.util.concurrent.ExecutionException ignored) { // Index input may be null + } catch (Exception e) { + throw new AssertionError(e); } + } finally { assertTrue(terminate(testRunner)); } MatcherAssert.assertThat("Expected many evictions to happen", fileCache.stats().evictionCount(), greaterThan(0L)); } - public void testUsageExceedsCapacity() throws Exception { - // Fetch resources that exceed the configured capacity of the cache and assert that the - // returned IndexInputs are still functional. 
- try (IndexInput i1 = fetchBlobWithName("1"); IndexInput i2 = fetchBlobWithName("2"); IndexInput i3 = fetchBlobWithName("3")) { - assertIndexInputIsFunctional(i1); - assertIndexInputIsFunctional(i2); - assertIndexInputIsFunctional(i3); - MatcherAssert.assertThat(fileCache.usage().activeUsage(), equalTo((long) EIGHT_MB * 3)); - MatcherAssert.assertThat(fileCache.usage().usage(), equalTo((long) EIGHT_MB * 3)); - } - MatcherAssert.assertThat(fileCache.usage().activeUsage(), equalTo(0L)); - MatcherAssert.assertThat(fileCache.usage().usage(), equalTo((long) EIGHT_MB * 3)); - // Fetch another resource which will trigger an eviction - try (IndexInput i1 = fetchBlobWithName("1")) { - assertIndexInputIsFunctional(i1); - MatcherAssert.assertThat(fileCache.usage().activeUsage(), equalTo((long) EIGHT_MB)); - MatcherAssert.assertThat(fileCache.usage().usage(), equalTo((long) EIGHT_MB)); - } - MatcherAssert.assertThat(fileCache.usage().activeUsage(), equalTo(0L)); - MatcherAssert.assertThat(fileCache.usage().usage(), equalTo((long) EIGHT_MB)); + public void testOverflowDisabled() throws Exception { + initializeTransferManager(); + IndexInput i1 = fetchBlobWithName("1"); + IndexInput i2 = fetchBlobWithName("2"); + + assertThrows(IOException.class, () -> { IndexInput i3 = fetchBlobWithName("3"); }); } public void testDownloadFails() throws Exception { diff --git a/server/src/test/java/org/opensearch/index/translog/RemoteFsTimestampAwareTranslogTests.java b/server/src/test/java/org/opensearch/index/translog/RemoteFsTimestampAwareTranslogTests.java index c510a6475147d..4c9da7e95dfa7 100644 --- a/server/src/test/java/org/opensearch/index/translog/RemoteFsTimestampAwareTranslogTests.java +++ b/server/src/test/java/org/opensearch/index/translog/RemoteFsTimestampAwareTranslogTests.java @@ -310,6 +310,7 @@ public void testIndexDeletionWithNoPinnedTimestampButRecentFiles() throws Except } @Override + @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/15731") public void testSimpleOperationsUpload() throws Exception { ArrayList ops = new ArrayList<>(); diff --git a/server/src/test/java/org/opensearch/indices/recovery/LocalStorePeerRecoverySourceHandlerTests.java b/server/src/test/java/org/opensearch/indices/recovery/LocalStorePeerRecoverySourceHandlerTests.java index 4685a7196b85a..a4598c4294a33 100644 --- a/server/src/test/java/org/opensearch/indices/recovery/LocalStorePeerRecoverySourceHandlerTests.java +++ b/server/src/test/java/org/opensearch/indices/recovery/LocalStorePeerRecoverySourceHandlerTests.java @@ -52,6 +52,8 @@ import org.opensearch.action.support.PlainActionFuture; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.routing.IndexShardRoutingTable; +import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.common.Numbers; import org.opensearch.common.Randomness; import org.opensearch.common.SetOnce; @@ -141,6 +143,8 @@ import static org.mockito.Mockito.anyString; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; /** @@ -746,6 +750,206 @@ void phase2( assertFalse(phase2Called.get()); } + /* + If the replica allocation id is not reflected in the source node's routing table even after retries, + recoveries should fail + */ + public void testThrowExceptionOnNoTargetInRouting() throws IOException { + final RecoverySettings 
recoverySettings = new RecoverySettings(Settings.EMPTY, service); + final StartRecoveryRequest request = getStartRecoveryRequest(); + final IndexShard shard = mock(IndexShard.class); + when(shard.seqNoStats()).thenReturn(mock(SeqNoStats.class)); + when(shard.segmentStats(anyBoolean(), anyBoolean())).thenReturn(mock(SegmentsStats.class)); + when(shard.isRelocatedPrimary()).thenReturn(false); + final org.opensearch.index.shard.ReplicationGroup replicationGroup = mock(org.opensearch.index.shard.ReplicationGroup.class); + final IndexShardRoutingTable routingTable = mock(IndexShardRoutingTable.class); + when(routingTable.getByAllocationId(anyString())).thenReturn(null); + when(shard.getReplicationGroup()).thenReturn(replicationGroup); + when(replicationGroup.getRoutingTable()).thenReturn(routingTable); + when(shard.acquireSafeIndexCommit()).thenReturn(mock(GatedCloseable.class)); + doAnswer(invocation -> { + ((ActionListener) invocation.getArguments()[0]).onResponse(() -> {}); + return null; + }).when(shard).acquirePrimaryOperationPermit(any(), anyString(), any()); + + final IndexMetadata.Builder indexMetadata = IndexMetadata.builder("test") + .settings( + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, between(0, 5)) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, between(1, 5)) + .put(IndexMetadata.SETTING_VERSION_CREATED, VersionUtils.randomVersion(random())) + .put(IndexMetadata.SETTING_INDEX_UUID, UUIDs.randomBase64UUID(random())) + ); + if (randomBoolean()) { + indexMetadata.state(IndexMetadata.State.CLOSE); + } + when(shard.indexSettings()).thenReturn(new IndexSettings(indexMetadata.build(), Settings.EMPTY)); + + final AtomicBoolean phase1Called = new AtomicBoolean(); + final AtomicBoolean prepareTargetForTranslogCalled = new AtomicBoolean(); + final AtomicBoolean phase2Called = new AtomicBoolean(); + final RecoverySourceHandler handler = new LocalStorePeerRecoverySourceHandler( + shard, + mock(RecoveryTargetHandler.class), + threadPool, + request, + Math.toIntExact(recoverySettings.getChunkSize().getBytes()), + between(1, 8), + between(1, 8) + ) { + + @Override + void phase1( + IndexCommit snapshot, + long startingSeqNo, + IntSupplier translogOps, + ActionListener listener, + boolean skipCreateRetentionLeaseStep + ) { + phase1Called.set(true); + super.phase1(snapshot, startingSeqNo, translogOps, listener, skipCreateRetentionLeaseStep); + } + + @Override + void prepareTargetForTranslog(int totalTranslogOps, ActionListener listener) { + prepareTargetForTranslogCalled.set(true); + super.prepareTargetForTranslog(totalTranslogOps, listener); + } + + @Override + void phase2( + long startingSeqNo, + long endingSeqNo, + Translog.Snapshot snapshot, + long maxSeenAutoIdTimestamp, + long maxSeqNoOfUpdatesOrDeletes, + RetentionLeases retentionLeases, + long mappingVersion, + ActionListener listener + ) throws IOException { + phase2Called.set(true); + super.phase2( + startingSeqNo, + endingSeqNo, + snapshot, + maxSeenAutoIdTimestamp, + maxSeqNoOfUpdatesOrDeletes, + retentionLeases, + mappingVersion, + listener + ); + } + + }; + PlainActionFuture future = new PlainActionFuture<>(); + expectThrows(DelayRecoveryException.class, () -> { + handler.recoverToTarget(future); + future.actionGet(); + }); + verify(routingTable, times(5)).getByAllocationId(null); + assertFalse(phase1Called.get()); + assertFalse(prepareTargetForTranslogCalled.get()); + assertFalse(phase2Called.get()); + } + + /* + Tests when the replica allocation id is reflected in the source node's routing table after 1 
retry + */ + public void testTargetInRoutingInSecondAttempt() throws IOException { + final RecoverySettings recoverySettings = new RecoverySettings(Settings.EMPTY, service); + final StartRecoveryRequest request = getStartRecoveryRequest(); + final IndexShard shard = mock(IndexShard.class); + when(shard.seqNoStats()).thenReturn(mock(SeqNoStats.class)); + when(shard.segmentStats(anyBoolean(), anyBoolean())).thenReturn(mock(SegmentsStats.class)); + when(shard.isRelocatedPrimary()).thenReturn(false); + when(shard.getRetentionLeases()).thenReturn(mock(RetentionLeases.class)); + final org.opensearch.index.shard.ReplicationGroup replicationGroup = mock(org.opensearch.index.shard.ReplicationGroup.class); + final IndexShardRoutingTable routingTable = mock(IndexShardRoutingTable.class); + final ShardRouting shardRouting = mock(ShardRouting.class); + when(shardRouting.initializing()).thenReturn(true); + when(shardRouting.currentNodeId()).thenReturn("node"); + when(routingTable.getByAllocationId(any())).thenReturn(null, shardRouting); + when(shard.getReplicationGroup()).thenReturn(replicationGroup); + when(replicationGroup.getRoutingTable()).thenReturn(routingTable); + when(shard.acquireSafeIndexCommit()).thenReturn(mock(GatedCloseable.class)); + doAnswer(invocation -> { + ((ActionListener) invocation.getArguments()[0]).onResponse(() -> {}); + return null; + }).when(shard).acquirePrimaryOperationPermit(any(), anyString(), any()); + + final IndexMetadata.Builder indexMetadata = IndexMetadata.builder("test") + .settings( + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, between(0, 5)) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, between(1, 5)) + .put(IndexMetadata.SETTING_VERSION_CREATED, VersionUtils.randomVersion(random())) + .put(IndexMetadata.SETTING_INDEX_UUID, UUIDs.randomBase64UUID(random())) + ); + if (randomBoolean()) { + indexMetadata.state(IndexMetadata.State.CLOSE); + } + when(shard.indexSettings()).thenReturn(new IndexSettings(indexMetadata.build(), Settings.EMPTY)); + + final AtomicBoolean phase1Called = new AtomicBoolean(); + final AtomicBoolean prepareTargetForTranslogCalled = new AtomicBoolean(); + final AtomicBoolean phase2Called = new AtomicBoolean(); + final RecoverySourceHandler handler = new LocalStorePeerRecoverySourceHandler( + shard, + mock(RecoveryTargetHandler.class), + threadPool, + request, + Math.toIntExact(recoverySettings.getChunkSize().getBytes()), + between(1, 8), + between(1, 8) + ) { + + @Override + void phase1( + IndexCommit snapshot, + long startingSeqNo, + IntSupplier translogOps, + ActionListener listener, + boolean skipCreateRetentionLeaseStep + ) { + phase1Called.set(true); + super.phase1(snapshot, startingSeqNo, translogOps, listener, skipCreateRetentionLeaseStep); + } + + @Override + void prepareTargetForTranslog(int totalTranslogOps, ActionListener listener) { + prepareTargetForTranslogCalled.set(true); + super.prepareTargetForTranslog(totalTranslogOps, listener); + } + + @Override + void phase2( + long startingSeqNo, + long endingSeqNo, + Translog.Snapshot snapshot, + long maxSeenAutoIdTimestamp, + long maxSeqNoOfUpdatesOrDeletes, + RetentionLeases retentionLeases, + long mappingVersion, + ActionListener listener + ) throws IOException { + phase2Called.set(true); + super.phase2( + startingSeqNo, + endingSeqNo, + snapshot, + maxSeenAutoIdTimestamp, + maxSeqNoOfUpdatesOrDeletes, + retentionLeases, + mappingVersion, + listener + ); + } + + }; + handler.waitForAssignmentPropagate(new SetOnce<>()); + verify(routingTable, 
times(2)).getByAllocationId(null); + } + public void testCancellationsDoesNotLeakPrimaryPermits() throws Exception { final CancellableThreads cancellableThreads = new CancellableThreads(); final IndexShard shard = mock(IndexShard.class); diff --git a/server/src/test/java/org/opensearch/indices/recovery/RemoteStorePeerRecoverySourceHandlerTests.java b/server/src/test/java/org/opensearch/indices/recovery/RemoteStorePeerRecoverySourceHandlerTests.java index 131514eb019b3..7f913f3c8596a 100644 --- a/server/src/test/java/org/opensearch/indices/recovery/RemoteStorePeerRecoverySourceHandlerTests.java +++ b/server/src/test/java/org/opensearch/indices/recovery/RemoteStorePeerRecoverySourceHandlerTests.java @@ -8,20 +8,64 @@ package org.opensearch.indices.recovery; +import org.apache.lucene.index.IndexCommit; +import org.opensearch.Version; +import org.opensearch.action.support.PlainActionFuture; import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.routing.IndexShardRoutingTable; +import org.opensearch.common.UUIDs; +import org.opensearch.common.concurrent.GatedCloseable; +import org.opensearch.common.lease.Releasable; +import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.action.ActionListener; +import org.opensearch.core.index.shard.ShardId; import org.opensearch.index.IndexSettings; +import org.opensearch.index.engine.Engine; import org.opensearch.index.engine.NRTReplicationEngineFactory; +import org.opensearch.index.engine.SegmentsStats; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.replication.OpenSearchIndexLevelReplicationTestCase; import org.opensearch.index.seqno.ReplicationTracker; +import org.opensearch.index.seqno.RetentionLeases; +import org.opensearch.index.seqno.SeqNoStats; +import org.opensearch.index.seqno.SequenceNumbers; import org.opensearch.index.shard.IndexShard; +import org.opensearch.index.store.Store; +import org.opensearch.index.translog.Translog; import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.test.IndexSettingsModule; +import org.opensearch.test.VersionUtils; +import java.io.IOException; import java.nio.file.Path; +import java.util.Collections; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.IntSupplier; + +import static java.util.Collections.emptyMap; +import static java.util.Collections.emptySet; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; public class RemoteStorePeerRecoverySourceHandlerTests extends OpenSearchIndexLevelReplicationTestCase { + private static final IndexSettings INDEX_SETTINGS = IndexSettingsModule.newIndexSettings( + "index", + Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, org.opensearch.Version.CURRENT).build() + ); + private final ShardId shardId = new ShardId(INDEX_SETTINGS.getIndex(), 1); + + private final ClusterSettings service = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + private static final Settings settings = Settings.builder() 
.put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) .put(IndexMetadata.SETTING_REMOTE_STORE_ENABLED, "true") @@ -73,4 +117,122 @@ public void testReplicaShardRecoveryUptoLastFlushedCommit() throws Exception { assertFalse(primary.getRetentionLeases().contains(ReplicationTracker.getPeerRecoveryRetentionLeaseId(replica2.routingEntry()))); } } + + public void testThrowExceptionOnNoTargetInRouting() throws IOException { + final RecoverySettings recoverySettings = new RecoverySettings(Settings.EMPTY, service); + final StartRecoveryRequest request = getStartRecoveryRequest(); + final IndexShard shard = mock(IndexShard.class); + when(shard.seqNoStats()).thenReturn(mock(SeqNoStats.class)); + when(shard.segmentStats(anyBoolean(), anyBoolean())).thenReturn(mock(SegmentsStats.class)); + when(shard.isRelocatedPrimary()).thenReturn(false); + final org.opensearch.index.shard.ReplicationGroup replicationGroup = mock(org.opensearch.index.shard.ReplicationGroup.class); + final IndexShardRoutingTable routingTable = mock(IndexShardRoutingTable.class); + when(routingTable.getByAllocationId(anyString())).thenReturn(null); + when(shard.getReplicationGroup()).thenReturn(replicationGroup); + when(replicationGroup.getRoutingTable()).thenReturn(routingTable); + when(shard.acquireSafeIndexCommit()).thenReturn(mock(GatedCloseable.class)); + doAnswer(invocation -> { + ((ActionListener) invocation.getArguments()[0]).onResponse(() -> {}); + return null; + }).when(shard).acquirePrimaryOperationPermit(any(), anyString(), any()); + + final IndexMetadata.Builder indexMetadata = IndexMetadata.builder("test") + .settings( + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, between(0, 5)) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, between(1, 5)) + .put(IndexMetadata.SETTING_VERSION_CREATED, VersionUtils.randomVersion(random())) + .put(IndexMetadata.SETTING_INDEX_UUID, UUIDs.randomBase64UUID(random())) + ); + if (randomBoolean()) { + indexMetadata.state(IndexMetadata.State.CLOSE); + } + when(shard.indexSettings()).thenReturn(new IndexSettings(indexMetadata.build(), Settings.EMPTY)); + + final AtomicBoolean phase1Called = new AtomicBoolean(); + final AtomicBoolean prepareTargetForTranslogCalled = new AtomicBoolean(); + final AtomicBoolean phase2Called = new AtomicBoolean(); + final RecoverySourceHandler handler = new RemoteStorePeerRecoverySourceHandler( + shard, + mock(RecoveryTargetHandler.class), + threadPool, + request, + Math.toIntExact(recoverySettings.getChunkSize().getBytes()), + between(1, 8), + between(1, 8) + ) { + + @Override + void phase1( + IndexCommit snapshot, + long startingSeqNo, + IntSupplier translogOps, + ActionListener listener, + boolean skipCreateRetentionLeaseStep + ) { + phase1Called.set(true); + super.phase1(snapshot, startingSeqNo, translogOps, listener, skipCreateRetentionLeaseStep); + } + + @Override + void prepareTargetForTranslog(int totalTranslogOps, ActionListener listener) { + prepareTargetForTranslogCalled.set(true); + super.prepareTargetForTranslog(totalTranslogOps, listener); + } + + @Override + void phase2( + long startingSeqNo, + long endingSeqNo, + Translog.Snapshot snapshot, + long maxSeenAutoIdTimestamp, + long maxSeqNoOfUpdatesOrDeletes, + RetentionLeases retentionLeases, + long mappingVersion, + ActionListener listener + ) throws IOException { + phase2Called.set(true); + super.phase2( + startingSeqNo, + endingSeqNo, + snapshot, + maxSeenAutoIdTimestamp, + maxSeqNoOfUpdatesOrDeletes, + retentionLeases, + mappingVersion, + listener + ); + } + + }; 
+ PlainActionFuture future = new PlainActionFuture<>(); + expectThrows(DelayRecoveryException.class, () -> { + handler.recoverToTarget(future); + future.actionGet(); + }); + verify(routingTable, times(5)).getByAllocationId(null); + assertFalse(phase1Called.get()); + assertFalse(prepareTargetForTranslogCalled.get()); + assertFalse(phase2Called.get()); + } + + public StartRecoveryRequest getStartRecoveryRequest() throws IOException { + Store.MetadataSnapshot metadataSnapshot = randomBoolean() + ? Store.MetadataSnapshot.EMPTY + : new Store.MetadataSnapshot( + Collections.emptyMap(), + Collections.singletonMap(Engine.HISTORY_UUID_KEY, UUIDs.randomBase64UUID()), + randomIntBetween(0, 100) + ); + return new StartRecoveryRequest( + shardId, + null, + new DiscoveryNode("b", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT), + new DiscoveryNode("b", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT), + metadataSnapshot, + randomBoolean(), + randomNonNegativeLong(), + randomBoolean() || metadataSnapshot.getHistoryUUID() == null ? SequenceNumbers.UNASSIGNED_SEQ_NO : randomNonNegativeLong() + ); + } }