From 9791998bfd4eecb1a6ef69b5233b456e8c567090 Mon Sep 17 00:00:00 2001 From: tibrewalpratik Date: Thu, 7 Mar 2024 03:20:06 +0530 Subject: [PATCH 1/3] Add metric to track number of segments missed in upsert-snapshot --- .../main/java/org/apache/pinot/common/metrics/ServerGauge.java | 3 ++- .../local/upsert/BasePartitionUpsertMetadataManager.java | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ServerGauge.java b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ServerGauge.java index 45f34803a03..af655578452 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ServerGauge.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ServerGauge.java @@ -71,7 +71,8 @@ public enum ServerGauge implements AbstractMetrics.Gauge { END_TO_END_REALTIME_INGESTION_DELAY_MS("milliseconds", false), // Needed to track if valid doc id snapshots are present for faster restarts UPSERT_VALID_DOC_ID_SNAPSHOT_COUNT("upsertValidDocIdSnapshotCount", false), - UPSERT_PRIMARY_KEYS_IN_SNAPSHOT_COUNT("upsertPrimaryKeysInSnapshotCount", false); + UPSERT_PRIMARY_KEYS_IN_SNAPSHOT_COUNT("upsertPrimaryKeysInSnapshotCount", false), + UPSERT_MISSED_VALID_DOC_ID_SNAPSHOT_COUNT("upsertMissedValidDocIdSnapshotCount", false); private final String _gaugeName; private final String _unit; diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/BasePartitionUpsertMetadataManager.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/BasePartitionUpsertMetadataManager.java index dcee1cefa42..0f619bc20b2 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/BasePartitionUpsertMetadataManager.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/BasePartitionUpsertMetadataManager.java @@ -887,6 +887,9 @@ protected void doTakeSnapshot() { ServerGauge.UPSERT_VALID_DOC_ID_SNAPSHOT_COUNT, numImmutableSegments); _serverMetrics.setValueOfPartitionGauge(_tableNameWithType, _partitionId, ServerGauge.UPSERT_PRIMARY_KEYS_IN_SNAPSHOT_COUNT, numPrimaryKeysInSnapshot); + _serverMetrics.setValueOfPartitionGauge(_tableNameWithType, _partitionId, + ServerGauge.UPSERT_MISSED_VALID_DOC_ID_SNAPSHOT_COUNT, + numTrackedSegments - numImmutableSegments - numConsumingSegments); _logger.info("Finished taking snapshot for {} immutable segments with {} primary keys (out of {} total segments, " + "{} are consuming segments) in {} ms", numImmutableSegments, numPrimaryKeysInSnapshot, numTrackedSegments, numConsumingSegments, System.currentTimeMillis() - startTimeMs); From 9194110b245b8dee2882a58ea4d86ea0da14fe77 Mon Sep 17 00:00:00 2001 From: tibrewalpratik Date: Fri, 8 Mar 2024 02:13:35 +0530 Subject: [PATCH 2/3] address comments --- .../local/upsert/BasePartitionUpsertMetadataManager.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/BasePartitionUpsertMetadataManager.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/BasePartitionUpsertMetadataManager.java index 0f619bc20b2..8325ad09b1d 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/BasePartitionUpsertMetadataManager.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/BasePartitionUpsertMetadataManager.java @@ -887,9 +887,12 @@ protected void doTakeSnapshot() { ServerGauge.UPSERT_VALID_DOC_ID_SNAPSHOT_COUNT, numImmutableSegments); _serverMetrics.setValueOfPartitionGauge(_tableNameWithType, _partitionId, ServerGauge.UPSERT_PRIMARY_KEYS_IN_SNAPSHOT_COUNT, numPrimaryKeysInSnapshot); - _serverMetrics.setValueOfPartitionGauge(_tableNameWithType, _partitionId, - ServerGauge.UPSERT_MISSED_VALID_DOC_ID_SNAPSHOT_COUNT, - numTrackedSegments - numImmutableSegments - numConsumingSegments); + int numMissedSegments = numTrackedSegments - numImmutableSegments - numConsumingSegments; + if (numMissedSegments > 0) { + _serverMetrics.setValueOfPartitionGauge(_tableNameWithType, _partitionId, + ServerGauge.UPSERT_MISSED_VALID_DOC_ID_SNAPSHOT_COUNT, numMissedSegments); + _logger.warn("Missed taking snapshot for {} immutable segments", numMissedSegments); + } _logger.info("Finished taking snapshot for {} immutable segments with {} primary keys (out of {} total segments, " + "{} are consuming segments) in {} ms", numImmutableSegments, numPrimaryKeysInSnapshot, numTrackedSegments, numConsumingSegments, System.currentTimeMillis() - startTimeMs); From e7fdc328f1483683f908a20daea6ccd5c16662fa Mon Sep 17 00:00:00 2001 From: tibrewalpratik Date: Fri, 8 Mar 2024 05:35:58 +0530 Subject: [PATCH 3/3] address comments 2 --- .../java/org/apache/pinot/common/metrics/ServerGauge.java | 3 +-- .../java/org/apache/pinot/common/metrics/ServerMeter.java | 1 + .../local/upsert/BasePartitionUpsertMetadataManager.java | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ServerGauge.java b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ServerGauge.java index af655578452..45f34803a03 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ServerGauge.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ServerGauge.java @@ -71,8 +71,7 @@ public enum ServerGauge implements AbstractMetrics.Gauge { END_TO_END_REALTIME_INGESTION_DELAY_MS("milliseconds", false), // Needed to track if valid doc id snapshots are present for faster restarts UPSERT_VALID_DOC_ID_SNAPSHOT_COUNT("upsertValidDocIdSnapshotCount", false), - UPSERT_PRIMARY_KEYS_IN_SNAPSHOT_COUNT("upsertPrimaryKeysInSnapshotCount", false), - UPSERT_MISSED_VALID_DOC_ID_SNAPSHOT_COUNT("upsertMissedValidDocIdSnapshotCount", false); + UPSERT_PRIMARY_KEYS_IN_SNAPSHOT_COUNT("upsertPrimaryKeysInSnapshotCount", false); private final String _gaugeName; private final String _unit; diff --git a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ServerMeter.java b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ServerMeter.java index 53d19b96c28..c516740070f 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ServerMeter.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ServerMeter.java @@ -52,6 +52,7 @@ public enum ServerMeter implements AbstractMetrics.Meter { UPSERT_OUT_OF_ORDER("rows", false), DELETED_KEYS_TTL_PRIMARY_KEYS_REMOVED("rows", false), METADATA_TTL_PRIMARY_KEYS_REMOVED("rows", false), + UPSERT_MISSED_VALID_DOC_ID_SNAPSHOT_COUNT("segments", false), ROWS_WITH_ERRORS("rows", false), LLC_CONTROLLER_RESPONSE_NOT_SENT("messages", true), LLC_CONTROLLER_RESPONSE_COMMIT("messages", true), diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/BasePartitionUpsertMetadataManager.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/BasePartitionUpsertMetadataManager.java index 8325ad09b1d..a97bb659221 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/BasePartitionUpsertMetadataManager.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/BasePartitionUpsertMetadataManager.java @@ -889,8 +889,8 @@ protected void doTakeSnapshot() { ServerGauge.UPSERT_PRIMARY_KEYS_IN_SNAPSHOT_COUNT, numPrimaryKeysInSnapshot); int numMissedSegments = numTrackedSegments - numImmutableSegments - numConsumingSegments; if (numMissedSegments > 0) { - _serverMetrics.setValueOfPartitionGauge(_tableNameWithType, _partitionId, - ServerGauge.UPSERT_MISSED_VALID_DOC_ID_SNAPSHOT_COUNT, numMissedSegments); + _serverMetrics.addMeteredTableValue(_tableNameWithType, String.valueOf(_partitionId), + ServerMeter.UPSERT_MISSED_VALID_DOC_ID_SNAPSHOT_COUNT, numMissedSegments); _logger.warn("Missed taking snapshot for {} immutable segments", numMissedSegments); } _logger.info("Finished taking snapshot for {} immutable segments with {} primary keys (out of {} total segments, "