forked from linkedin/venice
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[server] Add store-aware partition-wise shared consumer assignment strategy (linkedin#1261)
Add a new version of the partition-wise shared consumer assignment strategy. We have been seeing subscriptions to the same topic / store being assigned to the same consumer. From the point of view of a particular store push (either an incremental push or a full push), these subscriptions compete with each other, become the long-tail partitions, and slow down the overall progress. Assuming the store / topic itself does not have data skew, we should try to assign these subscriptions to different consumers as evenly as possible. Especially for RT topics, the backup / current / future versions share the same input volume, so we should not treat them differently within the same pool, although we can optimize that further at a different level (the pool assignment strategy). This PR adds the new strategy so that when a new topic partition is looking for an assignment, it computes and sorts all the consumers' loads based on the general load and the store-specific load, and assigns the new topic partition to the least loaded consumer based on the computed load.
- Loading branch information
1 parent
8889ab3
commit 7e9aa79
Showing
7 changed files
with
278 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
121 changes: 121 additions & 0 deletions
121
...java/com/linkedin/davinci/kafka/consumer/StoreAwarePartitionWiseKafkaConsumerService.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
package com.linkedin.davinci.kafka.consumer; | ||
|
||
import com.linkedin.davinci.stats.AggKafkaConsumerServiceStats; | ||
import com.linkedin.venice.meta.ReadOnlyStoreRepository; | ||
import com.linkedin.venice.meta.Version; | ||
import com.linkedin.venice.pubsub.PubSubConsumerAdapterFactory; | ||
import com.linkedin.venice.pubsub.api.PubSubMessageDeserializer; | ||
import com.linkedin.venice.pubsub.api.PubSubTopic; | ||
import com.linkedin.venice.pubsub.api.PubSubTopicPartition; | ||
import com.linkedin.venice.utils.Time; | ||
import io.tehuti.metrics.MetricsRepository; | ||
import java.util.HashSet; | ||
import java.util.Properties; | ||
|
||
|
||
/** | ||
* {@link StoreAwarePartitionWiseKafkaConsumerService} is used to allocate share consumer from consumer pool at partition | ||
* granularity. One shared consumer may have multiple topics, and each topic may have multiple consumers. | ||
* This is store-aware version of topic-wise shared consumer service. The topic partition assignment in this service has | ||
* a heuristic that we should distribute the all the subscriptions related to a same store / version as even as possible. | ||
* The load calculation for each consumer will be: | ||
* Consumer assignment size + IMPOSSIBLE_MAX_PARTITION_COUNT_PER_CONSUMER * subscription count for the same store; | ||
* and we will pick the least loaded consumer for a new topic partition request. If there is no eligible consumer, it | ||
* will throw {@link IllegalStateException} | ||
*/ | ||
public class StoreAwarePartitionWiseKafkaConsumerService extends PartitionWiseKafkaConsumerService { | ||
// This constant makes sure the store subscription count will always be prioritized over consumer assignment count. | ||
private static final int IMPOSSIBLE_MAX_PARTITION_COUNT_PER_CONSUMER = 10000; | ||
|
||
StoreAwarePartitionWiseKafkaConsumerService( | ||
final ConsumerPoolType poolType, | ||
final PubSubConsumerAdapterFactory consumerFactory, | ||
final Properties consumerProperties, | ||
final long readCycleDelayMs, | ||
final int numOfConsumersPerKafkaCluster, | ||
final IngestionThrottler ingestionThrottler, | ||
final KafkaClusterBasedRecordThrottler kafkaClusterBasedRecordThrottler, | ||
final MetricsRepository metricsRepository, | ||
final String kafkaClusterAlias, | ||
final long sharedConsumerNonExistingTopicCleanupDelayMS, | ||
final TopicExistenceChecker topicExistenceChecker, | ||
final boolean liveConfigBasedKafkaThrottlingEnabled, | ||
final PubSubMessageDeserializer pubSubDeserializer, | ||
final Time time, | ||
final AggKafkaConsumerServiceStats stats, | ||
final boolean isKafkaConsumerOffsetCollectionEnabled, | ||
final ReadOnlyStoreRepository metadataRepository, | ||
final boolean isUnregisterMetricForDeletedStoreEnabled) { | ||
super( | ||
poolType, | ||
consumerFactory, | ||
consumerProperties, | ||
readCycleDelayMs, | ||
numOfConsumersPerKafkaCluster, | ||
ingestionThrottler, | ||
kafkaClusterBasedRecordThrottler, | ||
metricsRepository, | ||
kafkaClusterAlias, | ||
sharedConsumerNonExistingTopicCleanupDelayMS, | ||
topicExistenceChecker, | ||
liveConfigBasedKafkaThrottlingEnabled, | ||
pubSubDeserializer, | ||
time, | ||
stats, | ||
isKafkaConsumerOffsetCollectionEnabled, | ||
metadataRepository, | ||
isUnregisterMetricForDeletedStoreEnabled, | ||
StoreAwarePartitionWiseKafkaConsumerService.class.toString()); | ||
} | ||
|
||
@Override | ||
protected synchronized SharedKafkaConsumer pickConsumerForPartition( | ||
PubSubTopic versionTopic, | ||
PubSubTopicPartition topicPartition) { | ||
String storeName = versionTopic.getStoreName(); | ||
long minLoad = Long.MAX_VALUE; | ||
SharedKafkaConsumer minLoadConsumer = null; | ||
for (SharedKafkaConsumer consumer: getConsumerToConsumptionTask().keySet()) { | ||
int index = getConsumerToConsumptionTask().indexOf(consumer); | ||
if (topicPartition.getPubSubTopic().isRealTime() | ||
&& alreadySubscribedRealtimeTopicPartition(consumer, topicPartition)) { | ||
getLOGGER().info( | ||
"Consumer id: {} has already subscribed the same real time topic-partition: {} and thus cannot be picked", | ||
index, | ||
topicPartition); | ||
continue; | ||
} | ||
long overallLoad = getConsumerStoreLoad(consumer, storeName); | ||
if (overallLoad < minLoad) { | ||
minLoadConsumer = consumer; | ||
minLoad = overallLoad; | ||
} | ||
} | ||
if (minLoad == Long.MAX_VALUE) { | ||
throw new IllegalStateException("Unable to find least loaded consumer entry."); | ||
} | ||
|
||
// Update RT topic partition consumer map. | ||
if (topicPartition.getPubSubTopic().isRealTime()) { | ||
getRtTopicPartitionToConsumerMap().computeIfAbsent(topicPartition, key -> new HashSet<>()).add(minLoadConsumer); | ||
} | ||
|
||
getLOGGER().info( | ||
"Picked consumer id: {}, assignment size: {}, computed load: {} for topic partition: {}, version topic: {}", | ||
getConsumerToConsumptionTask().indexOf(minLoadConsumer), | ||
minLoadConsumer.getAssignmentSize(), | ||
minLoad, | ||
topicPartition, | ||
versionTopic); | ||
return minLoadConsumer; | ||
} | ||
|
||
long getConsumerStoreLoad(SharedKafkaConsumer consumer, String storeName) { | ||
long baseAssignmentCount = consumer.getAssignmentSize(); | ||
long storeSubscriptionCount = consumer.getAssignment() | ||
.stream() | ||
.filter(x -> Version.parseStoreFromKafkaTopicName(x.getTopicName()).equals(storeName)) | ||
.count(); | ||
return storeSubscriptionCount * IMPOSSIBLE_MAX_PARTITION_COUNT_PER_CONSUMER + baseAssignmentCount; | ||
} | ||
} |
Oops, something went wrong.