Skip to content

Commit cd68d10

Browse files
committed
KAFKA-12464: improve performance by using ArrayList adding instead of removing
1 parent d2adfbe commit cd68d10

File tree

1 file changed

+79
-20
lines changed

1 file changed

+79
-20
lines changed

clients/src/main/java/org/apache/kafka/clients/consumer/internals/AbstractStickyAssignor.java

Lines changed: 79 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
import java.util.Map.Entry;
3030
import java.util.Optional;
3131
import java.util.Set;
32-
import java.util.SortedSet;
3332
import java.util.TreeMap;
3433
import java.util.TreeSet;
3534
import java.util.stream.Collectors;
@@ -165,15 +164,15 @@ private Map<String, List<TopicPartition>> constrainedAssign(Map<String, Integer>
165164
partitionsPerTopic, consumerToOwnedPartitions));
166165
}
167166

168-
SortedSet<TopicPartition> unassignedPartitions = getTopicPartitions(partitionsPerTopic);
167+
List<TopicPartition> sortedAllPartitions = getTopicPartitions(partitionsPerTopic);
169168

170169
Set<TopicPartition> allRevokedPartitions = new HashSet<>();
171170

172171
// the consumers not yet at expected capacity
173172
List<String> unfilledMembers = new LinkedList<>();
174173

175174
int numberOfConsumers = consumerToOwnedPartitions.size();
176-
int totalPartitionsCount = unassignedPartitions.size();
175+
int totalPartitionsCount = sortedAllPartitions.size();
177176

178177
int minQuota = (int) Math.floor(((double) totalPartitionsCount) / numberOfConsumers);
179178
int maxQuota = (int) Math.ceil(((double) totalPartitionsCount) / numberOfConsumers);
@@ -186,6 +185,7 @@ private Map<String, List<TopicPartition>> constrainedAssign(Map<String, Integer>
186185
Map<String, List<TopicPartition>> assignment = new HashMap<>(
187186
consumerToOwnedPartitions.keySet().stream().collect(Collectors.toMap(c -> c, c -> new ArrayList<>(maxQuota))));
188187

188+
List<TopicPartition> toBeRemovedPartitions = new ArrayList<>();
189189
// Reassign as many previously owned partitions as possible
190190
for (Map.Entry<String, List<TopicPartition>> consumerEntry : consumerToOwnedPartitions.entrySet()) {
191191
String consumer = consumerEntry.getKey();
@@ -198,24 +198,35 @@ private Map<String, List<TopicPartition>> constrainedAssign(Map<String, Integer>
198198
// and put this member into unfilled member list
199199
if (ownedPartitions.size() > 0) {
200200
consumerAssignment.addAll(ownedPartitions);
201-
unassignedPartitions.removeAll(ownedPartitions);
201+
toBeRemovedPartitions.addAll(ownedPartitions);
202202
}
203203
unfilledMembers.add(consumer);
204204
} else if (ownedPartitions.size() >= maxQuota && numMaxCapacityMembers++ <= numExpectedMaxCapacityMembers) {
205205
// consumer owned the "maxQuota" of partitions or more, and we still under the number of expected max capacity members
206206
// so keep "maxQuota" of the owned partitions, and revoke the rest of the partitions
207207
consumerAssignment.addAll(ownedPartitions.subList(0, maxQuota));
208-
unassignedPartitions.removeAll(ownedPartitions.subList(0, maxQuota));
208+
toBeRemovedPartitions.addAll(ownedPartitions.subList(0, maxQuota));
209209
allRevokedPartitions.addAll(ownedPartitions.subList(maxQuota, ownedPartitions.size()));
210210
} else {
211211
// consumer owned the "minQuota" of partitions or more
212212
// so keep "minQuota" of the owned partitions, and revoke the rest of the partitions
213213
consumerAssignment.addAll(ownedPartitions.subList(0, minQuota));
214-
unassignedPartitions.removeAll(ownedPartitions.subList(0, minQuota));
214+
toBeRemovedPartitions.addAll(ownedPartitions.subList(0, minQuota));
215215
allRevokedPartitions.addAll(ownedPartitions.subList(minQuota, ownedPartitions.size()));
216216
}
217217
}
218218

219+
List<TopicPartition> unassignedPartitions;
220+
if (!toBeRemovedPartitions.isEmpty()) {
221+
Collections.sort(toBeRemovedPartitions,
222+
Comparator.comparing(TopicPartition::topic).thenComparing(TopicPartition::partition));
223+
unassignedPartitions = getUnassignedPartitions(sortedAllPartitions, toBeRemovedPartitions);
224+
sortedAllPartitions = null;
225+
} else {
226+
unassignedPartitions = sortedAllPartitions;
227+
}
228+
toBeRemovedPartitions = null;
229+
219230
if (log.isDebugEnabled()) {
220231
log.debug(String.format(
221232
"After reassigning previously owned partitions, unfilled members: %s, unassigned partitions: %s, " +
@@ -234,23 +245,28 @@ private Map<String, List<TopicPartition>> constrainedAssign(Map<String, Integer>
234245
List<TopicPartition> consumerAssignment = assignment.get(consumer);
235246

236247
int expectedAssignedCount = numMaxCapacityMembers < numExpectedMaxCapacityMembers ? maxQuota : minQuota;
248+
int currentAssignedCount = consumerAssignment.size();
237249
if (unassignedPartitionsIter.hasNext()) {
238250
TopicPartition tp = unassignedPartitionsIter.next();
239251
consumerAssignment.add(tp);
240-
unassignedPartitionsIter.remove();
252+
currentAssignedCount++;
241253
// We already assigned all possible ownedPartitions, so we know this must be newly to this consumer
242254
if (allRevokedPartitions.contains(tp))
243255
partitionsTransferringOwnership.put(tp, consumer);
244256
} else {
245-
// Should not enter here since we have calculated the exact number to assign to each consumer
246-
log.warn(String.format(
247-
"No more partitions to be assigned. consumer: [%s] with current size: %d, but expected size is %d",
248-
consumer, consumerAssignment.size(), expectedAssignedCount));
257+
// This will only happen when current consumer has minQuota of partitions, and in previous round,
258+
// the expectedAssignedCount is maxQuota, so, still in unfilledMembers list.
259+
// But now, expectedAssignedCount is minQuota, we can remove it.
260+
if (currentAssignedCount != minQuota) {
261+
// Should not enter here since we have calculated the exact number to assign to each consumer
262+
log.warn(String.format(
263+
"No more partitions to be assigned. consumer: [%s] with current size: %d, but expected size is %d",
264+
consumer, currentAssignedCount, expectedAssignedCount));
265+
}
266+
unfilledConsumerIter.remove();
249267
break;
250268
}
251269

252-
int currentAssignedCount = consumerAssignment.size();
253-
254270
if (currentAssignedCount == expectedAssignedCount) {
255271
if (currentAssignedCount == maxQuota) {
256272
numMaxCapacityMembers++;
@@ -263,16 +279,59 @@ private Map<String, List<TopicPartition>> constrainedAssign(Map<String, Integer>
263279
if (log.isDebugEnabled()) {
264280
log.debug("final assignment: " + assignment);
265281
}
266-
282+
267283
return assignment;
268284
}
269285

270-
private SortedSet<TopicPartition> getTopicPartitions(Map<String, Integer> partitionsPerTopic) {
271-
SortedSet<TopicPartition> allPartitions =
272-
new TreeSet<>(Comparator.comparing(TopicPartition::topic).thenComparing(TopicPartition::partition));
273-
for (Entry<String, Integer> entry: partitionsPerTopic.entrySet()) {
274-
String topic = entry.getKey();
275-
for (int i = 0; i < entry.getValue(); ++i) {
286+
/**
287+
* get the unassigned partition list by computing the difference set of the sortedPartitions(all partitions)
288+
* and sortedToBeRemovedPartitions. We use two pointers technique here:
289+
*
290+
* We loop the sortedPartition, and compare the ith element in sorted toBeRemovedPartitions(i start from 0):
291+
* - if not equal to the ith element, add to unassignedPartitions
292+
* - if equal to the the ith element, get next element from sortedToBeRemovedPartitions
293+
*
294+
* @param sortedPartitions: sorted all partitions
295+
* @param sortedToBeRemovedPartitions: sorted partitions, all are included in the sortedPartitions
296+
* @return the partitions don't assign to any current consumers
297+
*/
298+
private List<TopicPartition> getUnassignedPartitions(List<TopicPartition> sortedPartitions,
299+
List<TopicPartition> sortedToBeRemovedPartitions) {
300+
List<TopicPartition> unassignedPartitions = new ArrayList<>(
301+
sortedPartitions.size() - sortedToBeRemovedPartitions.size());
302+
303+
int index = 0;
304+
boolean shouldAddDirectly = false;
305+
int sizeToBeRemovedPartitions = sortedToBeRemovedPartitions.size();
306+
TopicPartition nextPartition = sortedToBeRemovedPartitions.get(index);
307+
for (TopicPartition topicPartition : sortedPartitions) {
308+
if (shouldAddDirectly || !nextPartition.equals(topicPartition)) {
309+
unassignedPartitions.add(topicPartition);
310+
} else {
311+
// equal case, don't add to unassignedPartitions, just get next partition
312+
if (index < sizeToBeRemovedPartitions - 1) {
313+
nextPartition = sortedToBeRemovedPartitions.get(++index);
314+
} else {
315+
// add the remaining directly since there is no more toBeRemovedPartitions
316+
shouldAddDirectly = true;
317+
}
318+
}
319+
}
320+
return unassignedPartitions;
321+
}
322+
323+
324+
private List<TopicPartition> getTopicPartitions(Map<String, Integer> partitionsPerTopic) {
325+
List<TopicPartition> allPartitions = new ArrayList<>(
326+
partitionsPerTopic.values().stream().reduce(0, Integer::sum));
327+
328+
List<String> allTopics = new ArrayList<>(partitionsPerTopic.keySet());
329+
// sort all topics first, then we can have sorted all topic partitions by adding partitions starting from 0
330+
Collections.sort(allTopics);
331+
332+
for (String topic: allTopics) {
333+
int partitionCount = partitionsPerTopic.get(topic);
334+
for (int i = 0; i < partitionCount; ++i) {
276335
allPartitions.add(new TopicPartition(topic, i));
277336
}
278337
}

0 commit comments

Comments
 (0)