-
-
Notifications
You must be signed in to change notification settings - Fork 259
Client rack support #1159
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Client rack support #1159
Changes from 21 commits
1da0a73
f1a0889
c7c7edc
723f3f5
713e529
bdae9c2
b91e175
5431109
44fc867
8434290
5a03ae4
6da902f
31dc873
c2dee2d
ff8a672
a75ef5d
ffc9ef6
d7a01d9
f02b242
6bc83f9
cd2793c
624ba88
3484400
e33fa60
5e2148c
f6175b6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| Add rack-aware fetching from the closest in-sync replica (KIP-392) via the new ``client_rack`` option on :class:`AIOKafkaConsumer`. When set and the brokers support ``FetchRequest v11`` (Kafka 2.4+) with a ``replica.selector.class`` configured, the consumer will fetch from a same-rack follower instead of the partition leader, reducing cross-AZ traffic and tail latency. | ||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -495,15 +495,17 @@ class FetchRequest_v11(RequestStruct): | |
|
|
||
|
|
||
| FetchRequestStruct: TypeAlias = ( | ||
| FetchRequest_v1 | FetchRequest_v2 | FetchRequest_v3 | FetchRequest_v4 | ||
| # After v4 is not implemented yet | ||
| # | FetchRequest_v5 | ||
| # | FetchRequest_v6 | ||
| # | FetchRequest_v7 | ||
| # | FetchRequest_v8 | ||
| # | FetchRequest_v9 | ||
| # | FetchRequest_v10 | ||
| # | FetchRequest_v11 | ||
| FetchRequest_v1 | ||
| | FetchRequest_v2 | ||
| | FetchRequest_v3 | ||
| | FetchRequest_v4 | ||
| | FetchRequest_v5 | ||
| | FetchRequest_v6 | ||
| | FetchRequest_v7 | ||
| | FetchRequest_v8 | ||
| | FetchRequest_v9 | ||
| | FetchRequest_v10 | ||
| | FetchRequest_v11 | ||
| ) | ||
|
|
||
|
|
||
|
|
@@ -517,12 +519,14 @@ def __init__( | |
| max_bytes: int, | ||
| isolation_level: int, | ||
| topics: list[tuple[str, list[tuple[int, int, int]]]], | ||
| rack_id: str = "", | ||
| ): | ||
| self._max_wait_ms = max_wait_time | ||
| self._min_bytes = min_bytes | ||
| self._max_bytes = max_bytes | ||
| self._isolation_level = isolation_level | ||
| self._topics = topics | ||
| self._rack_id = rack_id | ||
|
|
||
| @property | ||
| def topics(self) -> list[tuple[str, list[tuple[int, int, int]]]]: | ||
|
|
@@ -531,7 +535,18 @@ def topics(self) -> list[tuple[str, list[tuple[int, int, int]]]]: | |
| def build( | ||
| self, request_struct_class: type[FetchRequestStruct] | ||
| ) -> FetchRequestStruct: | ||
| if request_struct_class.API_VERSION > 3: | ||
| api_version = request_struct_class.API_VERSION | ||
|
|
||
| # v0..v3 do not support isolation_level. v4 adds isolation_level | ||
| # but keeps the v0 per-partition layout. v5+ adds per-partition | ||
| # `log_start_offset`. v9+ also adds `current_leader_epoch`. v7+ | ||
| # adds incremental fetch session fields and forgotten_topics_data. | ||
| # v11 adds top-level rack_id. | ||
| # We silently allow `rack_id` to be set on the FetchRequest builder | ||
| # so callers don't have to branch -- it is simply not transmitted on | ||
| # versions < 11. | ||
|
|
||
| if api_version == 4: | ||
| return request_struct_class( | ||
| -1, # replica_id | ||
| self._max_wait_ms, | ||
|
|
@@ -541,12 +556,63 @@ def build( | |
| self._topics, | ||
| ) | ||
|
|
||
| if api_version >= 5: | ||
| # v5+ adds per-partition `log_start_offset`. v9+ also adds | ||
| # `current_leader_epoch`. v7+ adds incremental fetch session | ||
| # fields and forgotten_topics_data. v11 adds top-level rack_id. | ||
| include_leader_epoch = api_version >= 9 | ||
| partitions_by_topic: list[tuple[str, list[tuple[int, ...]]]] = [] | ||
| for topic, partitions in self._topics: | ||
| new_partitions: list[tuple[int, ...]] = [] | ||
| for partition, offset, max_bytes in partitions: | ||
| if include_leader_epoch: | ||
| new_partitions.append( | ||
| ( | ||
| partition, | ||
| -1, # current_leader_epoch (unknown) | ||
|
ods marked this conversation as resolved.
|
||
| offset, # fetch_offset | ||
| -1, # log_start_offset (consumer) | ||
| max_bytes, | ||
| ) | ||
| ) | ||
| else: | ||
| new_partitions.append( | ||
| ( | ||
| partition, | ||
| offset, # fetch_offset | ||
| -1, # log_start_offset (consumer) | ||
| max_bytes, | ||
| ) | ||
| ) | ||
| partitions_by_topic.append((topic, new_partitions)) | ||
|
|
||
| args: list[object] = [ | ||
| -1, # replica_id | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am a bit unsure if we should still send -1 when consuming from a follower, but it seems to be valid when I look to the java client code https://github.com/apache/kafka/blob/trunk/clients/src/main/java/org/apache/kafka/clients/consumer/internals/AbstractFetch.java#L317 I am just a bit confused by this sentence in the KIP
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, the Java client does the same. -1 means "I'm a consumer" not "only fetch from leader". |
||
| self._max_wait_ms, | ||
| self._min_bytes, | ||
| self._max_bytes, | ||
| self._isolation_level, | ||
| ] | ||
| if api_version >= 7: | ||
| args.extend( | ||
| [ | ||
| 0, # session_id (no incremental fetch session) | ||
| -1, # session_epoch (FINAL_EPOCH) | ||
| ] | ||
| ) | ||
| args.append(partitions_by_topic) | ||
| if api_version >= 7: | ||
| args.append([]) # forgotten_topics_data | ||
| if api_version >= 11: | ||
| args.append(self._rack_id) | ||
| return request_struct_class(*args) | ||
|
|
||
| if self._isolation_level: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we raise a similar exception is someone is setting a rack_id, while the broker doesn't support v11 ?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, silent fallback is what Java does. But worth a log warning. |
||
| raise IncompatibleBrokerVersion( | ||
| "isolation_level requires FetchRequest >= v4" | ||
| ) | ||
|
|
||
| if request_struct_class.API_VERSION == 3: | ||
| if api_version == 3: | ||
| return request_struct_class( | ||
| -1, # replica_id | ||
| self._max_wait_ms, | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.