Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 37 additions & 10 deletions pubsub/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,12 @@
# limitations under the License.

from __future__ import absolute_import
from __future__ import division

import collections
import itertools
import logging
import math
import threading

from google.cloud.pubsub_v1 import types
Expand All @@ -34,6 +37,18 @@
"""The maximum amount of time in seconds to wait for additional request items
before processing the next batch of requests."""

_ACK_IDS_BATCH_SIZE = 2500
"""The maximum number of ACK IDs to send in a single StreamingPullRequest.

The backend imposes a maximum request size limit of 524288 bytes (512 KiB) per
acknowledge / modifyAckDeadline request. ACK IDs have a maximum size of 164
bytes; allowing roughly 176 bytes per ID on the wire to account for message
overhead, we cannot send more than 524288/176 ~= 2979 ACK IDs in a single
StreamingPullRequest message.

Accounting for some overhead, we should thus only send a maximum of 2500 ACK
IDs at a time.
"""


class Dispatcher(object):
def __init__(self, manager, queue):
Expand Down Expand Up @@ -119,9 +134,16 @@ def ack(self, items):
if time_to_ack is not None:
self._manager.ack_histogram.add(time_to_ack)

ack_ids = [item.ack_id for item in items]
request = types.StreamingPullRequest(ack_ids=ack_ids)
self._manager.send(request)
# We must potentially split the request into multiple smaller requests
# to avoid the server-side max request size limit.
ack_ids = (item.ack_id for item in items)
total_chunks = int(math.ceil(len(items) / _ACK_IDS_BATCH_SIZE))

for _ in range(total_chunks):
request = types.StreamingPullRequest(
ack_ids=itertools.islice(ack_ids, _ACK_IDS_BATCH_SIZE)
)
self._manager.send(request)

# Remove the message from lease management.
self.drop(items)
Expand Down Expand Up @@ -150,13 +172,18 @@ def modify_ack_deadline(self, items):
Args:
items(Sequence[ModAckRequest]): The items to modify.
"""
ack_ids = [item.ack_id for item in items]
seconds = [item.seconds for item in items]

request = types.StreamingPullRequest(
modify_deadline_ack_ids=ack_ids, modify_deadline_seconds=seconds
)
self._manager.send(request)
# We must potentially split the request into multiple smaller requests
# to avoid the server-side max request size limit.
ack_ids = (item.ack_id for item in items)
seconds = (item.seconds for item in items)
total_chunks = int(math.ceil(len(items) / _ACK_IDS_BATCH_SIZE))

for _ in range(total_chunks):
request = types.StreamingPullRequest(
modify_deadline_ack_ids=itertools.islice(ack_ids, _ACK_IDS_BATCH_SIZE),
modify_deadline_seconds=itertools.islice(seconds, _ACK_IDS_BATCH_SIZE),
)
self._manager.send(request)

def nack(self, items):
"""Explicitly deny receipt of messages.
Expand Down
57 changes: 57 additions & 0 deletions pubsub/tests/unit/pubsub_v1/subscriber/test_dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import collections
import threading

from google.cloud.pubsub_v1 import types
Expand Down Expand Up @@ -95,6 +96,34 @@ def test_ack_no_time():
manager.ack_histogram.add.assert_not_called()


def test_ack_splitting_large_payload():
    """ACK-ing more items than fit in one request must split the payload."""
    manager = mock.create_autospec(
        streaming_pull_manager.StreamingPullManager, instance=True
    )
    dispatcher_ = dispatcher.Dispatcher(manager, mock.sentinel.queue)

    # Pad every ACK ID to the realistic maximum length (176 bytes) so the
    # total payload comfortably exceeds a single request's size budget.
    items = [
        requests.AckRequest(ack_id=str(n).zfill(176), byte_size=0, time_to_ack=20)
        for n in range(5001)
    ]
    dispatcher_.ack(items)

    send_calls = manager.send.call_args_list
    # 5001 items / 2500 IDs per request -> exactly three requests sent.
    assert len(send_calls) == 3

    expected_ids = {item.ack_id for item in items}
    seen_ids = collections.Counter()

    for send_call in send_calls:
        request = send_call.args[0]
        # Every chunk must respect the server-side limit of 2**19 bytes.
        assert request.ByteSize() <= 524288
        seen_ids.update(request.ack_ids)

    # All messages ACK-ed, and no message ACK-ed more than once.
    assert set(seen_ids) == expected_ids
    assert seen_ids.most_common(1)[0][1] == 1


def test_lease():
manager = mock.create_autospec(
streaming_pull_manager.StreamingPullManager, instance=True
Expand Down Expand Up @@ -153,6 +182,34 @@ def test_modify_ack_deadline():
)


def test_modify_ack_deadline_splitting_large_payload():
    """MODACK-ing more items than fit in one request must split the payload."""
    manager = mock.create_autospec(
        streaming_pull_manager.StreamingPullManager, instance=True
    )
    dispatcher_ = dispatcher.Dispatcher(manager, mock.sentinel.queue)

    # Pad every ACK ID to the realistic maximum length (176 bytes) so the
    # total payload comfortably exceeds a single request's size budget.
    items = [
        requests.ModAckRequest(ack_id=str(n).zfill(176), seconds=60)
        for n in range(5001)
    ]
    dispatcher_.modify_ack_deadline(items)

    send_calls = manager.send.call_args_list
    # 5001 items / 2500 IDs per request -> exactly three requests sent.
    assert len(send_calls) == 3

    expected_ids = {item.ack_id for item in items}
    seen_ids = collections.Counter()

    for send_call in send_calls:
        request = send_call.args[0]
        # Every chunk must respect the server-side limit of 2**19 bytes.
        assert request.ByteSize() <= 524288
        seen_ids.update(request.modify_deadline_ack_ids)

    # All messages MODACK-ed, and no message MODACK-ed more than once.
    assert set(seen_ids) == expected_ids
    assert seen_ids.most_common(1)[0][1] == 1


@mock.patch("threading.Thread", autospec=True)
def test_start(thread):
manager = mock.create_autospec(
Expand Down