Commit 95fd8f6

don't wait if the first request will be cancelled
sauyon committed Aug 30, 2023
1 parent f503a68 commit 95fd8f6
Showing 1 changed file with 18 additions and 4 deletions.
src/bentoml/_internal/marshal/dispatcher.py (22 changes: 18 additions & 4 deletions)

@@ -354,7 +354,10 @@ async def controller(self):
                 a = self.optimizer.o_a
                 b = self.optimizer.o_b
 
-                if n > 1 and (w0 + a * n + b) >= self.max_latency_in_ms:
+                # the estimated latency of the first request if we began processing now
+                latency_0 = w0 + a * n + b
+
+                if n > 1 and latency_0 >= self.max_latency_in_ms:
                     self._queue.popleft()[2].cancel()
                     continue
                 if self._sema.is_locked():
@@ -363,12 +366,23 @@ async def controller(self):
                         continue
                     await asyncio.sleep(self.tick_interval)
                     continue
-                if (
+
+                # we are now free to dispatch whenever we like
+                while (
+                    # if we don't already have enough requests,
                     n < self.max_batch_size
-                    and n * (wn + dt + (a or 0)) <= self.optimizer.wait * decay
+                    # we are not about to cancel the first request,
+                    and latency_0 + dt <= self.max_latency_in_ms * 0.95
+                    # and waiting will cause average latency to decrease
+                    and n * (wn + dt + a) <= self.optimizer.wait * decay
                 ):
+                    n = len(self._queue)
+                    now = time.time()
+                    wn = now - self._queue[-1][0]
+                    latency_0 = w0 + a * n + b
+
+                    # wait for additional requests to arrive
                     await asyncio.sleep(self.tick_interval)
-                    continue
 
                 n_call_out = min(self.max_batch_size, n)
                 # call
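For readers tracing the logic: the sketch below restates the new wait-loop decision as a standalone function. It assumes the optimizer fits a linear batch-latency model (o_a as per-request cost, o_b as fixed overhead, both in ms, as the diff's comments suggest); every constant and example value here is a hypothetical stand-in for illustration, not BentoML's actual implementation.

    # Minimal sketch of the new dispatch-wait decision. All constants are
    # hypothetical stand-ins for the dispatcher's real attributes.
    MAX_BATCH_SIZE = 8
    MAX_LATENCY_MS = 100.0
    TICK_MS = 1.0   # stand-in for self.tick_interval, in ms
    A_MS = 2.0      # stand-in for self.optimizer.o_a (per-request cost, ms)
    B_MS = 10.0     # stand-in for self.optimizer.o_b (fixed overhead, ms)
    WAIT_MS = 20.0  # stand-in for self.optimizer.wait (target extra wait, ms)
    DECAY = 0.95    # stand-in for the decay applied to the wait target

    def should_keep_waiting(n: int, w0_ms: float, wn_ms: float) -> bool:
        """Mirror of the new `while` condition: keep waiting for more requests
        only while the batch is not full, one more tick will not push the
        oldest request past ~95% of the latency budget (i.e. it will not be
        cancelled), and the added wait across the batch still beats the wait
        target."""
        # estimated latency of the oldest request if we began processing now
        latency_0 = w0_ms + A_MS * n + B_MS
        return (
            n < MAX_BATCH_SIZE
            and latency_0 + TICK_MS <= MAX_LATENCY_MS * 0.95
            and n * (wn_ms + TICK_MS + A_MS) <= WAIT_MS * DECAY
        )

    # 3 queued requests, oldest waited 40 ms: latency_0 = 40 + 2*3 + 10 = 56,
    # 56 + 1 <= 95 and 3 * (2 + 1 + 2) = 15 <= 19, so keep waiting.
    assert should_keep_waiting(n=3, w0_ms=40.0, wn_ms=2.0)
    # Oldest request near the budget: latency_0 = 90 + 6 + 10 = 106, and
    # 106 + 1 > 95, so dispatch now rather than risk cancelling it.
    assert not should_keep_waiting(n=3, w0_ms=90.0, wn_ms=2.0)

Read this way, the 0.95 headroom factor is what makes the commit title literal: the loop stops waiting strictly before the first request would cross the cancellation check at the top of the controller.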
