Skip to content

Commit

Permalink
don't wait if the first request will be cancelled
Browse files Browse the repository at this point in the history
  • Loading branch information
sauyon committed Aug 30, 2023
1 parent 7a83d99 commit 808758a
Showing 1 changed file with 14 additions and 3 deletions.
17 changes: 14 additions & 3 deletions src/bentoml/_internal/marshal/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,12 +318,23 @@ async def controller(self):
continue
await asyncio.sleep(self.tick_interval)
continue
if (

# we are now free to dispatch whenever we like
while (
# if we don't already have enough requests,
n < self.max_batch_size
and n * (wn + dt + (a or 0)) <= self.optimizer.wait * decay
# we are not about to cancel the first request,
and latency_0 + dt <= self.max_latency_in_ms * 0.95
                # and waiting will cause average latency to decrease
and n * (wn + dt + a) <= self.optimizer.wait * decay
):
n = len(self._queue)
now = time.time()
wn = now - self._queue[-1][0]
latency_0 = w0 + a * n + b

# wait for additional requests to arrive
await asyncio.sleep(self.tick_interval)
continue

if self.max_batch_size == -1: # batching is disabled
n_call_out = 1
Expand Down

0 comments on commit 808758a

Please sign in to comment.