
Commit

add message about deadlock and reserve some space in KV cache to mitigate the risk

tohtana committed Nov 8, 2023
1 parent 83c4997 commit 5d60967
Showing 1 changed file with 11 additions and 0 deletions.
mii/batching/ragged_batching.py (11 additions, 0 deletions)
@@ -480,6 +480,14 @@ def _do_schedule_requests(self, requests: List[RaggedRequest]) -> None:
                break

            max_blocks = free_blocks - self.scheduled_req_blocks

            # Check capacity to mitigate the deadlock risk
            # We don't schedule requests when we find that a prompt is too long to fit into the KV cache
            if len(r.input_tokens) > 1:
                req_tokens, _ = self.inference_engine.query(r.uid, len(r.input_tokens), max_blocks)
                if req_tokens < len(r.input_tokens):
                    break

            req_tokens = min(len(r.input_tokens), max_batch_size)
            req_tokens, req_blocks = self.inference_engine.query(r.uid, req_tokens, max_blocks)
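
For context, here is a minimal, self-contained sketch of the capacity check added above. This is not the MII implementation: ToyRequest, ToyEngine, block_size, and free_blocks are hypothetical stand-ins, and only the control flow mirrors the new lines. A multi-token prompt that cannot fit entirely into the KV-cache blocks still available stops scheduling for this pass instead of being admitted and later starving the cache.

from collections import deque
from dataclasses import dataclass
from typing import List, Tuple


@dataclass
class ToyRequest:
    uid: int
    input_tokens: List[int]


@dataclass
class ToyEngine:
    block_size: int = 4    # tokens per KV-cache block (hypothetical)
    free_blocks: int = 8   # blocks currently free (hypothetical)

    def query(self, uid: int, n_tokens: int, max_blocks: int) -> Tuple[int, int]:
        # How many of n_tokens fit into max_blocks, and how many blocks that uses.
        fit = min(n_tokens, max_blocks * self.block_size)
        return fit, (fit + self.block_size - 1) // self.block_size


def schedule(requests, engine, max_batch_size=16):
    scheduled, scheduled_req_blocks = [], 0
    for r in requests:
        max_blocks = engine.free_blocks - scheduled_req_blocks

        # The added check: never admit a prompt that cannot fit entirely
        # into the KV-cache blocks still available in this pass.
        if len(r.input_tokens) > 1:
            req_tokens, _ = engine.query(r.uid, len(r.input_tokens), max_blocks)
            if req_tokens < len(r.input_tokens):
                break

        req_tokens = min(len(r.input_tokens), max_batch_size)
        req_tokens, req_blocks = engine.query(r.uid, req_tokens, max_blocks)
        scheduled.append(r.uid)
        scheduled_req_blocks += req_blocks
    return scheduled


print(schedule(deque([ToyRequest(0, list(range(10))),
                      ToyRequest(1, list(range(40)))]), ToyEngine()))  # [0]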

@@ -528,6 +536,9 @@ def schedule_requests(self) -> None:
        self._do_schedule_requests(next_token_gen_reqs)
        self._do_schedule_requests(prompt_reqs)

        if len(self.buffer) > 0 and len(self.scheduled_requests) == 0:
            raise RuntimeError("Deadlock detected: No requests were scheduled.")

        scheduled_requests_ids = set(id(r) for r in self.scheduled_requests)
        self.buffer = deque(
            [r for r in self.buffer if id(r) not in scheduled_requests_ids])
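
And a matching toy sketch of the new guard in schedule_requests, again with assumed names rather than the MII code: when both scheduling passes complete, a non-empty buffer combined with an empty scheduled set means no waiting request can make progress, so the scheduler raises immediately instead of spinning on the same buffer forever.

from collections import deque


class ToyScheduler:
    def __init__(self, requests):
        self.buffer = deque(requests)       # waiting requests (lists of tokens)
        self.scheduled_requests = []

    def _do_schedule_requests(self, requests):
        # Stand-in for the capacity-checked pass above: here nothing ever fits.
        pass

    def schedule_requests(self):
        next_token_gen_reqs = [r for r in self.buffer if len(r) == 1]
        prompt_reqs = [r for r in self.buffer if len(r) > 1]
        self._do_schedule_requests(next_token_gen_reqs)
        self._do_schedule_requests(prompt_reqs)

        # The added guard: work is pending but none of it could be scheduled.
        if len(self.buffer) > 0 and len(self.scheduled_requests) == 0:
            raise RuntimeError("Deadlock detected: No requests were scheduled.")

        scheduled_ids = set(id(r) for r in self.scheduled_requests)
        self.buffer = deque(r for r in self.buffer if id(r) not in scheduled_ids)


try:
    ToyScheduler([[1, 2, 3]]).schedule_requests()
except RuntimeError as err:
    print(err)  # Deadlock detected: No requests were scheduled.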
