Skip to content

Commit 1ead0ff

Browse files
fix: Rename index fields to chunk_index and tool_call_index for clarity
1 parent 41b7ed4 commit 1ead0ff

File tree

10 files changed: +207 additions, -254 deletions

haystack/components/generators/chat/openai.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -541,8 +541,8 @@ def _convert_chat_completion_chunk_to_streaming_chunk(
541541
return StreamingChunk(
542542
content="",
543543
component_info=component_info,
544-
# Index is None since it's only set to an int when a content block is present
545-
index=None,
544+
# chunk_index is None since it's only set to an int when a content block is present
545+
chunk_index=None,
546546
finish_reason=None,
547547
meta={
548548
"model": chunk.model,
@@ -560,7 +560,7 @@ def _convert_chat_completion_chunk_to_streaming_chunk(
560560
function = tool_call.function
561561
tool_calls_deltas.append(
562562
ToolCallDelta(
563-
index=tool_call.index,
563+
tool_call_index=tool_call.index,
564564
id=tool_call.id,
565565
tool_name=function.name if function else None,
566566
arguments=function.arguments if function and function.arguments else None,
@@ -569,14 +569,13 @@ def _convert_chat_completion_chunk_to_streaming_chunk(
569569
chunk_message = StreamingChunk(
570570
content=choice.delta.content or "",
571571
component_info=component_info,
572-
# We adopt the first tool_calls_deltas.index as the overall index of the chunk.
573-
index=tool_calls_deltas[0].index,
572+
# We adopt the first tool_calls_deltas.tool_call_index as the overall chunk_index of the chunk.
573+
chunk_index=tool_calls_deltas[0].tool_call_index,
574574
tool_calls=tool_calls_deltas,
575575
start=tool_calls_deltas[0].tool_name is not None,
576576
finish_reason=finish_reason_mapping.get(choice.finish_reason) if choice.finish_reason else None,
577577
meta={
578578
"model": chunk.model,
579-
"index": choice.index,
580579
"tool_calls": choice.delta.tool_calls,
581580
"finish_reason": choice.finish_reason,
582581
"received_at": datetime.now().isoformat(),
@@ -598,14 +597,13 @@ def _convert_chat_completion_chunk_to_streaming_chunk(
598597
chunk_message = StreamingChunk(
599598
content=choice.delta.content or "",
600599
component_info=component_info,
601-
index=resolved_index,
600+
chunk_index=resolved_index,
602601
# The first chunk is always a start message chunk that only contains role information, so if we reach here
603602
# and previous_chunks is length 1 then this is the start of text content.
604603
start=len(previous_chunks) == 1,
605604
finish_reason=finish_reason_mapping.get(choice.finish_reason) if choice.finish_reason else None,
606605
meta={
607606
"model": chunk.model,
608-
"index": choice.index,
609607
"tool_calls": choice.delta.tool_calls,
610608
"finish_reason": choice.finish_reason,
611609
"received_at": datetime.now().isoformat(),

haystack/components/generators/utils.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def print_streaming_chunk(chunk: StreamingChunk) -> None:
2525
:param chunk: A chunk of streaming data containing content and optional metadata, such as tool calls and
2626
tool results.
2727
"""
28-
if chunk.start and chunk.index and chunk.index > 0:
28+
if chunk.start and chunk.chunk_index and chunk.chunk_index > 0:
2929
# If this is the start of a new content block but not the first content block, print two new lines
3030
print("\n\n", flush=True, end="")
3131

@@ -40,7 +40,7 @@ def print_streaming_chunk(chunk: StreamingChunk) -> None:
4040
# If there is more than one tool call in the chunk, we print two new lines to separate them
4141
# We know there is more than one tool call if the index of the tool call is greater than the index of
4242
# the chunk.
43-
if chunk.index and tool_call.index > chunk.index:
43+
if chunk.chunk_index and tool_call.tool_call_index > chunk.chunk_index:
4444
print("\n\n", flush=True, end="")
4545

4646
print(f"[TOOL CALL]\nTool: {tool_call.tool_name} \nArguments: ", flush=True, end="")
@@ -86,17 +86,17 @@ def _convert_streaming_chunks_to_chat_message(chunks: list[StreamingChunk]) -> C
8686
for tool_call in chunk.tool_calls:
8787
# We use the index of the tool_call to track the tool call across chunks since the ID is not always
8888
# provided
89-
if tool_call.index not in tool_call_data:
90-
tool_call_data[tool_call.index] = {"id": "", "name": "", "arguments": ""}
89+
if tool_call.tool_call_index not in tool_call_data:
90+
tool_call_data[tool_call.tool_call_index] = {"id": "", "name": "", "arguments": ""}
9191

9292
# Save the ID if present
9393
if tool_call.id is not None:
94-
tool_call_data[tool_call.index]["id"] = tool_call.id
94+
tool_call_data[tool_call.tool_call_index]["id"] = tool_call.id
9595

9696
if tool_call.tool_name is not None:
97-
tool_call_data[tool_call.index]["name"] += tool_call.tool_name
97+
tool_call_data[tool_call.tool_call_index]["name"] += tool_call.tool_name
9898
if tool_call.arguments is not None:
99-
tool_call_data[tool_call.index]["arguments"] += tool_call.arguments
99+
tool_call_data[tool_call.tool_call_index]["arguments"] += tool_call.arguments
100100

101101
# Convert accumulated tool call data into ToolCall objects
102102
sorted_keys = sorted(tool_call_data.keys())
@@ -119,9 +119,16 @@ def _convert_streaming_chunks_to_chat_message(chunks: list[StreamingChunk]) -> C
119119
finish_reasons = [chunk.finish_reason for chunk in chunks if chunk.finish_reason]
120120
finish_reason = finish_reasons[-1] if finish_reasons else None
121121

122+
# Find the first chunk with a meaningful chunk_index (not None)
123+
first_meaningful_chunk = None
124+
for chunk in chunks:
125+
if chunk.chunk_index is not None:
126+
first_meaningful_chunk = chunk
127+
break
128+
122129
meta = {
123130
"model": chunks[-1].meta.get("model"),
124-
"index": 0,
131+
"chunk_index": first_meaningful_chunk.chunk_index if first_meaningful_chunk else None,
125132
"finish_reason": finish_reason,
126133
"completion_start_time": chunks[0].meta.get("received_at"), # first chunk received
127134
"usage": chunks[-1].meta.get("usage"), # last chunk has the final usage data if available

haystack/dataclasses/streaming_chunk.py

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,13 @@ class ToolCallDelta:
1919
"""
2020
Represents a Tool call prepared by the model, usually contained in an assistant message.
2121
22-
:param index: The index of the Tool call in the list of Tool calls.
22+
:param tool_call_index: The index of the Tool call in the list of Tool calls.
2323
:param tool_name: The name of the Tool to call.
2424
:param arguments: Either the full arguments in JSON format or a delta of the arguments.
2525
:param id: The ID of the Tool call.
2626
"""
2727

28-
index: int
28+
tool_call_index: int
2929
tool_name: Optional[str] = field(default=None)
3030
arguments: Optional[str] = field(default=None)
3131
id: Optional[str] = field(default=None) # noqa: A003
@@ -34,7 +34,7 @@ def to_dict(self) -> dict[str, Any]:
3434
"""
3535
Returns a dictionary representation of the ToolCallDelta.
3636
37-
:returns: A dictionary with keys 'index', 'tool_name', 'arguments', and 'id'.
37+
:returns: A dictionary with keys 'tool_call_index', 'tool_name', 'arguments', and 'id'.
3838
"""
3939
return asdict(self)
4040

@@ -46,6 +46,9 @@ def from_dict(cls, data: dict[str, Any]) -> "ToolCallDelta":
4646
:param data: Dictionary containing ToolCallDelta's attributes.
4747
:returns: A ToolCallDelta instance.
4848
"""
49+
# Handle backward compatibility for old 'index' field
50+
if 'index' in data and 'tool_call_index' not in data:
51+
data['tool_call_index'] = data.pop('index')
4952
return ToolCallDelta(**data)
5053

5154

@@ -106,7 +109,7 @@ class StreamingChunk:
106109
:param meta: A dictionary containing metadata related to the message chunk.
107110
:param component_info: A `ComponentInfo` object containing information about the component that generated the chunk,
108111
such as the component name and type.
109-
:param index: An optional integer index representing which content block this chunk belongs to.
112+
:param chunk_index: An optional integer index representing which content block this chunk belongs to.
110113
:param tool_calls: An optional list of ToolCallDelta object representing a tool call associated with the message
111114
chunk.
112115
:param tool_call_result: An optional ToolCallResult object representing the result of a tool call.
@@ -119,7 +122,7 @@ class StreamingChunk:
119122
content: str
120123
meta: dict[str, Any] = field(default_factory=dict, hash=False)
121124
component_info: Optional[ComponentInfo] = field(default=None)
122-
index: Optional[int] = field(default=None)
125+
chunk_index: Optional[int] = field(default=None)
123126
tool_calls: Optional[list[ToolCallDelta]] = field(default=None)
124127
tool_call_result: Optional[ToolCallResult] = field(default=None)
125128
start: bool = field(default=False)
@@ -135,8 +138,8 @@ def __post_init__(self):
135138
)
136139

137140
# NOTE: We don't enforce this for self.content otherwise it would be a breaking change
138-
if (self.tool_calls or self.tool_call_result) and self.index is None:
139-
raise ValueError("If `tool_call`, or `tool_call_result` is set, `index` must also be set.")
141+
if (self.tool_calls or self.tool_call_result) and self.chunk_index is None:
142+
raise ValueError("If `tool_call`, or `tool_call_result` is set, `chunk_index` must also be set.")
140143

141144
def to_dict(self) -> dict[str, Any]:
142145
"""
@@ -148,7 +151,7 @@ def to_dict(self) -> dict[str, Any]:
148151
"content": self.content,
149152
"meta": self.meta,
150153
"component_info": self.component_info.to_dict() if self.component_info else None,
151-
"index": self.index,
154+
"chunk_index": self.chunk_index,
152155
"tool_calls": [tc.to_dict() for tc in self.tool_calls] if self.tool_calls else None,
153156
"tool_call_result": self.tool_call_result.to_dict() if self.tool_call_result else None,
154157
"start": self.start,
@@ -166,11 +169,15 @@ def from_dict(cls, data: dict[str, Any]) -> "StreamingChunk":
166169
if "content" not in data:
167170
raise ValueError("Missing required field `content` in StreamingChunk deserialization.")
168171

172+
# Handle backward compatibility for old 'index' field
173+
if 'index' in data and 'chunk_index' not in data:
174+
data['chunk_index'] = data.pop('index')
175+
169176
return StreamingChunk(
170177
content=data["content"],
171178
meta=data.get("meta", {}),
172179
component_info=ComponentInfo.from_dict(data["component_info"]) if data.get("component_info") else None,
173-
index=data.get("index"),
180+
chunk_index=data.get("chunk_index"),
174181
tool_calls=[ToolCallDelta.from_dict(tc) for tc in data["tool_calls"]] if data.get("tool_calls") else None,
175182
tool_call_result=ToolCallResult.from_dict(data["tool_call_result"])
176183
if data.get("tool_call_result")

test/components/builders/test_answer_builder.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ def test_run_with_chat_message_replies_without_pattern(self):
186186

187187
message_meta = {
188188
"model": "gpt-4o-mini",
189-
"index": 0,
189+
"chunk_index": 0,
190190
"finish_reason": "stop",
191191
"usage": {"prompt_tokens": 32, "completion_tokens": 153, "total_tokens": 185},
192192
}
@@ -210,7 +210,7 @@ def test_run_with_chat_message_replies_with_pattern(self):
210210

211211
message_meta = {
212212
"model": "gpt-4o-mini",
213-
"index": 0,
213+
"chunk_index": 0,
214214
"finish_reason": "stop",
215215
"usage": {"prompt_tokens": 32, "completion_tokens": 153, "total_tokens": 185},
216216
}
@@ -232,7 +232,7 @@ def test_run_with_chat_message_replies_with_documents(self):
232232
component = AnswerBuilder(reference_pattern="\\[(\\d+)\\]")
233233
message_meta = {
234234
"model": "gpt-4o-mini",
235-
"index": 0,
235+
"chunk_index": 0,
236236
"finish_reason": "stop",
237237
"usage": {"prompt_tokens": 32, "completion_tokens": 153, "total_tokens": 185},
238238
}
@@ -263,7 +263,7 @@ def test_run_with_chat_message_replies_with_pattern_set_at_runtime(self):
263263
component = AnswerBuilder(pattern="unused pattern")
264264
message_meta = {
265265
"model": "gpt-4o-mini",
266-
"index": 0,
266+
"chunk_index": 0,
267267
"finish_reason": "stop",
268268
"usage": {"prompt_tokens": 32, "completion_tokens": 153, "total_tokens": 185},
269269
}
@@ -286,7 +286,7 @@ def test_run_with_chat_message_replies_with_meta_set_at_run_time(self):
286286
component = AnswerBuilder()
287287
message_meta = {
288288
"model": "gpt-4o-mini",
289-
"index": 0,
289+
"chunk_index": 0,
290290
"finish_reason": "stop",
291291
"usage": {"prompt_tokens": 32, "completion_tokens": 153, "total_tokens": 185},
292292
}
@@ -300,7 +300,7 @@ def test_run_with_chat_message_replies_with_meta_set_at_run_time(self):
300300
# Check metadata excluding all_messages
301301
expected_meta = {
302302
"model": "gpt-4o-mini",
303-
"index": 0,
303+
"chunk_index": 0,
304304
"finish_reason": "stop",
305305
"usage": {"prompt_tokens": 32, "completion_tokens": 153, "total_tokens": 185},
306306
"test": "meta",

test/components/generators/chat/test_hugging_face_api.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -588,7 +588,7 @@ def test_run_with_tools(self, mock_check_valid_model, tools):
588588
assert response["replies"][0].tool_calls[0].id == "0"
589589
assert response["replies"][0].meta == {
590590
"finish_reason": "tool_calls",
591-
"index": 0,
591+
"chunk_index": 0,
592592
"model": "meta-llama/Llama-3.1-70B-Instruct",
593593
"usage": {"completion_tokens": 30, "prompt_tokens": 426},
594594
}
@@ -675,14 +675,12 @@ def test_convert_hfapi_tool_calls_invalid_type_arguments(self):
675675
model="microsoft/Phi-3.5-mini-instruct",
676676
system_fingerprint="3.2.1-sha-4d28897",
677677
),
678-
StreamingChunk(
679-
content=" Paris",
678+
StreamingChunk(content=" Paris",
680679
meta={
681680
"received_at": "2025-05-27T12:14:28.228852",
682681
"model": "microsoft/Phi-3.5-mini-instruct",
683682
"finish_reason": None,
684-
},
685-
index=0,
683+
},chunk_index=0,
686684
start=True,
687685
),
688686
[],
@@ -1041,7 +1039,7 @@ async def test_run_async_with_tools(self, tools, mock_check_valid_model):
10411039
assert response["replies"][0].tool_calls[0].id == "0"
10421040
assert response["replies"][0].meta == {
10431041
"finish_reason": "tool_calls",
1044-
"index": 0,
1042+
"chunk_index": 0,
10451043
"model": "meta-llama/Llama-3.1-70B-Instruct",
10461044
"usage": {"completion_tokens": 30, "prompt_tokens": 426},
10471045
}

0 commit comments

Comments (0)