From 616dec095ac3a63150cf25695b706e1ef3d5ce6c Mon Sep 17 00:00:00 2001
From: lievan <42917263+lievan@users.noreply.github.com>
Date: Mon, 27 Jan 2025 00:32:47 -0500
Subject: [PATCH] fix(llmobs): fix token extraction for chat completion streams
 (#12070)

Fixes token chunk extraction to account for the `choices` field in a
chunk being an empty list

#### Before
```
Error generating LLMObs span event for span <Span(id=16151817411339450163,trace_id=137677390470467884790869841527646927357,parent_id=None,name=openai.request)>, likely due to malformed span
Traceback (most recent call last):
  File "/XXXXX/ddtrace/contrib/internal/openai/utils.py", line 118, in __aiter__
    await self._extract_token_chunk(chunk)
  File "/XXXXX/ddtrace/contrib/internal/openai/utils.py", line 157, in _extract_token_chunk
    choice = getattr(chunk, "choices", [None])[0]
IndexError: list index out of range
```

#### After
Traced successfully
<img width="904" alt="image"
src="https://github.com/user-attachments/assets/43c68edd-03f7-4105-a3d3-213eeb5fb0ab"
/>

## Checklist
- [x] PR author has checked that all the criteria below are met
- The PR description includes an overview of the change
- The PR description articulates the motivation for the change
- The change includes tests OR the PR description describes a testing
strategy
- The PR description notes risks associated with the change, if any
- Newly-added code is easy to change
- The change follows the [library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
- The change includes or references documentation updates if necessary
- Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))

## Reviewer Checklist
- [x] Reviewer has checked that all the criteria below are met
- Title is accurate
- All changes are related to the pull request's stated goal
- Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes
- Testing strategy adequately addresses listed risks
- Newly-added code is easy to change
- Release note makes sense to a user of the library
- If necessary, author has acknowledged and discussed the performance
implications of this PR as reported in the benchmarks PR comment
- Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)

---------

Co-authored-by: lievan <evan.li@datadoqhq.com>
Co-authored-by: Yun Kim <35776586+Yun-Kim@users.noreply.github.com>
(cherry picked from commit 75179ef6425d0318efb1f6c8aa31e072bc620c61)
---
 ddtrace/contrib/internal/openai/utils.py               | 10 ++++++++--
 .../notes/fix-token-extraction-0133808742374ef4.yaml   |  4 ++++
 2 files changed, 12 insertions(+), 2 deletions(-)
 create mode 100644 releasenotes/notes/fix-token-extraction-0133808742374ef4.yaml
diff --git a/ddtrace/contrib/internal/openai/utils.py b/ddtrace/contrib/internal/openai/utils.py
index f5dfc10efef..0217b1e61d2 100644
--- a/ddtrace/contrib/internal/openai/utils.py
+++ b/ddtrace/contrib/internal/openai/utils.py
@@ -89,7 +89,10 @@ def _extract_token_chunk(self, chunk):
         """Attempt to extract the token chunk (last chunk in the stream) from the streamed response."""
         if not self._dd_span._get_ctx_item("_dd.auto_extract_token_chunk"):
             return
-        choice = getattr(chunk, "choices", [None])[0]
+        choices = getattr(chunk, "choices")
+        if not choices:
+            return
+        choice = choices[0]
         if not getattr(choice, "finish_reason", None):
             # Only the second-last chunk in the stream with token usage enabled will have finish_reason set
             return
@@ -152,7 +155,10 @@ async def _extract_token_chunk(self, chunk):
         """Attempt to extract the token chunk (last chunk in the stream) from the streamed response."""
         if not self._dd_span._get_ctx_item("_dd.auto_extract_token_chunk"):
             return
-        choice = getattr(chunk, "choices", [None])[0]
+        choices = getattr(chunk, "choices")
+        if not choices:
+            return
+        choice = choices[0]
         if not getattr(choice, "finish_reason", None):
             return
         try:
diff --git a/releasenotes/notes/fix-token-extraction-0133808742374ef4.yaml b/releasenotes/notes/fix-token-extraction-0133808742374ef4.yaml
new file mode 100644
index 00000000000..cc8c1aa127b
--- /dev/null
+++ b/releasenotes/notes/fix-token-extraction-0133808742374ef4.yaml
@@ -0,0 +1,4 @@
+---
+fixes:
+  - |
+    LLM Observability: This fix resolves an issue where extracting token metadata from OpenAI streamed chat completion token chunks caused an ``IndexError`` when a chunk's ``choices`` list was empty.