diff --git a/pydantic_ai_slim/pydantic_ai/models/openai.py b/pydantic_ai_slim/pydantic_ai/models/openai.py
index efe9629c3a..f0e066dab0 100644
--- a/pydantic_ai_slim/pydantic_ai/models/openai.py
+++ b/pydantic_ai_slim/pydantic_ai/models/openai.py
@@ -55,6 +55,7 @@ from . import Model, ModelRequestParameters, StreamedResponse, check_allow_model_requests, download_item, get_user_agent
 try:
+    import tiktoken
     from openai import NOT_GIVEN, APIConnectionError, APIStatusError, AsyncOpenAI, AsyncStream, Omit, omit
     from openai.types import AllModels, chat, responses
     from openai.types.chat import (
@@ -1063,6 +1064,35 @@ def _inline_text_file_part(text: str, *, media_type: str, identifier: str) -> Ch
         )
         return ChatCompletionContentPartTextParam(text=text, type='text')
 
+    async def count_tokens(
+        self,
+        messages: list[ModelMessage],
+        model_settings: ModelSettings | None,
+        model_request_parameters: ModelRequestParameters,
+    ) -> usage.RequestUsage:
+        """Count the number of tokens in the given messages."""
+        if self.system != 'openai':
+            raise NotImplementedError('Token counting is only supported for the OpenAI system.')
+
+        try:
+            encoding = await _utils.run_in_executor(tiktoken.encoding_for_model, self.model_name)
+        except KeyError as e:
+            raise ValueError(
+                f'The model {self.model_name!r} is not supported by tiktoken',
+            ) from e
+
+        model_settings, model_request_parameters = self.prepare_request(model_settings, model_request_parameters)
+        openai_messages = await self._map_messages(messages, model_request_parameters)
+        message_token_count = await _num_tokens_from_messages(openai_messages, self.model_name, encoding)
+
+        # Count tokens for tools/functions
+        tools = self._get_tools(model_request_parameters)
+        tools_token_count = await _num_tokens_for_tools(tools, self.model_name, encoding)
+
+        return usage.RequestUsage(
+            input_tokens=message_token_count + tools_token_count,
+        )
+
 
 @deprecated(
     '`OpenAIModel` was renamed to `OpenAIChatModel` to clearly distinguish it from `OpenAIResponsesModel` which '
@@ -1908,6 +1938,36 @@ async def _map_user_prompt(part: UserPromptPart) -> responses.EasyInputMessagePa
             assert_never(item)
         return responses.EasyInputMessageParam(role='user', content=content)
 
+    async def count_tokens(
+        self,
+        messages: list[ModelMessage],
+        model_settings: ModelSettings | None,
+        model_request_parameters: ModelRequestParameters,
+    ) -> usage.RequestUsage:
+        """Count the number of tokens in the given messages."""
+        if self.system != 'openai':
+            raise NotImplementedError('Token counting is only supported for the OpenAI system.')
+
+        try:
+            encoding = await _utils.run_in_executor(tiktoken.encoding_for_model, self.model_name)
+        except KeyError as e:
+            raise ValueError(
+                f'The model {self.model_name!r} is not supported by tiktoken',
+            ) from e
+
+        model_settings, model_request_parameters = self.prepare_request(model_settings, model_request_parameters)
+        _, openai_messages = await self._map_messages(
+            messages, cast(OpenAIResponsesModelSettings, model_settings or {}), model_request_parameters
+        )
+        message_token_count = await _num_tokens_from_messages(openai_messages, self.model_name, encoding)
+
+        # Count tokens for tools/functions
+        tools = self._get_tools(model_request_parameters)
+        tools_token_count = await _num_tokens_for_tools(tools, self.model_name, encoding)
+        return usage.RequestUsage(
+            input_tokens=message_token_count + tools_token_count,
+        )
+
 
 @dataclass
 class OpenAIStreamedResponse(StreamedResponse):
@@ -2680,3 +2740,160 @@ def _map_mcp_call(
             provider_name=provider_name,
         ),
     )
+
+
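+# NOTE: the helpers below estimate token usage locally with tiktoken rather than
+# calling an API. The per-message and per-tool constants are heuristics adapted
+# from the OpenAI cookbook and calibrated against recorded responses, so the
+# returned counts are estimates. A rough usage sketch (`model`, `params`, and
+# `limit` are illustrative names, not part of this module):
+#
+#     estimate = await model.count_tokens(messages, None, params)
+#     if estimate.input_tokens > limit:
+#         ...  # fail fast before sending the real request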
+async def _num_tokens_from_messages(  # noqa: C901
+    messages: list[chat.ChatCompletionMessageParam] | list[responses.ResponseInputItemParam],
+    model: OpenAIModelName,
+    encoding: tiktoken.Encoding,
+) -> int:
+    """Return the number of tokens used by a list of messages."""
+    if 'gpt-5' in model:
+        tokens_per_message = 3
+        tokens_per_name = 1
+        final_primer = 2  # "reverse engineered" based on test cases
+    else:
+        # Adapted from https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken#6-counting-tokens-for-chat-completions-api-calls
+        tokens_per_message = 3
+        tokens_per_name = 1
+        final_primer = 3  # every reply is primed with <|start|>assistant<|message|>
+
+    num_tokens = 0
+    for message in messages:
+        num_tokens += tokens_per_message
+        for key, value in message.items():
+            if (key == 'content' or key == 'role') and isinstance(value, str):
+                num_tokens += len(encoding.encode(value))
+            elif key == 'tool_calls' and isinstance(value, list):
+                # Chat Completions API: list of ChatCompletionToolCallParam
+                # Responses API: list of ResponseFunctionToolCallParam
+                for tool_call in value:  # pyright: ignore[reportUnknownVariableType]
+                    if isinstance(tool_call, dict):
+                        # Both ChatCompletionToolCallParam and ResponseFunctionToolCallParam have 'function' field
+                        num_tokens += 3
+                        num_tokens += len(encoding.encode(tool_call.get('type', '')))  # pyright: ignore[reportUnknownMemberType,reportUnknownArgumentType]
+                        function = tool_call.get('function', {})  # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType]
+                        if function and isinstance(function, dict):
+                            # Both have 'name' and 'arguments' fields
+                            num_tokens += 1
+                            if 'name' in function and isinstance(function['name'], str):
+                                num_tokens += len(encoding.encode(function['name'])) * 2
+                            if 'arguments' in function and isinstance(function['arguments'], str):
+                                num_tokens += 4
+                                num_tokens += len(encoding.encode(function['arguments']))
+                if len(value) > 1:  # pyright: ignore[reportUnknownArgumentType]
+                    num_tokens += 15
+                    num_tokens -= len(value) * 5 - 6  # pyright: ignore[reportUnknownArgumentType]
+                else:
+                    num_tokens -= 2
+            elif key == 'name':
+                num_tokens += tokens_per_name
+            elif key == 'content' and isinstance(value, list):
+                # Handle list content (multimodal messages)
+                # Chat Completions API: list of ChatCompletionContentPartParam
+                # Responses API: list of ResponseInputContentParam
+                for content_part in value:  # pyright: ignore[reportUnknownVariableType]
+                    if isinstance(content_part, dict):
+                        # ChatCompletionContentPartTextParam has 'text' field
+                        # ResponseInputTextParam has 'text' field
+                        if 'text' in content_part and isinstance(content_part['text'], str):
+                            num_tokens += len(encoding.encode(content_part['text']))
+                        # Note: Images, audio, files are not tokenized as text
+                        # They have their own token costs handled by the API
+
+    num_tokens += final_primer
+    return num_tokens
+
+
+async def _num_tokens_for_tools(
+    tools: list[chat.ChatCompletionToolParam] | list[responses.FunctionToolParam],
+    model: OpenAIModelName,
+    encoding: tiktoken.Encoding,
+) -> int:
+    """Return the number of tokens used by a list of tools.
+
+    Based on the OpenAI token counting approach for function calling.
+    Supports both Chat Completions API tools (ChatCompletionToolParam) and
+    Responses API tools (FunctionToolParam).
+ """ + # Initialize function settings to 0 + func_init = 0 + prop_init = 0 + prop_key = 0 + enum_init = 0 + enum_item = 0 + func_end = 0 + + if 'gpt-4o' in model or 'gpt-4o-mini' in model: + # Set function settings for gpt-4o models + func_init = 7 + prop_init = 3 + prop_key = 3 + enum_init = -3 + enum_item = 3 + func_end = 12 + elif 'gpt-3.5-turbo' in model or 'gpt-4' in model: + # Set function settings for gpt-3.5-turbo and gpt-4 models + func_init = 10 + prop_init = 3 + prop_key = 3 + enum_init = -3 + enum_item = 3 + func_end = 12 + else: + # Default to gpt-4o settings for unknown models + func_init = 7 + prop_init = 3 + prop_key = 3 + enum_init = -3 + enum_item = 3 + func_end = 12 + + func_token_count = 0 + if len(tools) > 0: + for tool in tools: + func_token_count += func_init # Add tokens for start of each function + + # Handle both ChatCompletionToolParam and FunctionToolParam structures + # ChatCompletionToolParam: {'type': 'function', 'function': {'name': ..., 'description': ..., 'parameters': ...}} + # FunctionToolParam: {'type': 'function', 'name': ..., 'description': ..., 'parameters': ...} + if 'function' in tool: + # ChatCompletionToolParam format + function = tool['function'] + f_name = str(function.get('name', '')) + f_desc = str(function.get('description', '') or '') + parameters = function.get('parameters') + else: + # FunctionToolParam format (Responses API) + f_name = str(tool.get('name', '')) + f_desc = str(tool.get('description', '') or '') + parameters = tool.get('parameters') + + if f_desc.endswith('.'): + f_desc = f_desc[:-1] + line = f'{f_name}:{f_desc}' + func_token_count += len(encoding.encode(line)) # Add tokens for function name and description + + if parameters and isinstance(parameters, dict): + properties_raw = parameters.get('properties', {}) + if properties_raw and isinstance(properties_raw, dict) and len(properties_raw) > 0: # pyright: ignore[reportUnknownArgumentType] + func_token_count += prop_init # Add tokens for start of properties + for key, prop_value in properties_raw.items(): # pyright: ignore[reportUnknownVariableType] + if not isinstance(prop_value, dict): + continue + func_token_count += prop_key # Add tokens for each property + p_name = str(key) # pyright: ignore[reportUnknownArgumentType] + p_type = str(prop_value.get('type', '') or '') # pyright: ignore[reportUnknownMemberType,reportUnknownArgumentType] + p_desc = str(prop_value.get('description', '') or '') # pyright: ignore[reportUnknownMemberType,reportUnknownArgumentType] + if 'enum' in prop_value: + func_token_count += enum_init # Add tokens if property has enum list + for item in prop_value['enum']: # pyright: ignore[reportUnknownVariableType] + func_token_count += enum_item + func_token_count += len(encoding.encode(str(item))) # pyright: ignore[reportUnknownArgumentType] + if p_desc.endswith('.'): + p_desc = p_desc[:-1] + line = f'{p_name}:{p_type}:{p_desc}' + func_token_count += len(encoding.encode(line)) + func_token_count += func_end + + return func_token_count diff --git a/pydantic_ai_slim/pydantic_ai/usage.py b/pydantic_ai_slim/pydantic_ai/usage.py index 586af8dcfc..855b96fed8 100644 --- a/pydantic_ai_slim/pydantic_ai/usage.py +++ b/pydantic_ai_slim/pydantic_ai/usage.py @@ -267,8 +267,17 @@ class UsageLimits: """The maximum number of tokens allowed in requests and responses combined.""" count_tokens_before_request: bool = False """If True, perform a token counting pass before sending the request to the model, - to enforce `request_tokens_limit` ahead of time. 
-    (from calling the model's `count_tokens` API before making the actual request) and is disabled by default."""
+    to enforce `input_tokens_limit` ahead of time. This may incur additional overhead
+    (from calling the model's `count_tokens` method before making the actual request) and is disabled by default.
+
+    Supported by:
+
+    - [`OpenAIChatModel`][pydantic_ai.models.openai.OpenAIChatModel] and
+      [`OpenAIResponsesModel`][pydantic_ai.models.openai.OpenAIResponsesModel] (only for OpenAI models)
+    - [`AnthropicModel`][pydantic_ai.models.anthropic.AnthropicModel] (excluding Bedrock client)
+    - [`GoogleModel`][pydantic_ai.models.google.GoogleModel]
+    - [`BedrockModel`][pydantic_ai.models.bedrock.BedrockModel] (including Anthropic models)
+    """
 
     @property
     @deprecated('`request_tokens_limit` is deprecated, use `input_tokens_limit` instead')
diff --git a/pydantic_ai_slim/pyproject.toml b/pydantic_ai_slim/pyproject.toml
index efa12fca85..311a90bba4 100644
--- a/pydantic_ai_slim/pyproject.toml
+++ b/pydantic_ai_slim/pyproject.toml
@@ -67,7 +67,7 @@ dependencies = [
 # WARNING if you add optional groups, please update docs/install.md
 logfire = ["logfire[httpx]>=3.14.1"]
 # Models
-openai = ["openai>=2.11.0"]
+openai = ["openai>=2.11.0", "tiktoken>=0.12.0"]
 cohere = ["cohere>=5.18.0; platform_system != 'Emscripten'"]
 vertexai = ["google-auth>=2.36.0", "requests>=2.32.2"]
 google = ["google-genai>=1.51.0"]
diff --git a/tests/conftest.py b/tests/conftest.py
index 32b4a475cc..59506d6060 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -151,6 +151,35 @@ def allow_model_requests():
     yield
 
 
+@pytest.fixture
+def mock_tiktoken_encoding(monkeypatch: pytest.MonkeyPatch):
+    """Mock `tiktoken.encoding_for_model` to avoid downloading encoding files.
+
+    The fake encoder estimates token counts by assuming roughly 4 characters per token,
+    which is sufficient for tests that only need consistent, deterministic counts.
+    """
+
+    try:
+        import tiktoken
+    except ImportError:  # pragma: no cover - tiktoken might not be installed
+        yield
+        return
+
+    class _FakeEncoding:
+        @staticmethod
+        def encode(text: object) -> list[int]:
+            # Convert to string and estimate tokens as len(s) // 4, with a minimum of 1.
+            s = '' if text is None else str(text)
+            approx_tokens = max(1, len(s) // 4)
+            return list(range(approx_tokens))
+
+    def _fake_encoding_for_model(_model_name: str) -> _FakeEncoding:  # pragma: no cover - trivial
+        return _FakeEncoding()
+
+    monkeypatch.setattr(tiktoken, 'encoding_for_model', _fake_encoding_for_model)
+    yield
+
+
 @pytest.fixture
 async def client_with_handler() -> AsyncIterator[ClientWithHandler]:
     client: httpx.AsyncClient | None = None
diff --git a/tests/models/cassettes/test_openai_counting/test_count_tokens_all_model_request_parts.yaml b/tests/models/cassettes/test_openai_counting/test_count_tokens_all_model_request_parts.yaml
new file mode 100644
index 0000000000..c3b85c1113
--- /dev/null
+++ b/tests/models/cassettes/test_openai_counting/test_count_tokens_all_model_request_parts.yaml
@@ -0,0 +1,370 @@
+interactions:
+- request:
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '193'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+    method: POST
+    parsed_body:
+      max_completion_tokens: 1
+      messages:
+      - content: You are a helpful weather assistant.
+        role: system
+      - content: What is the weather like in Tokyo today?
+        role: user
+      model: gpt-4o
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    headers:
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      connection:
+      - keep-alive
+      content-length:
+      - '806'
+      content-type:
+      - application/json
+      openai-organization:
+      - user-hmtx7ob9xfpodrsdmshpnklm
+      openai-processing-ms:
+      - '140'
+      openai-project:
+      - proj_98JsR0JYQV1Ovk2AT0cljXCL
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      transfer-encoding:
+      - chunked
+    parsed_body:
+      choices:
+      - finish_reason: length
+        index: 0
+        logprobs: null
+        message:
+          annotations: []
+          content: I'm
+          refusal: null
+          role: assistant
+      created: 1765858741
+      id: chatcmpl-CnGoXCMQGpZUJ1RIjqKO9Xck4B90i
+      model: gpt-4o-2024-08-06
+      object: chat.completion
+      service_tier: default
+      system_fingerprint: fp_e413f45763
+      usage:
+        completion_tokens: 1
+        completion_tokens_details:
+          accepted_prediction_tokens: 0
+          audio_tokens: 0
+          reasoning_tokens: 0
+          rejected_prediction_tokens: 0
+        prompt_tokens: 27
+        prompt_tokens_details:
+          audio_tokens: 0
+          cached_tokens: 0
+        total_tokens: 28
+    status:
+      code: 200
+      message: OK
+- request:
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '518'
+      content-type:
+      - application/json
+      cookie:
+      - __cf_bm=d07ec83O7zjUR6.SD7MPusZ94bJlvGwEyUx8kmSD.Jw-1765858741-1.0.1.1-si9WgSCozegQEVzerB9wYTIw8R54Kd_MZE4avyFpRgwxuPVBSkE6GIBI7INUIsp_2lMYIHTqtuDRPAoI3US1sRFxe75tgtu.HRllDJuuasU;
+        _cfuvid=AAB.xCyuOSNntTVau2cxKjBvWrpgAtdCbPtFXvldjlM-1765858741905-0.0.1.1-604800000
+      host:
+      - api.openai.com
+    method: POST
+    parsed_body:
+      max_completion_tokens: 1
+      messages:
+      - content: You are a helpful weather assistant.
+        role: system
+      - content: What is the weather like in Tokyo today?
+        role: user
+      - content: null
+        role: assistant
+        tool_calls:
+        - function:
+            arguments: '{"city": "Tokyo", "units": "celsius"}'
+            name: get_weather
+          id: call_weather_001
+          type: function
+      - content: '{"temperature": 18, "condition": "Partly cloudy", "humidity": 65}'
+        role: tool
+        tool_call_id: call_weather_001
+      model: gpt-4o
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    headers:
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      connection:
+      - keep-alive
+      content-length:
+      - '806'
+      content-type:
+      - application/json
+      openai-organization:
+      - user-hmtx7ob9xfpodrsdmshpnklm
+      openai-processing-ms:
+      - '217'
+      openai-project:
+      - proj_98JsR0JYQV1Ovk2AT0cljXCL
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      transfer-encoding:
+      - chunked
+    parsed_body:
+      choices:
+      - finish_reason: length
+        index: 0
+        logprobs: null
+        message:
+          annotations: []
+          content: The
+          refusal: null
+          role: assistant
+      created: 1765858742
+      id: chatcmpl-CnGoYBfmhaK9t1EDnaFbqr0Brwn0h
+      model: gpt-4o-2024-08-06
+      object: chat.completion
+      service_tier: default
+      system_fingerprint: fp_e413f45763
+      usage:
+        completion_tokens: 1
+        completion_tokens_details:
+          accepted_prediction_tokens: 0
+          audio_tokens: 0
+          reasoning_tokens: 0
+          rejected_prediction_tokens: 0
+        prompt_tokens: 74
+        prompt_tokens_details:
+          audio_tokens: 0
+          cached_tokens: 0
+        total_tokens: 75
+    status:
+      code: 200
+      message: OK
+- request:
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '618'
+      content-type:
+      - application/json
+      cookie:
+      - __cf_bm=d07ec83O7zjUR6.SD7MPusZ94bJlvGwEyUx8kmSD.Jw-1765858741-1.0.1.1-si9WgSCozegQEVzerB9wYTIw8R54Kd_MZE4avyFpRgwxuPVBSkE6GIBI7INUIsp_2lMYIHTqtuDRPAoI3US1sRFxe75tgtu.HRllDJuuasU;
+        _cfuvid=AAB.xCyuOSNntTVau2cxKjBvWrpgAtdCbPtFXvldjlM-1765858741905-0.0.1.1-604800000
+      host:
+      - api.openai.com
+    method: POST
+    parsed_body:
+      max_completion_tokens: 1
+      messages:
+      - content: You are a helpful weather assistant.
+        role: system
+      - content: What is the weather like in Tokyo today?
+        role: user
+      - content: null
+        role: assistant
+        tool_calls:
+        - function:
+            arguments: '{"city": "Tokyo", "units": "celsius"}'
+            name: get_weather
+          id: call_weather_001
+          type: function
+      - content: '{"temperature": 18, "condition": "Partly cloudy", "humidity": 65}'
+        role: tool
+        tool_call_id: call_weather_001
+      - content: The weather in Tokyo is partly cloudy with a temperature of 18°C.
+        role: assistant
+      model: gpt-4o
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    headers:
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      connection:
+      - keep-alive
+      content-length:
+      - '806'
+      content-type:
+      - application/json
+      openai-organization:
+      - user-hmtx7ob9xfpodrsdmshpnklm
+      openai-processing-ms:
+      - '151'
+      openai-project:
+      - proj_98JsR0JYQV1Ovk2AT0cljXCL
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      transfer-encoding:
+      - chunked
+    parsed_body:
+      choices:
+      - finish_reason: length
+        index: 0
+        logprobs: null
+        message:
+          annotations: []
+          content: The
+          refusal: null
+          role: assistant
+      created: 1765858742
+      id: chatcmpl-CnGoYmJzFqFIH8Xg0tOvhhrTKj4eq
+      model: gpt-4o-2024-08-06
+      object: chat.completion
+      service_tier: default
+      system_fingerprint: fp_e413f45763
+      usage:
+        completion_tokens: 1
+        completion_tokens_details:
+          accepted_prediction_tokens: 0
+          audio_tokens: 0
+          reasoning_tokens: 0
+          rejected_prediction_tokens: 0
+        prompt_tokens: 93
+        prompt_tokens_details:
+          audio_tokens: 0
+          cached_tokens: 0
+        total_tokens: 94
+    status:
+      code: 200
+      message: OK
+- request:
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '758'
+      content-type:
+      - application/json
+      cookie:
+      - __cf_bm=d07ec83O7zjUR6.SD7MPusZ94bJlvGwEyUx8kmSD.Jw-1765858741-1.0.1.1-si9WgSCozegQEVzerB9wYTIw8R54Kd_MZE4avyFpRgwxuPVBSkE6GIBI7INUIsp_2lMYIHTqtuDRPAoI3US1sRFxe75tgtu.HRllDJuuasU;
+        _cfuvid=AAB.xCyuOSNntTVau2cxKjBvWrpgAtdCbPtFXvldjlM-1765858741905-0.0.1.1-604800000
+      host:
+      - api.openai.com
+    method: POST
+    parsed_body:
+      max_completion_tokens: 1
+      messages:
+      - content: You are a helpful weather assistant.
+        role: system
+      - content: What is the weather like in Tokyo today?
+        role: user
+      - content: null
+        role: assistant
+        tool_calls:
+        - function:
+            arguments: '{"city": "Tokyo", "units": "celsius"}'
+            name: get_weather
+          id: call_weather_001
+          type: function
+      - content: '{"temperature": 18, "condition": "Partly cloudy", "humidity": 65}'
+        role: tool
+        tool_call_id: call_weather_001
+      - content: The weather in Tokyo is partly cloudy with a temperature of 18°C.
+        role: assistant
+      - content: |-
+          Validation feedback:
+          Please also include the humidity level in your response.
+
+          Fix the errors and try again.
+        role: user
+      model: gpt-4o
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    headers:
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      connection:
+      - keep-alive
+      content-length:
+      - '808'
+      content-type:
+      - application/json
+      openai-organization:
+      - user-hmtx7ob9xfpodrsdmshpnklm
+      openai-processing-ms:
+      - '203'
+      openai-project:
+      - proj_98JsR0JYQV1Ovk2AT0cljXCL
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      transfer-encoding:
+      - chunked
+    parsed_body:
+      choices:
+      - finish_reason: length
+        index: 0
+        logprobs: null
+        message:
+          annotations: []
+          content: The
+          refusal: null
+          role: assistant
+      created: 1765858742
+      id: chatcmpl-CnGoYyPtPiDwSRWcsPHCkUCl595OS
+      model: gpt-4o-2024-08-06
+      object: chat.completion
+      service_tier: default
+      system_fingerprint: fp_e819e3438b
+      usage:
+        completion_tokens: 1
+        completion_tokens_details:
+          accepted_prediction_tokens: 0
+          audio_tokens: 0
+          reasoning_tokens: 0
+          rejected_prediction_tokens: 0
+        prompt_tokens: 117
+        prompt_tokens_details:
+          audio_tokens: 0
+          cached_tokens: 0
+        total_tokens: 118
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/models/cassettes/test_openai_counting/test_count_tokens_all_model_response_parts.yaml b/tests/models/cassettes/test_openai_counting/test_count_tokens_all_model_response_parts.yaml
new file mode 100644
index 0000000000..ccb32d4f55
--- /dev/null
+++ b/tests/models/cassettes/test_openai_counting/test_count_tokens_all_model_response_parts.yaml
@@ -0,0 +1,447 @@
+interactions:
+- request:
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '215'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+    method: POST
+    parsed_body:
+      max_completion_tokens: 1
+      messages:
+      - content: You are a helpful assistant with access to calculator tools.
+        role: system
+      - content: Hello! Can you help me with some math?
+        role: user
+      model: gpt-4o
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    headers:
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      connection:
+      - keep-alive
+      content-length:
+      - '808'
+      content-type:
+      - application/json
+      openai-organization:
+      - user-hmtx7ob9xfpodrsdmshpnklm
+      openai-processing-ms:
+      - '192'
+      openai-project:
+      - proj_98JsR0JYQV1Ovk2AT0cljXCL
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      transfer-encoding:
+      - chunked
+    parsed_body:
+      choices:
+      - finish_reason: length
+        index: 0
+        logprobs: null
+        message:
+          annotations: []
+          content: Hello
+          refusal: null
+          role: assistant
+      created: 1765858743
+      id: chatcmpl-CnGoZPsIvN6NuwO0lCRHyyqMZISzM
+      model: gpt-4o-2024-08-06
+      object: chat.completion
+      service_tier: default
+      system_fingerprint: fp_e413f45763
+      usage:
+        completion_tokens: 1
+        completion_tokens_details:
+          accepted_prediction_tokens: 0
+          audio_tokens: 0
+          reasoning_tokens: 0
+          rejected_prediction_tokens: 0
+        prompt_tokens: 32
+        prompt_tokens_details:
+          audio_tokens: 0
+          cached_tokens: 0
+        total_tokens: 33
+    status:
+      code: 200
+      message: OK
+- request:
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '338'
+      content-type:
+      - application/json
+      cookie:
+      - __cf_bm=Z5jgrqsJ3ow98sWj8VE_ZsmZfDj8UL0MUAIPid1ECbg-1765858743-1.0.1.1-k9XknwPRRC.kDU55d1zDpyOky6RgwWADl_CyFLl0UnP1DXmF1WlLkolxWn8JpU.k5T8APMuqP9oiou0njo0Rzn46Prxpxogk9RmgLu3iFww;
+        _cfuvid=G8PKpHBwPAN3AQ1YX0X3Ktqgw1dXdHBYqIxFT6w177k-1765858743749-0.0.1.1-604800000
+      host:
+      - api.openai.com
+    method: POST
+    parsed_body:
+      max_completion_tokens: 1
+      messages:
+      - content: You are a helpful assistant with access to calculator tools.
+        role: system
+      - content: Hello! Can you help me with some math?
+        role: user
+      - content: Of course! I can help you with mathematical calculations. What would you like to compute?
+        role: assistant
+      model: gpt-4o
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    headers:
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      connection:
+      - keep-alive
+      content-length:
+      - '805'
+      content-type:
+      - application/json
+      openai-organization:
+      - user-hmtx7ob9xfpodrsdmshpnklm
+      openai-processing-ms:
+      - '181'
+      openai-project:
+      - proj_98JsR0JYQV1Ovk2AT0cljXCL
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      transfer-encoding:
+      - chunked
+    parsed_body:
+      choices:
+      - finish_reason: length
+        index: 0
+        logprobs: null
+        message:
+          annotations: []
+          content: Of
+          refusal: null
+          role: assistant
+      created: 1765858743
+      id: chatcmpl-CnGoZuXTYVTRF0xc1dysqbLFFa5IC
+      model: gpt-4o-2024-08-06
+      object: chat.completion
+      service_tier: default
+      system_fingerprint: fp_e413f45763
+      usage:
+        completion_tokens: 1
+        completion_tokens_details:
+          accepted_prediction_tokens: 0
+          audio_tokens: 0
+          reasoning_tokens: 0
+          rejected_prediction_tokens: 0
+        prompt_tokens: 54
+        prompt_tokens_details:
+          audio_tokens: 0
+          cached_tokens: 0
+        total_tokens: 55
+    status:
+      code: 200
+      message: OK
+- request:
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '408'
+      content-type:
+      - application/json
+      cookie:
+      - __cf_bm=Z5jgrqsJ3ow98sWj8VE_ZsmZfDj8UL0MUAIPid1ECbg-1765858743-1.0.1.1-k9XknwPRRC.kDU55d1zDpyOky6RgwWADl_CyFLl0UnP1DXmF1WlLkolxWn8JpU.k5T8APMuqP9oiou0njo0Rzn46Prxpxogk9RmgLu3iFww;
+        _cfuvid=G8PKpHBwPAN3AQ1YX0X3Ktqgw1dXdHBYqIxFT6w177k-1765858743749-0.0.1.1-604800000
+      host:
+      - api.openai.com
+    method: POST
+    parsed_body:
+      max_completion_tokens: 1
+      messages:
+      - content: You are a helpful assistant with access to calculator tools.
+        role: system
+      - content: Hello! Can you help me with some math?
+        role: user
+      - content: Of course! I can help you with mathematical calculations. What would you like to compute?
+        role: assistant
+      - content: Calculate 15 * 7 and also 128 / 4 please.
+        role: user
+      model: gpt-4o
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    headers:
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      connection:
+      - keep-alive
+      content-length:
+      - '812'
+      content-type:
+      - application/json
+      openai-organization:
+      - user-hmtx7ob9xfpodrsdmshpnklm
+      openai-processing-ms:
+      - '156'
+      openai-project:
+      - proj_98JsR0JYQV1Ovk2AT0cljXCL
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      transfer-encoding:
+      - chunked
+    parsed_body:
+      choices:
+      - finish_reason: length
+        index: 0
+        logprobs: null
+        message:
+          annotations: []
+          content: Certainly
+          refusal: null
+          role: assistant
+      created: 1765858744
+      id: chatcmpl-CnGoar9hMYrdoB9GiE4hFHORdO72p
+      model: gpt-4o-2024-08-06
+      object: chat.completion
+      service_tier: default
+      system_fingerprint: fp_e413f45763
+      usage:
+        completion_tokens: 1
+        completion_tokens_details:
+          accepted_prediction_tokens: 0
+          audio_tokens: 0
+          reasoning_tokens: 0
+          rejected_prediction_tokens: 0
+        prompt_tokens: 73
+        prompt_tokens_details:
+          audio_tokens: 0
+          cached_tokens: 0
+        total_tokens: 74
+    status:
+      code: 200
+      message: OK
+- request:
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '630'
+      content-type:
+      - application/json
+      cookie:
+      - __cf_bm=Z5jgrqsJ3ow98sWj8VE_ZsmZfDj8UL0MUAIPid1ECbg-1765858743-1.0.1.1-k9XknwPRRC.kDU55d1zDpyOky6RgwWADl_CyFLl0UnP1DXmF1WlLkolxWn8JpU.k5T8APMuqP9oiou0njo0Rzn46Prxpxogk9RmgLu3iFww;
+        _cfuvid=G8PKpHBwPAN3AQ1YX0X3Ktqgw1dXdHBYqIxFT6w177k-1765858743749-0.0.1.1-604800000
+      host:
+      - api.openai.com
+    method: POST
+    parsed_body:
+      max_completion_tokens: 1
+      messages:
+      - content: You are a helpful assistant with access to calculator tools.
+        role: system
+      - content: Hello! Can you help me with some math?
+        role: user
+      - content: Of course! I can help you with mathematical calculations. What would you like to compute?
+        role: assistant
+      - content: Calculate 15 * 7 and also 128 / 4 please.
+        role: user
+      - content: null
+        role: assistant
+        tool_calls:
+        - function:
+            arguments: '{"a": 15, "b": 7}'
+            name: multiply
+          id: call_mult_001
+          type: function
+      - content: '105'
+        role: tool
+        tool_call_id: call_mult_001
+      model: gpt-4o
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    headers:
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      connection:
+      - keep-alive
+      content-length:
+      - '805'
+      content-type:
+      - application/json
+      openai-organization:
+      - user-hmtx7ob9xfpodrsdmshpnklm
+      openai-processing-ms:
+      - '170'
+      openai-project:
+      - proj_98JsR0JYQV1Ovk2AT0cljXCL
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      transfer-encoding:
+      - chunked
+    parsed_body:
+      choices:
+      - finish_reason: length
+        index: 0
+        logprobs: null
+        message:
+          annotations: []
+          content: '15'
+          refusal: null
+          role: assistant
+      created: 1765858744
+      id: chatcmpl-CnGoaRVqlvegH42PPYY0mcWVVCmWv
+      model: gpt-4o-2024-08-06
+      object: chat.completion
+      service_tier: default
+      system_fingerprint: fp_e413f45763
+      usage:
+        completion_tokens: 1
+        completion_tokens_details:
+          accepted_prediction_tokens: 0
+          audio_tokens: 0
+          reasoning_tokens: 0
+          rejected_prediction_tokens: 0
+        prompt_tokens: 98
+        prompt_tokens_details:
+          audio_tokens: 0
+          cached_tokens: 0
+        total_tokens: 99
+    status:
+      code: 200
+      message: OK
+- request:
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '719'
+      content-type:
+      - application/json
+      cookie:
+      - __cf_bm=Z5jgrqsJ3ow98sWj8VE_ZsmZfDj8UL0MUAIPid1ECbg-1765858743-1.0.1.1-k9XknwPRRC.kDU55d1zDpyOky6RgwWADl_CyFLl0UnP1DXmF1WlLkolxWn8JpU.k5T8APMuqP9oiou0njo0Rzn46Prxpxogk9RmgLu3iFww;
+        _cfuvid=G8PKpHBwPAN3AQ1YX0X3Ktqgw1dXdHBYqIxFT6w177k-1765858743749-0.0.1.1-604800000
+      host:
+      - api.openai.com
+    method: POST
+    parsed_body:
+      max_completion_tokens: 1
+      messages:
+      - content: You are a helpful assistant with access to calculator tools.
+        role: system
+      - content: Hello! Can you help me with some math?
+        role: user
+      - content: Of course! I can help you with mathematical calculations. What would you like to compute?
+        role: assistant
+      - content: Calculate 15 * 7 and also 128 / 4 please.
+        role: user
+      - content: null
+        role: assistant
+        tool_calls:
+        - function:
+            arguments: '{"a": 15, "b": 7}'
+            name: multiply
+          id: call_mult_001
+          type: function
+      - content: '105'
+        role: tool
+        tool_call_id: call_mult_001
+      - content: 'Here are your results: 15 × 7 = 105 and 128 ÷ 4 = 32.'
+        role: assistant
+      model: gpt-4o
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    headers:
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      connection:
+      - keep-alive
+      content-length:
+      - '807'
+      content-type:
+      - application/json
+      openai-organization:
+      - user-hmtx7ob9xfpodrsdmshpnklm
+      openai-processing-ms:
+      - '158'
+      openai-project:
+      - proj_98JsR0JYQV1Ovk2AT0cljXCL
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      transfer-encoding:
+      - chunked
+    parsed_body:
+      choices:
+      - finish_reason: length
+        index: 0
+        logprobs: null
+        message:
+          annotations: []
+          content: Is
+          refusal: null
+          role: assistant
+      created: 1765858744
+      id: chatcmpl-CnGoaDcCnbTnXaGBsXHIaP2ismVrj
+      model: gpt-4o-2024-08-06
+      object: chat.completion
+      service_tier: default
+      system_fingerprint: fp_e413f45763
+      usage:
+        completion_tokens: 1
+        completion_tokens_details:
+          accepted_prediction_tokens: 0
+          audio_tokens: 0
+          reasoning_tokens: 0
+          rejected_prediction_tokens: 0
+        prompt_tokens: 126
+        prompt_tokens_details:
+          audio_tokens: 0
+          cached_tokens: 0
+        total_tokens: 127
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/models/cassettes/test_openai_counting/test_count_tokens_basic.yaml b/tests/models/cassettes/test_openai_counting/test_count_tokens_basic.yaml
new file mode 100644
index 0000000000..994a45bf24
--- /dev/null
+++ b/tests/models/cassettes/test_openai_counting/test_count_tokens_basic.yaml
@@ -0,0 +1,81 @@
+interactions:
+- request:
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '158'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+    method: POST
+    parsed_body:
+      max_completion_tokens: 1
+      messages:
+      - content: You are a helpful assistant.
+        role: system
+      - content: Hello, world!
+        role: user
+      model: gpt-4o
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    headers:
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      connection:
+      - keep-alive
+      content-length:
+      - '808'
+      content-type:
+      - application/json
+      openai-organization:
+      - user-hmtx7ob9xfpodrsdmshpnklm
+      openai-processing-ms:
+      - '216'
+      openai-project:
+      - proj_98JsR0JYQV1Ovk2AT0cljXCL
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      transfer-encoding:
+      - chunked
+    parsed_body:
+      choices:
+      - finish_reason: length
+        index: 0
+        logprobs: null
+        message:
+          annotations: []
+          content: Hello
+          refusal: null
+          role: assistant
+      created: 1765858745
+      id: chatcmpl-CnGobwIYyjUyjz967ZwIANmU4Z2vW
+      model: gpt-4o-2024-08-06
+      object: chat.completion
+      service_tier: default
+      system_fingerprint: fp_e413f45763
+      usage:
+        completion_tokens: 1
+        completion_tokens_details:
+          accepted_prediction_tokens: 0
+          audio_tokens: 0
+          reasoning_tokens: 0
+          rejected_prediction_tokens: 0
+        prompt_tokens: 21
+        prompt_tokens_details:
+          audio_tokens: 0
+          cached_tokens: 0
+        total_tokens: 22
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/models/cassettes/test_openai_counting/test_count_tokens_gpt4o_mini.yaml b/tests/models/cassettes/test_openai_counting/test_count_tokens_gpt4o_mini.yaml
new file mode 100644
index 0000000000..689437f39b
--- /dev/null
+++ b/tests/models/cassettes/test_openai_counting/test_count_tokens_gpt4o_mini.yaml
@@ -0,0 +1,81 @@
+interactions:
+- request:
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '192'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+    method: POST
+    parsed_body:
+      max_completion_tokens: 1
+      messages:
+      - content: You are a helpful assistant.
+        role: system
+      - content: Explain quantum computing in one sentence.
+        role: user
+      model: gpt-4o-mini
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    headers:
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      connection:
+      - keep-alive
+      content-length:
+      - '815'
+      content-type:
+      - application/json
+      openai-organization:
+      - user-hmtx7ob9xfpodrsdmshpnklm
+      openai-processing-ms:
+      - '592'
+      openai-project:
+      - proj_98JsR0JYQV1Ovk2AT0cljXCL
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      transfer-encoding:
+      - chunked
+    parsed_body:
+      choices:
+      - finish_reason: length
+        index: 0
+        logprobs: null
+        message:
+          annotations: []
+          content: Quantum
+          refusal: null
+          role: assistant
+      created: 1765858749
+      id: chatcmpl-CnGof25Neb4cw2smDxwEkc042PqhC
+      model: gpt-4o-mini-2024-07-18
+      object: chat.completion
+      service_tier: default
+      system_fingerprint: fp_644f11dd4d
+      usage:
+        completion_tokens: 1
+        completion_tokens_details:
+          accepted_prediction_tokens: 0
+          audio_tokens: 0
+          reasoning_tokens: 0
+          rejected_prediction_tokens: 0
+        prompt_tokens: 24
+        prompt_tokens_details:
+          audio_tokens: 0
+          cached_tokens: 0
+        total_tokens: 25
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/models/cassettes/test_openai_counting/test_count_tokens_individual_message_types.yaml b/tests/models/cassettes/test_openai_counting/test_count_tokens_individual_message_types.yaml
new file mode 100644
index 0000000000..ae819b8a3d
--- /dev/null
+++ b/tests/models/cassettes/test_openai_counting/test_count_tokens_individual_message_types.yaml
@@ -0,0 +1,632 @@
+interactions:
+- request:
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '116'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+    method: POST
+    parsed_body:
+      max_completion_tokens: 1
+      messages:
+      - content: You are a helpful assistant.
+        role: system
+      model: gpt-4o
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    headers:
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      connection:
+      - keep-alive
+      content-length:
+      - '806'
+      content-type:
+      - application/json
+      openai-organization:
+      - user-hmtx7ob9xfpodrsdmshpnklm
+      openai-processing-ms:
+      - '134'
+      openai-project:
+      - proj_98JsR0JYQV1Ovk2AT0cljXCL
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      transfer-encoding:
+      - chunked
+    parsed_body:
+      choices:
+      - finish_reason: length
+        index: 0
+        logprobs: null
+        message:
+          annotations: []
+          content: Got
+          refusal: null
+          role: assistant
+      created: 1765858738
+      id: chatcmpl-CnGoUFRwrTRzGFxZV5bOgyHq3Si9b
+      model: gpt-4o-2024-08-06
+      object: chat.completion
+      service_tier: default
+      system_fingerprint: fp_e413f45763
+      usage:
+        completion_tokens: 1
+        completion_tokens_details:
+          accepted_prediction_tokens: 0
+          audio_tokens: 0
+          reasoning_tokens: 0
+          rejected_prediction_tokens: 0
+        prompt_tokens: 13
+        prompt_tokens_details:
+          audio_tokens: 0
+          cached_tokens: 0
+        total_tokens: 14
+    status:
+      code: 200
+      message: OK
+- request:
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '164'
+      content-type:
+      - application/json
+      cookie:
+      - __cf_bm=T9zx387BxMuBX2Nrn4UuHxBvEwJvp7VuCBaintlWjyw-1765858738-1.0.1.1-YU5ffyP2Ygo06mNjiO320olWA2QHkd6_O0QMYr9LA8ShTQnMLnr4p3qPEECJV.KhCS6hdhc6jLhZqPM52_7ctpV7PyM_7FBrJC4IPYIYaRk;
+        _cfuvid=cdyncNURVLH1Nu7dKGHaA9c1MG_r__eMQWrmdlKrsWc-1765858738726-0.0.1.1-604800000
+      host:
+      - api.openai.com
+    method: POST
+    parsed_body:
+      max_completion_tokens: 1
+      messages:
+      - content: You are a helpful assistant.
+        role: system
+      - content: Hello, how are you?
+        role: user
+      model: gpt-4o
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    headers:
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      connection:
+      - keep-alive
+      content-length:
+      - '808'
+      content-type:
+      - application/json
+      openai-organization:
+      - user-hmtx7ob9xfpodrsdmshpnklm
+      openai-processing-ms:
+      - '167'
+      openai-project:
+      - proj_98JsR0JYQV1Ovk2AT0cljXCL
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      transfer-encoding:
+      - chunked
+    parsed_body:
+      choices:
+      - finish_reason: length
+        index: 0
+        logprobs: null
+        message:
+          annotations: []
+          content: Hello
+          refusal: null
+          role: assistant
+      created: 1765858738
+      id: chatcmpl-CnGoUuis7xKExkFRBJ2LzobCqyfsj
+      model: gpt-4o-2024-08-06
+      object: chat.completion
+      service_tier: default
+      system_fingerprint: fp_e413f45763
+      usage:
+        completion_tokens: 1
+        completion_tokens_details:
+          accepted_prediction_tokens: 0
+          audio_tokens: 0
+          reasoning_tokens: 0
+          rejected_prediction_tokens: 0
+        prompt_tokens: 23
+        prompt_tokens_details:
+          audio_tokens: 0
+          cached_tokens: 0
+        total_tokens: 24
+    status:
+      code: 200
+      message: OK
+- request:
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '236'
+      content-type:
+      - application/json
+      cookie:
+      - __cf_bm=T9zx387BxMuBX2Nrn4UuHxBvEwJvp7VuCBaintlWjyw-1765858738-1.0.1.1-YU5ffyP2Ygo06mNjiO320olWA2QHkd6_O0QMYr9LA8ShTQnMLnr4p3qPEECJV.KhCS6hdhc6jLhZqPM52_7ctpV7PyM_7FBrJC4IPYIYaRk;
+        _cfuvid=cdyncNURVLH1Nu7dKGHaA9c1MG_r__eMQWrmdlKrsWc-1765858738726-0.0.1.1-604800000
+      host:
+      - api.openai.com
+    method: POST
+    parsed_body:
+      max_completion_tokens: 1
+      messages:
+      - content: You are a helpful assistant.
+        role: system
+      - content: Hello, how are you?
+        role: user
+      - content: I am doing well, thank you for asking!
+        role: assistant
+      model: gpt-4o
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    headers:
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      connection:
+      - keep-alive
+      content-length:
+      - '808'
+      content-type:
+      - application/json
+      openai-organization:
+      - user-hmtx7ob9xfpodrsdmshpnklm
+      openai-processing-ms:
+      - '174'
+      openai-project:
+      - proj_98JsR0JYQV1Ovk2AT0cljXCL
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      transfer-encoding:
+      - chunked
+    parsed_body:
+      choices:
+      - finish_reason: length
+        index: 0
+        logprobs: null
+        message:
+          annotations: []
+          content: Hello
+          refusal: null
+          role: assistant
+      created: 1765858739
+      id: chatcmpl-CnGoVEtngebV10yvo2lKhjhO90FmT
+      model: gpt-4o-2024-08-06
+      object: chat.completion
+      service_tier: default
+      system_fingerprint: fp_e413f45763
+      usage:
+        completion_tokens: 1
+        completion_tokens_details:
+          accepted_prediction_tokens: 0
+          audio_tokens: 0
+          reasoning_tokens: 0
+          rejected_prediction_tokens: 0
+        prompt_tokens: 37
+        prompt_tokens_details:
+          audio_tokens: 0
+          cached_tokens: 0
+        total_tokens: 38
+    status:
+      code: 200
+      message: OK
+- request:
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '294'
+      content-type:
+      - application/json
+      cookie:
+      - __cf_bm=T9zx387BxMuBX2Nrn4UuHxBvEwJvp7VuCBaintlWjyw-1765858738-1.0.1.1-YU5ffyP2Ygo06mNjiO320olWA2QHkd6_O0QMYr9LA8ShTQnMLnr4p3qPEECJV.KhCS6hdhc6jLhZqPM52_7ctpV7PyM_7FBrJC4IPYIYaRk;
+        _cfuvid=cdyncNURVLH1Nu7dKGHaA9c1MG_r__eMQWrmdlKrsWc-1765858738726-0.0.1.1-604800000
+      host:
+      - api.openai.com
+    method: POST
+    parsed_body:
+      max_completion_tokens: 1
+      messages:
+      - content: You are a helpful assistant.
+        role: system
+      - content: Hello, how are you?
+        role: user
+      - content: I am doing well, thank you for asking!
+        role: assistant
+      - content: What is the weather in Paris?
+        role: user
+      model: gpt-4o
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    headers:
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      connection:
+      - keep-alive
+      content-length:
+      - '806'
+      content-type:
+      - application/json
+      openai-organization:
+      - user-hmtx7ob9xfpodrsdmshpnklm
+      openai-processing-ms:
+      - '183'
+      openai-project:
+      - proj_98JsR0JYQV1Ovk2AT0cljXCL
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      transfer-encoding:
+      - chunked
+    parsed_body:
+      choices:
+      - finish_reason: length
+        index: 0
+        logprobs: null
+        message:
+          annotations: []
+          content: I'm
+          refusal: null
+          role: assistant
+      created: 1765858739
+      id: chatcmpl-CnGoVlYtl0jao9GJKdjuVZKNTjlV4
+      model: gpt-4o-2024-08-06
+      object: chat.completion
+      service_tier: default
+      system_fingerprint: fp_e413f45763
+      usage:
+        completion_tokens: 1
+        completion_tokens_details:
+          accepted_prediction_tokens: 0
+          audio_tokens: 0
+          reasoning_tokens: 0
+          rejected_prediction_tokens: 0
+        prompt_tokens: 48
+        prompt_tokens_details:
+          audio_tokens: 0
+          cached_tokens: 0
+        total_tokens: 49
+    status:
+      code: 200
+      message: OK
+- request:
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '539'
+      content-type:
+      - application/json
+      cookie:
+      - __cf_bm=T9zx387BxMuBX2Nrn4UuHxBvEwJvp7VuCBaintlWjyw-1765858738-1.0.1.1-YU5ffyP2Ygo06mNjiO320olWA2QHkd6_O0QMYr9LA8ShTQnMLnr4p3qPEECJV.KhCS6hdhc6jLhZqPM52_7ctpV7PyM_7FBrJC4IPYIYaRk;
+        _cfuvid=cdyncNURVLH1Nu7dKGHaA9c1MG_r__eMQWrmdlKrsWc-1765858738726-0.0.1.1-604800000
+      host:
+      - api.openai.com
+    method: POST
+    parsed_body:
+      max_completion_tokens: 1
+      messages:
+      - content: You are a helpful assistant.
+        role: system
+      - content: Hello, how are you?
+        role: user
+      - content: I am doing well, thank you for asking!
+        role: assistant
+      - content: What is the weather in Paris?
+        role: user
+      - content: null
+        role: assistant
+        tool_calls:
+        - function:
+            arguments: '{"city": "Paris"}'
+            name: get_weather
+          id: call_abc123
+          type: function
+      - content: Sunny, 22°C in Paris today
+        role: tool
+        tool_call_id: call_abc123
+      model: gpt-4o
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    headers:
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      connection:
+      - keep-alive
+      content-length:
+      - '806'
+      content-type:
+      - application/json
+      openai-organization:
+      - user-hmtx7ob9xfpodrsdmshpnklm
+      openai-processing-ms:
+      - '188'
+      openai-project:
+      - proj_98JsR0JYQV1Ovk2AT0cljXCL
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      transfer-encoding:
+      - chunked
+    parsed_body:
+      choices:
+      - finish_reason: length
+        index: 0
+        logprobs: null
+        message:
+          annotations: []
+          content: The
+          refusal: null
+          role: assistant
+      created: 1765858740
+      id: chatcmpl-CnGoWLcV6cgedOjqVpoX8NKO5z5JJ
+      model: gpt-4o-2024-08-06
+      object: chat.completion
+      service_tier: default
+      system_fingerprint: fp_e413f45763
+      usage:
+        completion_tokens: 1
+        completion_tokens_details:
+          accepted_prediction_tokens: 0
+          audio_tokens: 0
+          reasoning_tokens: 0
+          rejected_prediction_tokens: 0
+        prompt_tokens: 78
+        prompt_tokens_details:
+          audio_tokens: 0
+          cached_tokens: 0
+        total_tokens: 79
+    status:
+      code: 200
+      message: OK
+- request:
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '631'
+      content-type:
+      - application/json
+      cookie:
+      - __cf_bm=T9zx387BxMuBX2Nrn4UuHxBvEwJvp7VuCBaintlWjyw-1765858738-1.0.1.1-YU5ffyP2Ygo06mNjiO320olWA2QHkd6_O0QMYr9LA8ShTQnMLnr4p3qPEECJV.KhCS6hdhc6jLhZqPM52_7ctpV7PyM_7FBrJC4IPYIYaRk;
+        _cfuvid=cdyncNURVLH1Nu7dKGHaA9c1MG_r__eMQWrmdlKrsWc-1765858738726-0.0.1.1-604800000
+      host:
+      - api.openai.com
+    method: POST
+    parsed_body:
+      max_completion_tokens: 1
+      messages:
+      - content: You are a helpful assistant.
+        role: system
+      - content: Hello, how are you?
+        role: user
+      - content: I am doing well, thank you for asking!
+        role: assistant
+      - content: What is the weather in Paris?
+        role: user
+      - content: null
+        role: assistant
+        tool_calls:
+        - function:
+            arguments: '{"city": "Paris"}'
+            name: get_weather
+          id: call_abc123
+          type: function
+      - content: Sunny, 22°C in Paris today
+        role: tool
+        tool_call_id: call_abc123
+      - content: The weather in Paris is sunny with a temperature of 22°C.
+        role: assistant
+      model: gpt-4o
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    headers:
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      connection:
+      - keep-alive
+      content-length:
+      - '806'
+      content-type:
+      - application/json
+      openai-organization:
+      - user-hmtx7ob9xfpodrsdmshpnklm
+      openai-processing-ms:
+      - '311'
+      openai-project:
+      - proj_98JsR0JYQV1Ovk2AT0cljXCL
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      transfer-encoding:
+      - chunked
+    parsed_body:
+      choices:
+      - finish_reason: length
+        index: 0
+        logprobs: null
+        message:
+          annotations: []
+          content: The
+          refusal: null
+          role: assistant
+      created: 1765858740
+      id: chatcmpl-CnGoWLopMQ1QAvhNm5ohP01sFB3HU
+      model: gpt-4o-2024-08-06
+      object: chat.completion
+      service_tier: default
+      system_fingerprint: fp_e413f45763
+      usage:
+        completion_tokens: 1
+        completion_tokens_details:
+          accepted_prediction_tokens: 0
+          audio_tokens: 0
+          reasoning_tokens: 0
+          rejected_prediction_tokens: 0
+        prompt_tokens: 96
+        prompt_tokens_details:
+          audio_tokens: 0
+          cached_tokens: 0
+        total_tokens: 97
+    status:
+      code: 200
+      message: OK
+- request:
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '761'
+      content-type:
+      - application/json
+      cookie:
+      - __cf_bm=T9zx387BxMuBX2Nrn4UuHxBvEwJvp7VuCBaintlWjyw-1765858738-1.0.1.1-YU5ffyP2Ygo06mNjiO320olWA2QHkd6_O0QMYr9LA8ShTQnMLnr4p3qPEECJV.KhCS6hdhc6jLhZqPM52_7ctpV7PyM_7FBrJC4IPYIYaRk;
+        _cfuvid=cdyncNURVLH1Nu7dKGHaA9c1MG_r__eMQWrmdlKrsWc-1765858738726-0.0.1.1-604800000
+      host:
+      - api.openai.com
+    method: POST
+    parsed_body:
+      max_completion_tokens: 1
+      messages:
+      - content: You are a helpful assistant.
+        role: system
+      - content: Hello, how are you?
+        role: user
+      - content: I am doing well, thank you for asking!
+        role: assistant
+      - content: What is the weather in Paris?
+        role: user
+      - content: null
+        role: assistant
+        tool_calls:
+        - function:
+            arguments: '{"city": "Paris"}'
+            name: get_weather
+          id: call_abc123
+          type: function
+      - content: Sunny, 22°C in Paris today
+        role: tool
+        tool_call_id: call_abc123
+      - content: The weather in Paris is sunny with a temperature of 22°C.
+        role: assistant
+      - content: |-
+          Validation feedback:
+          Please provide more details about the weather.
+
+          Fix the errors and try again.
+        role: user
+      model: gpt-4o
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    headers:
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      connection:
+      - keep-alive
+      content-length:
+      - '806'
+      content-type:
+      - application/json
+      openai-organization:
+      - user-hmtx7ob9xfpodrsdmshpnklm
+      openai-processing-ms:
+      - '229'
+      openai-project:
+      - proj_98JsR0JYQV1Ovk2AT0cljXCL
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      transfer-encoding:
+      - chunked
+    parsed_body:
+      choices:
+      - finish_reason: length
+        index: 0
+        logprobs: null
+        message:
+          annotations: []
+          content: I
+          refusal: null
+          role: assistant
+      created: 1765858740
+      id: chatcmpl-CnGoWjh7LXyTu4eFtfx2T6Q4yv95X
+      model: gpt-4o-2024-08-06
+      object: chat.completion
+      service_tier: default
+      system_fingerprint: fp_ff5f7093b3
+      usage:
+        completion_tokens: 1
+        completion_tokens_details:
+          accepted_prediction_tokens: 0
+          audio_tokens: 0
+          reasoning_tokens: 0
+          rejected_prediction_tokens: 0
+        prompt_tokens: 118
+        prompt_tokens_details:
+          audio_tokens: 0
+          cached_tokens: 0
+        total_tokens: 119
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/models/cassettes/test_openai_counting/test_count_tokens_multi_tool.yaml b/tests/models/cassettes/test_openai_counting/test_count_tokens_multi_tool.yaml
new file mode 100644
index 0000000000..c71d39e49f
--- /dev/null
+++ b/tests/models/cassettes/test_openai_counting/test_count_tokens_multi_tool.yaml
@@ -0,0 +1,96 @@
+interactions:
+- request:
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '480'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+    method: POST
+    parsed_body:
+      max_completion_tokens: 1
+      messages:
+      - content: null
+        role: assistant
+        tool_calls:
+        - function:
+            arguments: '{"city": "Paris"}'
+            name: get_weather
+          id: call_paris
+          type: function
+        - function:
+            arguments: '{"city": "London"}'
+            name: get_weather
+          id: call_london
+          type: function
+      - content: 'Paris: Sunny, 22°C'
+        role: tool
+        tool_call_id: call_paris
+      - content: 'London: Rainy, 15°C'
+        role: tool
+        tool_call_id: call_london
+      model: gpt-4o
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    headers:
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      connection:
+      - keep-alive
+      content-length:
+      - '812'
+      content-type:
+      - application/json
+      openai-organization:
+      - user-hmtx7ob9xfpodrsdmshpnklm
+      openai-processing-ms:
+      - '126'
+      openai-project:
+      - proj_98JsR0JYQV1Ovk2AT0cljXCL
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      transfer-encoding:
+      - chunked
+    parsed_body:
+      choices:
+      - finish_reason: length
+        index: 0
+        logprobs: null
+        message:
+          annotations: []
+          content: Currently
+          refusal: null
+          role: assistant
+      created: 1765858748
+      id: chatcmpl-CnGoenUt6oBQTzN8xs3TgGtfKA4KA
+      model: gpt-4o-2024-08-06
+      object: chat.completion
+      service_tier: default
+      system_fingerprint: fp_e413f45763
+      usage:
+        completion_tokens: 1
+        completion_tokens_details:
+          accepted_prediction_tokens: 0
+          audio_tokens: 0
+          reasoning_tokens: 0
+          rejected_prediction_tokens: 0
+        prompt_tokens: 84
+        prompt_tokens_details:
+          audio_tokens: 0
+          cached_tokens: 0
+        total_tokens: 85
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/models/cassettes/test_openai_counting/test_count_tokens_multi_turn.yaml b/tests/models/cassettes/test_openai_counting/test_count_tokens_multi_turn.yaml
new file mode 100644
index 0000000000..9bc19339cc
--- /dev/null
+++ b/tests/models/cassettes/test_openai_counting/test_count_tokens_multi_turn.yaml
@@ -0,0 +1,83 @@
+interactions:
+- request:
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '243'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+    method: POST
+    parsed_body:
+      max_completion_tokens: 1
+      messages:
+      - content: Tell me a joke
+        role: user
+      - content: Why did the chicken cross the road? To get to the other side!
+        role: assistant
+      - content: Tell me another one
+        role: user
+      model: gpt-4o
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    headers:
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      connection:
+      - keep-alive
+      content-length:
+      - '806'
+      content-type:
+      - application/json
+      openai-organization:
+      - user-hmtx7ob9xfpodrsdmshpnklm
+      openai-processing-ms:
+      - '147'
+      openai-project:
+      - proj_98JsR0JYQV1Ovk2AT0cljXCL
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      transfer-encoding:
+      - chunked
+    parsed_body:
+      choices:
+      - finish_reason: length
+        index: 0
+        logprobs: null
+        message:
+          annotations: []
+          content: Why
+          refusal: null
+          role: assistant
+      created: 1765858746
+      id: chatcmpl-CnGoc2ReCEgaZt8DFIFVwwLKHaTR8
+      model: gpt-4o-2024-08-06
+      object: chat.completion
+      service_tier: default
+      system_fingerprint: fp_e413f45763
+      usage:
+        completion_tokens: 1
+        completion_tokens_details:
+          accepted_prediction_tokens: 0
+          audio_tokens: 0
+          reasoning_tokens: 0
+          rejected_prediction_tokens: 0
+        prompt_tokens: 38
+        prompt_tokens_details:
+          audio_tokens: 0
+          cached_tokens: 0
+        total_tokens: 39
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/models/cassettes/test_openai_counting/test_count_tokens_multiple_system_prompts.yaml b/tests/models/cassettes/test_openai_counting/test_count_tokens_multiple_system_prompts.yaml
new file mode 100644
index 0000000000..f08582e377
--- /dev/null
+++ b/tests/models/cassettes/test_openai_counting/test_count_tokens_multiple_system_prompts.yaml
@@ -0,0 +1,89 @@
+interactions:
+- request:
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '684'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+    method: POST
+    parsed_body:
+      max_completion_tokens: 1
+      messages:
+      - content: You are a helpful, pattern-following assistant that translates corporate jargon into plain English.
+        role: system
+      - content: New synergies will help drive top-line growth.
+        role: system
+      - content: Things working well together will increase revenue.
+        role: system
+      - content: Let's circle back when we have more bandwidth to touch base on opportunities for increased leverage.
+        role: system
+      - content: Let's talk later when we're less busy about how to do better.
+        role: system
+      - content: This late pivot means we don't have time to boil the ocean for the client deliverable.
+ role: user + model: gpt-4o + uri: https://api.openai.com/v1/chat/completions + response: + headers: + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + connection: + - keep-alive + content-length: + - '813' + content-type: + - application/json + openai-organization: + - user-hmtx7ob9xfpodrsdmshpnklm + openai-processing-ms: + - '153' + openai-project: + - proj_98JsR0JYQV1Ovk2AT0cljXCL + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + transfer-encoding: + - chunked + parsed_body: + choices: + - finish_reason: length + index: 0 + logprobs: null + message: + annotations: [] + content: Changing + refusal: null + role: assistant + created: 1765858747 + id: chatcmpl-CnGod0dOxTFpX5nzxCandvG28ECn3 + model: gpt-4o-2024-08-06 + object: chat.completion + service_tier: default + system_fingerprint: fp_e413f45763 + usage: + completion_tokens: 1 + completion_tokens_details: + accepted_prediction_tokens: 0 + audio_tokens: 0 + reasoning_tokens: 0 + rejected_prediction_tokens: 0 + prompt_tokens: 110 + prompt_tokens_details: + audio_tokens: 0 + cached_tokens: 0 + total_tokens: 111 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/models/cassettes/test_openai_counting/test_count_tokens_with_name_field.yaml b/tests/models/cassettes/test_openai_counting/test_count_tokens_with_name_field.yaml new file mode 100644 index 0000000000..11ca105e49 --- /dev/null +++ b/tests/models/cassettes/test_openai_counting/test_count_tokens_with_name_field.yaml @@ -0,0 +1,85 @@ +interactions: +- request: + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '284' + content-type: + - application/json + host: + - api.openai.com + method: POST + parsed_body: + max_completion_tokens: 1 + messages: + - content: You are a helpful assistant. + role: system + - content: Hello, my name is Alice. + role: user + - content: Hello Alice! How can I help you today? + role: assistant + - content: What is 2 + 2? 
+ role: user + model: gpt-4o + uri: https://api.openai.com/v1/chat/completions + response: + headers: + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + connection: + - keep-alive + content-length: + - '804' + content-type: + - application/json + openai-organization: + - user-hmtx7ob9xfpodrsdmshpnklm + openai-processing-ms: + - '159' + openai-project: + - proj_98JsR0JYQV1Ovk2AT0cljXCL + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + transfer-encoding: + - chunked + parsed_body: + choices: + - finish_reason: length + index: 0 + logprobs: null + message: + annotations: [] + content: '2' + refusal: null + role: assistant + created: 1765858749 + id: chatcmpl-CnGofEz3O7iDXG6UfQuEtSyMLthU4 + model: gpt-4o-2024-08-06 + object: chat.completion + service_tier: default + system_fingerprint: fp_e413f45763 + usage: + completion_tokens: 1 + completion_tokens_details: + accepted_prediction_tokens: 0 + audio_tokens: 0 + reasoning_tokens: 0 + rejected_prediction_tokens: 0 + prompt_tokens: 50 + prompt_tokens_details: + audio_tokens: 0 + cached_tokens: 0 + total_tokens: 51 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/models/cassettes/test_openai_counting/test_count_tokens_with_tool_calls.yaml b/tests/models/cassettes/test_openai_counting/test_count_tokens_with_tool_calls.yaml new file mode 100644 index 0000000000..f88e4e5570 --- /dev/null +++ b/tests/models/cassettes/test_openai_counting/test_count_tokens_with_tool_calls.yaml @@ -0,0 +1,90 @@ +interactions: +- request: + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '339' + content-type: + - application/json + host: + - api.openai.com + method: POST + parsed_body: + max_completion_tokens: 1 + messages: + - content: What is the weather in Tokyo? 
+ role: user + - content: null + role: assistant + tool_calls: + - function: + arguments: '{"city": "Tokyo"}' + name: get_weather + id: call_123 + type: function + - content: Sunny, 25°C + role: tool + tool_call_id: call_123 + model: gpt-4o + uri: https://api.openai.com/v1/chat/completions + response: + headers: + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + connection: + - keep-alive + content-length: + - '806' + content-type: + - application/json + openai-organization: + - user-hmtx7ob9xfpodrsdmshpnklm + openai-processing-ms: + - '185' + openai-project: + - proj_98JsR0JYQV1Ovk2AT0cljXCL + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + transfer-encoding: + - chunked + parsed_body: + choices: + - finish_reason: length + index: 0 + logprobs: null + message: + annotations: [] + content: The + refusal: null + role: assistant + created: 1765858746 + id: chatcmpl-CnGockTtljxLezFxAzhumUICQbGvV + model: gpt-4o-2024-08-06 + object: chat.completion + service_tier: default + system_fingerprint: fp_e413f45763 + usage: + completion_tokens: 1 + completion_tokens_details: + accepted_prediction_tokens: 0 + audio_tokens: 0 + reasoning_tokens: 0 + rejected_prediction_tokens: 0 + prompt_tokens: 41 + prompt_tokens_details: + audio_tokens: 0 + cached_tokens: 0 + total_tokens: 42 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/models/cassettes/test_openai_counting/test_openai_model_usage_limit_not_exceeded.yaml b/tests/models/cassettes/test_openai_counting/test_openai_model_usage_limit_not_exceeded.yaml new file mode 100644 index 0000000000..f63eb652f3 --- /dev/null +++ b/tests/models/cassettes/test_openai_counting/test_openai_model_usage_limit_not_exceeded.yaml @@ -0,0 +1,105 @@ +interactions: +- request: + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '114' + content-type: + - application/json + host: + - api.openai.com + method: POST + parsed_body: + input: + - content: The quick brown fox jumps over the lazydog. + role: user + model: gpt-4 + stream: false + uri: https://api.openai.com/v1/responses + response: + headers: + alt-svc: + - h3=":443"; ma=86400 + connection: + - keep-alive + content-length: + - '1675' + content-type: + - application/json + openai-organization: + - user-hmtx7ob9xfpodrsdmshpnklm + openai-processing-ms: + - '2119' + openai-project: + - proj_98JsR0JYQV1Ovk2AT0cljXCL + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + transfer-encoding: + - chunked + parsed_body: + background: false + billing: + payer: developer + created_at: 1765858751 + error: null + id: resp_017e284b89eacda1006940ddbf5c7c81938eeeb76d40f3a7f7 + incomplete_details: null + instructions: null + max_output_tokens: null + max_tool_calls: null + metadata: {} + model: gpt-4-0613 + object: response + output: + - content: + - annotations: [] + logprobs: [] + text: This sentence is famous because it contains every letter in the English alphabet. It's often used to display + different fonts or for typing practice. Interestingly, the dog's supposed laziness does not intervene with the + fox's athletic endeavor. 
+          type: output_text
+        id: msg_017e284b89eacda1006940ddc051b88193acabac2bcc8d6a8b
+        role: assistant
+        status: completed
+        type: message
+      parallel_tool_calls: true
+      previous_response_id: null
+      prompt_cache_key: null
+      prompt_cache_retention: null
+      reasoning:
+        effort: null
+        summary: null
+      safety_identifier: null
+      service_tier: default
+      status: completed
+      store: true
+      temperature: 1.0
+      text:
+        format:
+          type: text
+        verbosity: medium
+      tool_choice: auto
+      tools: []
+      top_logprobs: 0
+      top_p: 1.0
+      truncation: disabled
+      usage:
+        input_tokens: 16
+        input_tokens_details:
+          cached_tokens: 0
+        output_tokens: 47
+        output_tokens_details:
+          reasoning_tokens: 0
+        total_tokens: 63
+      user: null
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/models/test_openai_counting.py b/tests/models/test_openai_counting.py
new file mode 100644
index 0000000000..83018fbbd2
--- /dev/null
+++ b/tests/models/test_openai_counting.py
@@ -0,0 +1,836 @@
+from __future__ import annotations as _annotations
+
+from datetime import timezone
+
+import pytest
+from inline_snapshot import snapshot
+
+from pydantic_ai import (
+    Agent,
+    ModelMessage,
+    ModelRequest,
+    ModelResponse,
+    RetryPromptPart,
+    SystemPromptPart,
+    TextPart,
+    ToolCallPart,
+    ToolReturnPart,
+    UsageLimitExceeded,
+    UsageLimits,
+    UserPromptPart,
+)
+from pydantic_ai.models import ModelRequestParameters
+from pydantic_ai.usage import RequestUsage
+
+from ..conftest import IsNow, IsStr, try_import
+
+with try_import() as imports_successful:
+    from openai.types import chat
+
+    from pydantic_ai.models.openai import (
+        OpenAIChatModel,
+        OpenAIResponsesModel,
+    )
+    from pydantic_ai.providers.ollama import OllamaProvider
+    from pydantic_ai.providers.openai import OpenAIProvider
+
+    MockChatCompletion = chat.ChatCompletion | Exception
+    MockChatCompletionChunk = chat.ChatCompletionChunk | Exception
+
+pytestmark = [
+    pytest.mark.skipif(not imports_successful(), reason='openai not installed'),
+    pytest.mark.anyio,
+    pytest.mark.vcr,
+]
+
+
+# ============================================================================
+# VCR-based token counting verification tests
+# These tests verify our token counting matches OpenAI's actual token counts
+# ============================================================================
+
+
+async def _verify_count_against_api(chat_model: OpenAIChatModel, msgs: list[ModelMessage], step_name: str):
+    """Count tokens using our method and verify against OpenAI API.
+
+    Args:
+        chat_model: The OpenAI chat model to use for counting
+        msgs: The messages to count tokens for
+        step_name: Name of the step for error messages
+
+    Raises:
+        AssertionError: If our count falls outside the allowed tolerance of the API count.
+    """
+    our_count = await chat_model.count_tokens(msgs, {}, ModelRequestParameters())
+    openai_messages = await chat_model._map_messages(msgs, ModelRequestParameters())  # pyright: ignore[reportPrivateUsage]
+    response = await chat_model.client.chat.completions.create(
+        model='gpt-4o',
+        messages=openai_messages,
+        max_completion_tokens=1,
+    )
+    api_count = response.usage.prompt_tokens if response.usage else 0
+    _assert_token_count_within_tolerance(our_count.input_tokens, api_count, test_name=step_name)
+
+
+def _assert_token_count_within_tolerance(
+    our_count: int, api_count: int, tolerance: float = 0.25, test_name: str = ''
+) -> None:
+    """Assert that our token count is within the specified tolerance of the API count.
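+    For example, with api_count=118 and the default 25% tolerance, tolerance_tokens is
+    max(1, int(118 * 0.25)) = 29, so any count between 89 and 147 passes.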
+ + Args: + our_count: Our calculated token count + api_count: The token count from the OpenAI API + tolerance: The allowed tolerance as a fraction (default 25% = 0.25) + test_name: Optional test name for error messages + """ + if api_count == 0: + # If API returns 0, our count should also be 0 or very small + assert our_count <= 1, f'{test_name}: API returned 0 tokens but we calculated {our_count}' + return + + difference = abs(our_count - api_count) + tolerance_tokens = max(1, int(api_count * tolerance)) # At least 1 token tolerance + + assert difference <= tolerance_tokens, ( + f'{test_name}: Token count outside {tolerance * 100:.0f}% tolerance: ' + f'our count={our_count}, API count={api_count}, ' + f'difference={difference}, allowed={tolerance_tokens}' + ) + + +@pytest.mark.vcr() +async def test_count_tokens_individual_message_types( + allow_model_requests: None, + openai_api_key: str, + mock_tiktoken_encoding: None, +): + """Test token counting for each ModelMessage type individually against the OpenAI API. + + This test incrementally adds different message types and verifies our token count + matches the OpenAI API after each addition. It covers: + - SystemPromptPart (system message) + - UserPromptPart (user message with string content) + - ModelResponse with TextPart (assistant message) + - ModelResponse with ToolCallPart + ToolReturnPart (tool call flow) + - RetryPromptPart (retry as user message) + + Note: Tool calls and tool returns must be added together because the OpenAI API + requires tool calls to be immediately followed by their corresponding tool responses. + """ + chat_model = OpenAIChatModel('gpt-4o', provider=OpenAIProvider(api_key=openai_api_key)) + + # Track cumulative messages + messages: list[ModelMessage] = [] + # --- 1. System prompt --- + messages.append( + ModelRequest( + parts=[ + SystemPromptPart( + content='You are a helpful assistant.', + timestamp=IsNow(tz=timezone.utc), + ), + ], + run_id=IsStr(), + ) + ) + await _verify_count_against_api(chat_model, messages, 'step1_system_prompt') + + # --- 2. User prompt (string content) --- + messages.append( + ModelRequest( + parts=[ + UserPromptPart( + content='Hello, how are you?', + timestamp=IsNow(tz=timezone.utc), + ), + ], + run_id=IsStr(), + ) + ) + await _verify_count_against_api(chat_model, messages, 'step2_user_prompt') + + # --- 3. Assistant response with TextPart --- + messages.append( + ModelResponse( + parts=[ + TextPart(content='I am doing well, thank you for asking!'), + ], + usage=RequestUsage(input_tokens=0, output_tokens=10), + model_name='gpt-4o', + timestamp=IsNow(tz=timezone.utc), + ) + ) + await _verify_count_against_api(chat_model, messages, 'step3_assistant_text') + + # --- 4. User follow-up --- + messages.append( + ModelRequest( + parts=[ + UserPromptPart( + content='What is the weather in Paris?', + timestamp=IsNow(tz=timezone.utc), + ), + ], + run_id=IsStr(), + ) + ) + await _verify_count_against_api(chat_model, messages, 'step4_user_followup') + + # --- 5. Tool call + Tool return (must be added together for valid API request) --- + # OpenAI API requires tool calls to be immediately followed by tool responses. + # We add both and measure the combined token increase. 
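+    # For reference, this pair maps to roughly the following Chat Completions
+    # messages (the same shapes _num_tokens_from_messages receives; exact field
+    # order in the real mapping may differ):
+    #   {'role': 'assistant', 'tool_calls': [{'id': 'call_abc123', 'type': 'function',
+    #       'function': {'name': 'get_weather', 'arguments': '{"city": "Paris"}'}}]}
+    #   {'role': 'tool', 'tool_call_id': 'call_abc123', 'content': 'Sunny, 22°C in Paris today'}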
+ messages.append( + ModelResponse( + parts=[ + ToolCallPart( + tool_name='get_weather', + args='{"city": "Paris"}', + tool_call_id='call_abc123', + ), + ], + usage=RequestUsage(input_tokens=0, output_tokens=5), + model_name='gpt-4o', + timestamp=IsNow(tz=timezone.utc), + ) + ) + messages.append( + ModelRequest( + parts=[ + ToolReturnPart( + tool_name='get_weather', + content='Sunny, 22°C in Paris today', + tool_call_id='call_abc123', + timestamp=IsNow(tz=timezone.utc), + ), + ], + run_id=IsStr(), + ) + ) + await _verify_count_against_api(chat_model, messages, 'step5_tool_call_and_return') + + # --- 6. Assistant final response after tool --- + messages.append( + ModelResponse( + parts=[ + TextPart(content='The weather in Paris is sunny with a temperature of 22°C.'), + ], + usage=RequestUsage(input_tokens=0, output_tokens=15), + model_name='gpt-4o', + timestamp=IsNow(tz=timezone.utc), + ) + ) + await _verify_count_against_api(chat_model, messages, 'step6_final_assistant') + + # --- 7. RetryPromptPart (without tool_name, becomes user message) --- + messages.append( + ModelRequest( + parts=[ + RetryPromptPart( + content='Please provide more details about the weather.', + timestamp=IsNow(tz=timezone.utc), + ), + ], + run_id=IsStr(), + ) + ) + await _verify_count_against_api(chat_model, messages, 'step7_retry_prompt') + + +@pytest.mark.vcr() +async def test_count_tokens_all_model_request_parts( + allow_model_requests: None, + openai_api_key: str, + mock_tiktoken_encoding: None, +): + """Test token counting for a ModelRequest containing all ModelRequestPart types. + + ModelRequestPart types: SystemPromptPart, UserPromptPart, ToolReturnPart, RetryPromptPart + + This test incrementally builds a conversation and verifies our token count matches + the OpenAI API after each step. 
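+
+    As exercised by the cassettes in this PR, these parts map to OpenAI chat roles as
+    follows: SystemPromptPart -> system, UserPromptPart -> user, ToolReturnPart -> tool,
+    and RetryPromptPart (without a tool name) -> user.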
+ """ + chat_model = OpenAIChatModel('gpt-4o', provider=OpenAIProvider(api_key=openai_api_key)) + + messages: list[ModelMessage] = [] + + # --- Step 1: SystemPromptPart + UserPromptPart --- + messages.append( + ModelRequest( + parts=[ + SystemPromptPart( + content='You are a helpful weather assistant.', + timestamp=IsNow(tz=timezone.utc), + ), + UserPromptPart( + content='What is the weather like in Tokyo today?', + timestamp=IsNow(tz=timezone.utc), + ), + ], + run_id=IsStr(), + ) + ) + await _verify_count_against_api(chat_model, messages, 'step1_system_and_user') + + # --- Step 2: ToolCallPart + ToolReturnPart (must be together for valid API request) --- + messages.append( + ModelResponse( + parts=[ + ToolCallPart( + tool_name='get_weather', + args='{"city": "Tokyo", "units": "celsius"}', + tool_call_id='call_weather_001', + ), + ], + usage=RequestUsage(input_tokens=0, output_tokens=10), + model_name='gpt-4o', + timestamp=IsNow(tz=timezone.utc), + ) + ) + messages.append( + ModelRequest( + parts=[ + ToolReturnPart( + tool_name='get_weather', + content='{"temperature": 18, "condition": "Partly cloudy", "humidity": 65}', + tool_call_id='call_weather_001', + timestamp=IsNow(tz=timezone.utc), + ), + ], + run_id=IsStr(), + ) + ) + await _verify_count_against_api(chat_model, messages, 'step2_tool_call_and_return') + + # --- Step 3: TextPart response --- + messages.append( + ModelResponse( + parts=[ + TextPart(content='The weather in Tokyo is partly cloudy with a temperature of 18°C.'), + ], + usage=RequestUsage(input_tokens=0, output_tokens=15), + model_name='gpt-4o', + timestamp=IsNow(tz=timezone.utc), + ) + ) + await _verify_count_against_api(chat_model, messages, 'step3_text_response') + + # --- Step 4: RetryPromptPart --- + messages.append( + ModelRequest( + parts=[ + RetryPromptPart( + content='Please also include the humidity level in your response.', + timestamp=IsNow(tz=timezone.utc), + ), + ], + run_id=IsStr(), + ) + ) + await _verify_count_against_api(chat_model, messages, 'step4_retry_prompt') + + +@pytest.mark.vcr() +async def test_count_tokens_all_model_response_parts( + allow_model_requests: None, + openai_api_key: str, + mock_tiktoken_encoding: None, +): + """Test token counting for ModelResponses containing various ModelResponsePart types. + + ModelResponsePart types: TextPart, ToolCallPart (multiple/parallel) + + This test incrementally builds a conversation and verifies our token count matches + the OpenAI API after each step. + """ + chat_model = OpenAIChatModel('gpt-4o', provider=OpenAIProvider(api_key=openai_api_key)) + + messages: list[ModelMessage] = [] + + # --- Step 1: Initial user request --- + messages.append( + ModelRequest( + parts=[ + SystemPromptPart( + content='You are a helpful assistant with access to calculator tools.', + timestamp=IsNow(tz=timezone.utc), + ), + UserPromptPart( + content='Hello! Can you help me with some math?', + timestamp=IsNow(tz=timezone.utc), + ), + ], + run_id=IsStr(), + ) + ) + await _verify_count_against_api(chat_model, messages, 'step1_initial_request') + + # --- Step 2: TextPart response --- + messages.append( + ModelResponse( + parts=[ + TextPart( + content='Of course! I can help you with mathematical calculations. What would you like to compute?' 
+ ), + ], + usage=RequestUsage(input_tokens=0, output_tokens=20), + model_name='gpt-4o', + timestamp=IsNow(tz=timezone.utc), + ) + ) + await _verify_count_against_api(chat_model, messages, 'step2_text_response') + + # --- Step 3: User asks for calculations --- + messages.append( + ModelRequest( + parts=[ + UserPromptPart( + content='Calculate 15 * 7 and also 128 / 4 please.', + timestamp=IsNow(tz=timezone.utc), + ), + ], + run_id=IsStr(), + ) + ) + await _verify_count_against_api(chat_model, messages, 'step3_calculation_request') + + # # --- Step 4: Multiple ToolCallParts (parallel) + ToolReturnParts --- + messages.append( + ModelResponse( + parts=[ + ToolCallPart( + tool_name='multiply', + args='{"a": 15, "b": 7}', + tool_call_id='call_mult_001', + ) + ], + usage=RequestUsage(input_tokens=0, output_tokens=25), + model_name='gpt-4o', + timestamp=IsNow(tz=timezone.utc), + ) + ) + messages.append( + ModelRequest( + parts=[ + ToolReturnPart( + tool_name='multiply', + content='105', + tool_call_id='call_mult_001', + timestamp=IsNow(tz=timezone.utc), + ) + ], + run_id=IsStr(), + ) + ) + await _verify_count_against_api(chat_model, messages, 'step4_parallel_tool_calls') + + # --- Step 5: Final TextPart with results --- + messages.append( + ModelResponse( + parts=[ + TextPart(content='Here are your results: 15 × 7 = 105 and 128 ÷ 4 = 32.'), + ], + usage=RequestUsage(input_tokens=0, output_tokens=20), + model_name='gpt-4o', + timestamp=IsNow(tz=timezone.utc), + ) + ) + await _verify_count_against_api(chat_model, messages, 'step5_final_response') + + +@pytest.mark.vcr() +async def test_count_tokens_basic( + allow_model_requests: None, + openai_api_key: str, + mock_tiktoken_encoding: None, +): + """Verify token counting for basic system and user prompts against OpenAI API.""" + test_messages: list[ModelMessage] = [ + ModelRequest( + parts=[ + SystemPromptPart( + content='You are a helpful assistant.', + timestamp=IsNow(tz=timezone.utc), + ), + UserPromptPart( + content='Hello, world!', + timestamp=IsNow(tz=timezone.utc), + ), + ], + run_id=IsStr(), + ) + ] + + chat_model = OpenAIChatModel('gpt-4o', provider=OpenAIProvider(api_key=openai_api_key)) + our_count: RequestUsage = await chat_model.count_tokens(test_messages, {}, ModelRequestParameters()) + + openai_messages = await chat_model._map_messages(test_messages, ModelRequestParameters()) # pyright: ignore[reportPrivateUsage] + response = await chat_model.client.chat.completions.create( + model='gpt-4o', + messages=openai_messages, + max_completion_tokens=1, + ) + + api_prompt_tokens = response.usage.prompt_tokens if response.usage else 0 + _assert_token_count_within_tolerance(our_count.input_tokens, api_prompt_tokens, test_name='basic') + + +@pytest.mark.vcr() +async def test_count_tokens_with_tool_calls( + allow_model_requests: None, + openai_api_key: str, + mock_tiktoken_encoding: None, +): + """Verify token counting for messages with tool calls against OpenAI API.""" + test_messages: list[ModelMessage] = [ + ModelRequest( + parts=[ + UserPromptPart( + content='What is the weather in Tokyo?', + timestamp=IsNow(tz=timezone.utc), + ), + ], + run_id=IsStr(), + ), + ModelResponse( + parts=[ + ToolCallPart( + tool_name='get_weather', + args='{"city": "Tokyo"}', + tool_call_id='call_123', + ), + ], + model_name='gpt-4o', + timestamp=IsNow(tz=timezone.utc), + ), + ModelRequest( + parts=[ + ToolReturnPart( + tool_name='get_weather', + content='Sunny, 25°C', + tool_call_id='call_123', + timestamp=IsNow(tz=timezone.utc), + ), + ], + run_id=IsStr(), + ), + 
] + + chat_model = OpenAIChatModel('gpt-4o', provider=OpenAIProvider(api_key=openai_api_key)) + our_count: RequestUsage = await chat_model.count_tokens(test_messages, {}, ModelRequestParameters()) + + openai_messages = await chat_model._map_messages(test_messages, ModelRequestParameters()) # pyright: ignore[reportPrivateUsage] + response = await chat_model.client.chat.completions.create( + model='gpt-4o', + messages=openai_messages, + max_completion_tokens=1, + ) + + api_prompt_tokens = response.usage.prompt_tokens if response.usage else 0 + _assert_token_count_within_tolerance(our_count.input_tokens, api_prompt_tokens, test_name='tool_calls') + + +@pytest.mark.vcr() +async def test_count_tokens_multi_turn( + allow_model_requests: None, + openai_api_key: str, + mock_tiktoken_encoding: None, +): + """Verify token counting for multi-turn conversation against OpenAI API.""" + test_messages: list[ModelMessage] = [ + ModelRequest( + parts=[ + UserPromptPart( + content='Tell me a joke', + timestamp=IsNow(tz=timezone.utc), + ), + ], + run_id=IsStr(), + ), + ModelResponse( + parts=[ + TextPart(content='Why did the chicken cross the road? To get to the other side!'), + ], + usage=RequestUsage(input_tokens=5, output_tokens=15), + model_name='gpt-4o', + timestamp=IsNow(tz=timezone.utc), + ), + ModelRequest( + parts=[ + UserPromptPart( + content='Tell me another one', + timestamp=IsNow(tz=timezone.utc), + ), + ], + run_id=IsStr(), + ), + ] + + chat_model = OpenAIChatModel('gpt-4o', provider=OpenAIProvider(api_key=openai_api_key)) + our_count: RequestUsage = await chat_model.count_tokens(test_messages, {}, ModelRequestParameters()) + + openai_messages = await chat_model._map_messages(test_messages, ModelRequestParameters()) # pyright: ignore[reportPrivateUsage] + response = await chat_model.client.chat.completions.create( + model='gpt-4o', + messages=openai_messages, + max_completion_tokens=1, + ) + + api_prompt_tokens = response.usage.prompt_tokens if response.usage else 0 + _assert_token_count_within_tolerance(our_count.input_tokens, api_prompt_tokens, test_name='multi_turn') + + +@pytest.mark.vcr() +async def test_count_tokens_multiple_system_prompts( + allow_model_requests: None, + openai_api_key: str, + mock_tiktoken_encoding: None, +): + """Verify token counting for multiple system prompts (OpenAI cookbook example) against OpenAI API.""" + test_messages: list[ModelMessage] = [ + ModelRequest( + parts=[ + SystemPromptPart( + content='You are a helpful, pattern-following assistant that translates corporate jargon into plain English.', + timestamp=IsNow(tz=timezone.utc), + ), + SystemPromptPart( + content='New synergies will help drive top-line growth.', + timestamp=IsNow(tz=timezone.utc), + ), + SystemPromptPart( + content='Things working well together will increase revenue.', + timestamp=IsNow(tz=timezone.utc), + ), + SystemPromptPart( + content="Let's circle back when we have more bandwidth to touch base on opportunities for increased leverage.", + timestamp=IsNow(tz=timezone.utc), + ), + SystemPromptPart( + content="Let's talk later when we're less busy about how to do better.", + timestamp=IsNow(tz=timezone.utc), + ), + UserPromptPart( + content="This late pivot means we don't have time to boil the ocean for the client deliverable.", + timestamp=IsNow(tz=timezone.utc), + ), + ], + run_id=IsStr(), + ) + ] + + chat_model = OpenAIChatModel('gpt-4o', provider=OpenAIProvider(api_key=openai_api_key)) + our_count: RequestUsage = await chat_model.count_tokens(test_messages, {}, 
ModelRequestParameters()) + + openai_messages = await chat_model._map_messages(test_messages, ModelRequestParameters()) # pyright: ignore[reportPrivateUsage] + response = await chat_model.client.chat.completions.create( + model='gpt-4o', + messages=openai_messages, + max_completion_tokens=1, + ) + + api_prompt_tokens = response.usage.prompt_tokens if response.usage else 0 + _assert_token_count_within_tolerance(our_count.input_tokens, api_prompt_tokens, test_name='multiple_system_prompts') + + +@pytest.mark.vcr() +async def test_count_tokens_multi_tool( + allow_model_requests: None, + openai_api_key: str, + mock_tiktoken_encoding: None, +): + """Verify token counting for conversation with multiple tool calls against OpenAI API.""" + test_messages: list[ModelMessage] = [ + ModelResponse( + parts=[ + ToolCallPart( + tool_name='get_weather', + args='{"city": "Paris"}', + tool_call_id='call_paris', + ), + ToolCallPart( + tool_name='get_weather', + args='{"city": "London"}', + tool_call_id='call_london', + ), + ], + model_name='gpt-4o', + timestamp=IsNow(tz=timezone.utc), + ), + ModelRequest( + parts=[ + ToolReturnPart( + tool_name='get_weather', + content='Paris: Sunny, 22°C', + tool_call_id='call_paris', + timestamp=IsNow(tz=timezone.utc), + ), + ToolReturnPart( + tool_name='get_weather', + content='London: Rainy, 15°C', + tool_call_id='call_london', + timestamp=IsNow(tz=timezone.utc), + ), + ], + run_id=IsStr(), + ), + ] + + chat_model = OpenAIChatModel('gpt-4o', provider=OpenAIProvider(api_key=openai_api_key)) + our_count: RequestUsage = await chat_model.count_tokens(test_messages, {}, ModelRequestParameters()) + + openai_messages = await chat_model._map_messages(test_messages, ModelRequestParameters()) # pyright: ignore[reportPrivateUsage] + response = await chat_model.client.chat.completions.create( + model='gpt-4o', + messages=openai_messages, + max_completion_tokens=1, + ) + + api_prompt_tokens = response.usage.prompt_tokens if response.usage else 0 + _assert_token_count_within_tolerance(our_count.input_tokens, api_prompt_tokens, test_name='multi_tool') + + +@pytest.mark.vcr() +async def test_count_tokens_with_name_field( + allow_model_requests: None, + openai_api_key: str, + mock_tiktoken_encoding: None, +): + """Verify token counting for messages with name fields against OpenAI API.""" + test_messages: list[ModelMessage] = [ + ModelRequest( + parts=[ + SystemPromptPart( + content='You are a helpful assistant.', + timestamp=IsNow(tz=timezone.utc), + ), + UserPromptPart( + content='Hello, my name is Alice.', + timestamp=IsNow(tz=timezone.utc), + ), + ], + run_id=IsStr(), + ), + ModelResponse( + parts=[ + TextPart(content='Hello Alice! 
How can I help you today?'), + ], + usage=RequestUsage(input_tokens=15, output_tokens=10), + model_name='gpt-4o', + timestamp=IsNow(tz=timezone.utc), + ), + ModelRequest( + parts=[ + UserPromptPart( + content='What is 2 + 2?', + timestamp=IsNow(tz=timezone.utc), + ), + ], + run_id=IsStr(), + ), + ] + + chat_model = OpenAIChatModel('gpt-4o', provider=OpenAIProvider(api_key=openai_api_key)) + our_count: RequestUsage = await chat_model.count_tokens(test_messages, {}, ModelRequestParameters()) + + openai_messages = await chat_model._map_messages(test_messages, ModelRequestParameters()) # pyright: ignore[reportPrivateUsage] + response = await chat_model.client.chat.completions.create( + model='gpt-4o', + messages=openai_messages, + max_completion_tokens=1, + ) + + api_prompt_tokens = response.usage.prompt_tokens if response.usage else 0 + _assert_token_count_within_tolerance(our_count.input_tokens, api_prompt_tokens, test_name='with_name_field') + + +@pytest.mark.vcr() +async def test_count_tokens_gpt4o_mini( + allow_model_requests: None, + openai_api_key: str, + mock_tiktoken_encoding: None, +): + """Verify token counting for gpt-4o-mini model against OpenAI API.""" + test_messages: list[ModelMessage] = [ + ModelRequest( + parts=[ + SystemPromptPart( + content='You are a helpful assistant.', + timestamp=IsNow(tz=timezone.utc), + ), + UserPromptPart( + content='Explain quantum computing in one sentence.', + timestamp=IsNow(tz=timezone.utc), + ), + ], + run_id=IsStr(), + ) + ] + + chat_model = OpenAIChatModel('gpt-4o-mini', provider=OpenAIProvider(api_key=openai_api_key)) + our_count: RequestUsage = await chat_model.count_tokens(test_messages, {}, ModelRequestParameters()) + + openai_messages = await chat_model._map_messages(test_messages, ModelRequestParameters()) # pyright: ignore[reportPrivateUsage] + response = await chat_model.client.chat.completions.create( + model='gpt-4o-mini', + messages=openai_messages, + max_completion_tokens=1, + ) + + api_prompt_tokens = response.usage.prompt_tokens if response.usage else 0 + _assert_token_count_within_tolerance(our_count.input_tokens, api_prompt_tokens, test_name='gpt4o_mini') + + +@pytest.mark.vcr() +async def test_openai_model_usage_limit_not_exceeded( + allow_model_requests: None, + openai_api_key: str, + mock_tiktoken_encoding: None, +): + provider = OpenAIProvider(api_key=openai_api_key) + model = OpenAIResponsesModel('gpt-4', provider=provider) + agent = Agent(model=model) + + result = await agent.run( + 'The quick brown fox jumps over the lazydog.', + usage_limits=UsageLimits(input_tokens_limit=25, count_tokens_before_request=True), + ) + assert result.output == snapshot( + "This sentence is famous because it contains every letter in the English alphabet. It's often used to display different fonts or for typing practice. Interestingly, the dog's supposed laziness does not intervene with the fox's athletic endeavor." + ) + + +@pytest.mark.vcr() +async def test_openai_model_usage_limit_exceeded( + allow_model_requests: None, + openai_api_key: str, + mock_tiktoken_encoding: None, +): + provider = OpenAIProvider(api_key=openai_api_key) + model = OpenAIResponsesModel('gpt-4', provider=provider) + agent = Agent(model=model) + + with pytest.raises( + UsageLimitExceeded, match='The next request would exceed the input_tokens_limit of 25 \\(input_tokens=28\\)' + ): + _ = await agent.run( + 'The quick brown fox jumps over the lazydog. 
The quick brown fox jumps over the lazydog.', + usage_limits=UsageLimits(input_tokens_limit=25, count_tokens_before_request=True), + ) + + +@pytest.mark.vcr() +async def test_unsupported_model( + allow_model_requests: None, + openai_api_key: str, + mock_tiktoken_encoding: None, +): + ollama_model = OpenAIChatModel( + model_name='llama3.2:1b', + provider=OllamaProvider(base_url='http://localhost:11434/v1'), + ) + agent = Agent(model=ollama_model) + + with pytest.raises(NotImplementedError, match='Token counting is only supported for OpenAI system.'): + _ = await agent.run( + 'Hello, world!', usage_limits=UsageLimits(input_tokens_limit=25, count_tokens_before_request=True) + ) diff --git a/uv.lock b/uv.lock index 30fbbf591c..a6929c03f6 100644 --- a/uv.lock +++ b/uv.lock @@ -5609,6 +5609,7 @@ mistral = [ ] openai = [ { name = "openai" }, + { name = "tiktoken" }, ] openrouter = [ { name = "openai" }, @@ -5702,6 +5703,7 @@ requires-dist = [ { name = "tavily-python", marker = "extra == 'tavily'", specifier = ">=0.5.0" }, { name = "temporalio", marker = "extra == 'temporal'", specifier = "==1.20.0" }, { name = "tenacity", marker = "extra == 'retries'", specifier = ">=8.2.3" }, + { name = "tiktoken", marker = "extra == 'openai'", specifier = ">=0.12.0" }, { name = "torch", marker = "(platform_machine != 'x86_64' and extra == 'outlines-transformers') or (sys_platform != 'darwin' and extra == 'outlines-transformers')" }, { name = "torch", marker = "(platform_machine != 'x86_64' and extra == 'outlines-vllm-offline') or (sys_platform != 'darwin' and extra == 'outlines-vllm-offline')" }, { name = "transformers", marker = "extra == 'outlines-transformers'", specifier = ">=4.0.0" }, @@ -7417,38 +7419,63 @@ wheels = [ [[package]] name = "tiktoken" -version = "0.9.0" +version = "0.12.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "regex" }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ea/cf/756fedf6981e82897f2d570dd25fa597eb3f4459068ae0572d7e888cfd6f/tiktoken-0.9.0.tar.gz", hash = "sha256:d02a5ca6a938e0490e1ff957bc48c8b078c88cb83977be1625b1fd8aac792c5d", size = 35991, upload-time = "2025-02-14T06:03:01.003Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/64/f3/50ec5709fad61641e4411eb1b9ac55b99801d71f1993c29853f256c726c9/tiktoken-0.9.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:586c16358138b96ea804c034b8acf3f5d3f0258bd2bc3b0227af4af5d622e382", size = 1065770, upload-time = "2025-02-14T06:02:01.251Z" }, - { url = "https://files.pythonhosted.org/packages/d6/f8/5a9560a422cf1755b6e0a9a436e14090eeb878d8ec0f80e0cd3d45b78bf4/tiktoken-0.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d9c59ccc528c6c5dd51820b3474402f69d9a9e1d656226848ad68a8d5b2e5108", size = 1009314, upload-time = "2025-02-14T06:02:02.869Z" }, - { url = "https://files.pythonhosted.org/packages/bc/20/3ed4cfff8f809cb902900ae686069e029db74567ee10d017cb254df1d598/tiktoken-0.9.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f0968d5beeafbca2a72c595e8385a1a1f8af58feaebb02b227229b69ca5357fd", size = 1143140, upload-time = "2025-02-14T06:02:04.165Z" }, - { url = "https://files.pythonhosted.org/packages/f1/95/cc2c6d79df8f113bdc6c99cdec985a878768120d87d839a34da4bd3ff90a/tiktoken-0.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:92a5fb085a6a3b7350b8fc838baf493317ca0e17bd95e8642f95fc69ecfed1de", size = 1197860, upload-time = "2025-02-14T06:02:06.268Z" }, - { url = 
"https://files.pythonhosted.org/packages/c7/6c/9c1a4cc51573e8867c9381db1814223c09ebb4716779c7f845d48688b9c8/tiktoken-0.9.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:15a2752dea63d93b0332fb0ddb05dd909371ededa145fe6a3242f46724fa7990", size = 1259661, upload-time = "2025-02-14T06:02:08.889Z" }, - { url = "https://files.pythonhosted.org/packages/cd/4c/22eb8e9856a2b1808d0a002d171e534eac03f96dbe1161978d7389a59498/tiktoken-0.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:26113fec3bd7a352e4b33dbaf1bd8948de2507e30bd95a44e2b1156647bc01b4", size = 894026, upload-time = "2025-02-14T06:02:12.841Z" }, - { url = "https://files.pythonhosted.org/packages/4d/ae/4613a59a2a48e761c5161237fc850eb470b4bb93696db89da51b79a871f1/tiktoken-0.9.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:f32cc56168eac4851109e9b5d327637f15fd662aa30dd79f964b7c39fbadd26e", size = 1065987, upload-time = "2025-02-14T06:02:14.174Z" }, - { url = "https://files.pythonhosted.org/packages/3f/86/55d9d1f5b5a7e1164d0f1538a85529b5fcba2b105f92db3622e5d7de6522/tiktoken-0.9.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:45556bc41241e5294063508caf901bf92ba52d8ef9222023f83d2483a3055348", size = 1009155, upload-time = "2025-02-14T06:02:15.384Z" }, - { url = "https://files.pythonhosted.org/packages/03/58/01fb6240df083b7c1916d1dcb024e2b761213c95d576e9f780dfb5625a76/tiktoken-0.9.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03935988a91d6d3216e2ec7c645afbb3d870b37bcb67ada1943ec48678e7ee33", size = 1142898, upload-time = "2025-02-14T06:02:16.666Z" }, - { url = "https://files.pythonhosted.org/packages/b1/73/41591c525680cd460a6becf56c9b17468d3711b1df242c53d2c7b2183d16/tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b3d80aad8d2c6b9238fc1a5524542087c52b860b10cbf952429ffb714bc1136", size = 1197535, upload-time = "2025-02-14T06:02:18.595Z" }, - { url = "https://files.pythonhosted.org/packages/7d/7c/1069f25521c8f01a1a182f362e5c8e0337907fae91b368b7da9c3e39b810/tiktoken-0.9.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b2a21133be05dc116b1d0372af051cd2c6aa1d2188250c9b553f9fa49301b336", size = 1259548, upload-time = "2025-02-14T06:02:20.729Z" }, - { url = "https://files.pythonhosted.org/packages/6f/07/c67ad1724b8e14e2b4c8cca04b15da158733ac60136879131db05dda7c30/tiktoken-0.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:11a20e67fdf58b0e2dea7b8654a288e481bb4fc0289d3ad21291f8d0849915fb", size = 893895, upload-time = "2025-02-14T06:02:22.67Z" }, - { url = "https://files.pythonhosted.org/packages/cf/e5/21ff33ecfa2101c1bb0f9b6df750553bd873b7fb532ce2cb276ff40b197f/tiktoken-0.9.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e88f121c1c22b726649ce67c089b90ddda8b9662545a8aeb03cfef15967ddd03", size = 1065073, upload-time = "2025-02-14T06:02:24.768Z" }, - { url = "https://files.pythonhosted.org/packages/8e/03/a95e7b4863ee9ceec1c55983e4cc9558bcfd8f4f80e19c4f8a99642f697d/tiktoken-0.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a6600660f2f72369acb13a57fb3e212434ed38b045fd8cc6cdd74947b4b5d210", size = 1008075, upload-time = "2025-02-14T06:02:26.92Z" }, - { url = "https://files.pythonhosted.org/packages/40/10/1305bb02a561595088235a513ec73e50b32e74364fef4de519da69bc8010/tiktoken-0.9.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:95e811743b5dfa74f4b227927ed86cbc57cad4df859cb3b643be797914e41794", size = 1140754, upload-time = "2025-02-14T06:02:28.124Z" }, - { url = 
"https://files.pythonhosted.org/packages/1b/40/da42522018ca496432ffd02793c3a72a739ac04c3794a4914570c9bb2925/tiktoken-0.9.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99376e1370d59bcf6935c933cb9ba64adc29033b7e73f5f7569f3aad86552b22", size = 1196678, upload-time = "2025-02-14T06:02:29.845Z" }, - { url = "https://files.pythonhosted.org/packages/5c/41/1e59dddaae270ba20187ceb8aa52c75b24ffc09f547233991d5fd822838b/tiktoken-0.9.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:badb947c32739fb6ddde173e14885fb3de4d32ab9d8c591cbd013c22b4c31dd2", size = 1259283, upload-time = "2025-02-14T06:02:33.838Z" }, - { url = "https://files.pythonhosted.org/packages/5b/64/b16003419a1d7728d0d8c0d56a4c24325e7b10a21a9dd1fc0f7115c02f0a/tiktoken-0.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:5a62d7a25225bafed786a524c1b9f0910a1128f4232615bf3f8257a73aaa3b16", size = 894897, upload-time = "2025-02-14T06:02:36.265Z" }, - { url = "https://files.pythonhosted.org/packages/7a/11/09d936d37f49f4f494ffe660af44acd2d99eb2429d60a57c71318af214e0/tiktoken-0.9.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2b0e8e05a26eda1249e824156d537015480af7ae222ccb798e5234ae0285dbdb", size = 1064919, upload-time = "2025-02-14T06:02:37.494Z" }, - { url = "https://files.pythonhosted.org/packages/80/0e/f38ba35713edb8d4197ae602e80837d574244ced7fb1b6070b31c29816e0/tiktoken-0.9.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:27d457f096f87685195eea0165a1807fae87b97b2161fe8c9b1df5bd74ca6f63", size = 1007877, upload-time = "2025-02-14T06:02:39.516Z" }, - { url = "https://files.pythonhosted.org/packages/fe/82/9197f77421e2a01373e27a79dd36efdd99e6b4115746ecc553318ecafbf0/tiktoken-0.9.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cf8ded49cddf825390e36dd1ad35cd49589e8161fdcb52aa25f0583e90a3e01", size = 1140095, upload-time = "2025-02-14T06:02:41.791Z" }, - { url = "https://files.pythonhosted.org/packages/f2/bb/4513da71cac187383541facd0291c4572b03ec23c561de5811781bbd988f/tiktoken-0.9.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc156cb314119a8bb9748257a2eaebd5cc0753b6cb491d26694ed42fc7cb3139", size = 1195649, upload-time = "2025-02-14T06:02:43Z" }, - { url = "https://files.pythonhosted.org/packages/fa/5c/74e4c137530dd8504e97e3a41729b1103a4ac29036cbfd3250b11fd29451/tiktoken-0.9.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:cd69372e8c9dd761f0ab873112aba55a0e3e506332dd9f7522ca466e817b1b7a", size = 1258465, upload-time = "2025-02-14T06:02:45.046Z" }, - { url = "https://files.pythonhosted.org/packages/de/a8/8f499c179ec900783ffe133e9aab10044481679bb9aad78436d239eee716/tiktoken-0.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:5ea0edb6f83dc56d794723286215918c1cde03712cbbafa0348b33448faf5b95", size = 894669, upload-time = "2025-02-14T06:02:47.341Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/7d/ab/4d017d0f76ec3171d469d80fc03dfbb4e48a4bcaddaa831b31d526f05edc/tiktoken-0.12.0.tar.gz", hash = "sha256:b18ba7ee2b093863978fcb14f74b3707cdc8d4d4d3836853ce7ec60772139931", size = 37806, upload-time = "2025-10-06T20:22:45.419Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/89/b3/2cb7c17b6c4cf8ca983204255d3f1d95eda7213e247e6947a0ee2c747a2c/tiktoken-0.12.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3de02f5a491cfd179aec916eddb70331814bd6bf764075d39e21d5862e533970", size = 1051991, upload-time = "2025-10-06T20:21:34.098Z" }, + { url = 
"https://files.pythonhosted.org/packages/27/0f/df139f1df5f6167194ee5ab24634582ba9a1b62c6b996472b0277ec80f66/tiktoken-0.12.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b6cfb6d9b7b54d20af21a912bfe63a2727d9cfa8fbda642fd8322c70340aad16", size = 995798, upload-time = "2025-10-06T20:21:35.579Z" }, + { url = "https://files.pythonhosted.org/packages/ef/5d/26a691f28ab220d5edc09b9b787399b130f24327ef824de15e5d85ef21aa/tiktoken-0.12.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:cde24cdb1b8a08368f709124f15b36ab5524aac5fa830cc3fdce9c03d4fb8030", size = 1129865, upload-time = "2025-10-06T20:21:36.675Z" }, + { url = "https://files.pythonhosted.org/packages/b2/94/443fab3d4e5ebecac895712abd3849b8da93b7b7dec61c7db5c9c7ebe40c/tiktoken-0.12.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:6de0da39f605992649b9cfa6f84071e3f9ef2cec458d08c5feb1b6f0ff62e134", size = 1152856, upload-time = "2025-10-06T20:21:37.873Z" }, + { url = "https://files.pythonhosted.org/packages/54/35/388f941251b2521c70dd4c5958e598ea6d2c88e28445d2fb8189eecc1dfc/tiktoken-0.12.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:6faa0534e0eefbcafaccb75927a4a380463a2eaa7e26000f0173b920e98b720a", size = 1195308, upload-time = "2025-10-06T20:21:39.577Z" }, + { url = "https://files.pythonhosted.org/packages/f8/00/c6681c7f833dd410576183715a530437a9873fa910265817081f65f9105f/tiktoken-0.12.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:82991e04fc860afb933efb63957affc7ad54f83e2216fe7d319007dab1ba5892", size = 1255697, upload-time = "2025-10-06T20:21:41.154Z" }, + { url = "https://files.pythonhosted.org/packages/5f/d2/82e795a6a9bafa034bf26a58e68fe9a89eeaaa610d51dbeb22106ba04f0a/tiktoken-0.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:6fb2995b487c2e31acf0a9e17647e3b242235a20832642bb7a9d1a181c0c1bb1", size = 879375, upload-time = "2025-10-06T20:21:43.201Z" }, + { url = "https://files.pythonhosted.org/packages/de/46/21ea696b21f1d6d1efec8639c204bdf20fde8bafb351e1355c72c5d7de52/tiktoken-0.12.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:6e227c7f96925003487c33b1b32265fad2fbcec2b7cf4817afb76d416f40f6bb", size = 1051565, upload-time = "2025-10-06T20:21:44.566Z" }, + { url = "https://files.pythonhosted.org/packages/c9/d9/35c5d2d9e22bb2a5f74ba48266fb56c63d76ae6f66e02feb628671c0283e/tiktoken-0.12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c06cf0fcc24c2cb2adb5e185c7082a82cba29c17575e828518c2f11a01f445aa", size = 995284, upload-time = "2025-10-06T20:21:45.622Z" }, + { url = "https://files.pythonhosted.org/packages/01/84/961106c37b8e49b9fdcf33fe007bb3a8fdcc380c528b20cc7fbba80578b8/tiktoken-0.12.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:f18f249b041851954217e9fd8e5c00b024ab2315ffda5ed77665a05fa91f42dc", size = 1129201, upload-time = "2025-10-06T20:21:47.074Z" }, + { url = "https://files.pythonhosted.org/packages/6a/d0/3d9275198e067f8b65076a68894bb52fd253875f3644f0a321a720277b8a/tiktoken-0.12.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:47a5bc270b8c3db00bb46ece01ef34ad050e364b51d406b6f9730b64ac28eded", size = 1152444, upload-time = "2025-10-06T20:21:48.139Z" }, + { url = "https://files.pythonhosted.org/packages/78/db/a58e09687c1698a7c592e1038e01c206569b86a0377828d51635561f8ebf/tiktoken-0.12.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:508fa71810c0efdcd1b898fda574889ee62852989f7c1667414736bcb2b9a4bd", size = 1195080, upload-time = "2025-10-06T20:21:49.246Z" }, + { url = 
"https://files.pythonhosted.org/packages/9e/1b/a9e4d2bf91d515c0f74afc526fd773a812232dd6cda33ebea7f531202325/tiktoken-0.12.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a1af81a6c44f008cba48494089dd98cccb8b313f55e961a52f5b222d1e507967", size = 1255240, upload-time = "2025-10-06T20:21:50.274Z" }, + { url = "https://files.pythonhosted.org/packages/9d/15/963819345f1b1fb0809070a79e9dd96938d4ca41297367d471733e79c76c/tiktoken-0.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:3e68e3e593637b53e56f7237be560f7a394451cb8c11079755e80ae64b9e6def", size = 879422, upload-time = "2025-10-06T20:21:51.734Z" }, + { url = "https://files.pythonhosted.org/packages/a4/85/be65d39d6b647c79800fd9d29241d081d4eeb06271f383bb87200d74cf76/tiktoken-0.12.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b97f74aca0d78a1ff21b8cd9e9925714c15a9236d6ceacf5c7327c117e6e21e8", size = 1050728, upload-time = "2025-10-06T20:21:52.756Z" }, + { url = "https://files.pythonhosted.org/packages/4a/42/6573e9129bc55c9bf7300b3a35bef2c6b9117018acca0dc760ac2d93dffe/tiktoken-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2b90f5ad190a4bb7c3eb30c5fa32e1e182ca1ca79f05e49b448438c3e225a49b", size = 994049, upload-time = "2025-10-06T20:21:53.782Z" }, + { url = "https://files.pythonhosted.org/packages/66/c5/ed88504d2f4a5fd6856990b230b56d85a777feab84e6129af0822f5d0f70/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:65b26c7a780e2139e73acc193e5c63ac754021f160df919add909c1492c0fb37", size = 1129008, upload-time = "2025-10-06T20:21:54.832Z" }, + { url = "https://files.pythonhosted.org/packages/f4/90/3dae6cc5436137ebd38944d396b5849e167896fc2073da643a49f372dc4f/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:edde1ec917dfd21c1f2f8046b86348b0f54a2c0547f68149d8600859598769ad", size = 1152665, upload-time = "2025-10-06T20:21:56.129Z" }, + { url = "https://files.pythonhosted.org/packages/a3/fe/26df24ce53ffde419a42f5f53d755b995c9318908288c17ec3f3448313a3/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:35a2f8ddd3824608b3d650a000c1ef71f730d0c56486845705a8248da00f9fe5", size = 1194230, upload-time = "2025-10-06T20:21:57.546Z" }, + { url = "https://files.pythonhosted.org/packages/20/cc/b064cae1a0e9fac84b0d2c46b89f4e57051a5f41324e385d10225a984c24/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83d16643edb7fa2c99eff2ab7733508aae1eebb03d5dfc46f5565862810f24e3", size = 1254688, upload-time = "2025-10-06T20:21:58.619Z" }, + { url = "https://files.pythonhosted.org/packages/81/10/b8523105c590c5b8349f2587e2fdfe51a69544bd5a76295fc20f2374f470/tiktoken-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:ffc5288f34a8bc02e1ea7047b8d041104791d2ddbf42d1e5fa07822cbffe16bd", size = 878694, upload-time = "2025-10-06T20:21:59.876Z" }, + { url = "https://files.pythonhosted.org/packages/00/61/441588ee21e6b5cdf59d6870f86beb9789e532ee9718c251b391b70c68d6/tiktoken-0.12.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:775c2c55de2310cc1bc9a3ad8826761cbdc87770e586fd7b6da7d4589e13dab3", size = 1050802, upload-time = "2025-10-06T20:22:00.96Z" }, + { url = "https://files.pythonhosted.org/packages/1f/05/dcf94486d5c5c8d34496abe271ac76c5b785507c8eae71b3708f1ad9b45a/tiktoken-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a01b12f69052fbe4b080a2cfb867c4de12c704b56178edf1d1d7b273561db160", size = 993995, upload-time = "2025-10-06T20:22:02.788Z" }, + { url = 
"https://files.pythonhosted.org/packages/a0/70/5163fe5359b943f8db9946b62f19be2305de8c3d78a16f629d4165e2f40e/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:01d99484dc93b129cd0964f9d34eee953f2737301f18b3c7257bf368d7615baa", size = 1128948, upload-time = "2025-10-06T20:22:03.814Z" }, + { url = "https://files.pythonhosted.org/packages/0c/da/c028aa0babf77315e1cef357d4d768800c5f8a6de04d0eac0f377cb619fa/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:4a1a4fcd021f022bfc81904a911d3df0f6543b9e7627b51411da75ff2fe7a1be", size = 1151986, upload-time = "2025-10-06T20:22:05.173Z" }, + { url = "https://files.pythonhosted.org/packages/a0/5a/886b108b766aa53e295f7216b509be95eb7d60b166049ce2c58416b25f2a/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:981a81e39812d57031efdc9ec59fa32b2a5a5524d20d4776574c4b4bd2e9014a", size = 1194222, upload-time = "2025-10-06T20:22:06.265Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f8/4db272048397636ac7a078d22773dd2795b1becee7bc4922fe6207288d57/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9baf52f84a3f42eef3ff4e754a0db79a13a27921b457ca9832cf944c6be4f8f3", size = 1255097, upload-time = "2025-10-06T20:22:07.403Z" }, + { url = "https://files.pythonhosted.org/packages/8e/32/45d02e2e0ea2be3a9ed22afc47d93741247e75018aac967b713b2941f8ea/tiktoken-0.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:b8a0cd0c789a61f31bf44851defbd609e8dd1e2c8589c614cc1060940ef1f697", size = 879117, upload-time = "2025-10-06T20:22:08.418Z" }, + { url = "https://files.pythonhosted.org/packages/ce/76/994fc868f88e016e6d05b0da5ac24582a14c47893f4474c3e9744283f1d5/tiktoken-0.12.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d5f89ea5680066b68bcb797ae85219c72916c922ef0fcdd3480c7d2315ffff16", size = 1050309, upload-time = "2025-10-06T20:22:10.939Z" }, + { url = "https://files.pythonhosted.org/packages/f6/b8/57ef1456504c43a849821920d582a738a461b76a047f352f18c0b26c6516/tiktoken-0.12.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b4e7ed1c6a7a8a60a3230965bdedba8cc58f68926b835e519341413370e0399a", size = 993712, upload-time = "2025-10-06T20:22:12.115Z" }, + { url = "https://files.pythonhosted.org/packages/72/90/13da56f664286ffbae9dbcfadcc625439142675845baa62715e49b87b68b/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:fc530a28591a2d74bce821d10b418b26a094bf33839e69042a6e86ddb7a7fb27", size = 1128725, upload-time = "2025-10-06T20:22:13.541Z" }, + { url = "https://files.pythonhosted.org/packages/05/df/4f80030d44682235bdaecd7346c90f67ae87ec8f3df4a3442cb53834f7e4/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:06a9f4f49884139013b138920a4c393aa6556b2f8f536345f11819389c703ebb", size = 1151875, upload-time = "2025-10-06T20:22:14.559Z" }, + { url = "https://files.pythonhosted.org/packages/22/1f/ae535223a8c4ef4c0c1192e3f9b82da660be9eb66b9279e95c99288e9dab/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:04f0e6a985d95913cabc96a741c5ffec525a2c72e9df086ff17ebe35985c800e", size = 1194451, upload-time = "2025-10-06T20:22:15.545Z" }, + { url = "https://files.pythonhosted.org/packages/78/a7/f8ead382fce0243cb625c4f266e66c27f65ae65ee9e77f59ea1653b6d730/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:0ee8f9ae00c41770b5f9b0bb1235474768884ae157de3beb5439ca0fd70f3e25", size = 1253794, upload-time = "2025-10-06T20:22:16.624Z" }, + { url = 
"https://files.pythonhosted.org/packages/93/e0/6cc82a562bc6365785a3ff0af27a2a092d57c47d7a81d9e2295d8c36f011/tiktoken-0.12.0-cp313-cp313t-win_amd64.whl", hash = "sha256:dc2dd125a62cb2b3d858484d6c614d136b5b848976794edfb63688d539b8b93f", size = 878777, upload-time = "2025-10-06T20:22:18.036Z" }, + { url = "https://files.pythonhosted.org/packages/72/05/3abc1db5d2c9aadc4d2c76fa5640134e475e58d9fbb82b5c535dc0de9b01/tiktoken-0.12.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a90388128df3b3abeb2bfd1895b0681412a8d7dc644142519e6f0a97c2111646", size = 1050188, upload-time = "2025-10-06T20:22:19.563Z" }, + { url = "https://files.pythonhosted.org/packages/e3/7b/50c2f060412202d6c95f32b20755c7a6273543b125c0985d6fa9465105af/tiktoken-0.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:da900aa0ad52247d8794e307d6446bd3cdea8e192769b56276695d34d2c9aa88", size = 993978, upload-time = "2025-10-06T20:22:20.702Z" }, + { url = "https://files.pythonhosted.org/packages/14/27/bf795595a2b897e271771cd31cb847d479073497344c637966bdf2853da1/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:285ba9d73ea0d6171e7f9407039a290ca77efcdb026be7769dccc01d2c8d7fff", size = 1129271, upload-time = "2025-10-06T20:22:22.06Z" }, + { url = "https://files.pythonhosted.org/packages/f5/de/9341a6d7a8f1b448573bbf3425fa57669ac58258a667eb48a25dfe916d70/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:d186a5c60c6a0213f04a7a802264083dea1bbde92a2d4c7069e1a56630aef830", size = 1151216, upload-time = "2025-10-06T20:22:23.085Z" }, + { url = "https://files.pythonhosted.org/packages/75/0d/881866647b8d1be4d67cb24e50d0c26f9f807f994aa1510cb9ba2fe5f612/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:604831189bd05480f2b885ecd2d1986dc7686f609de48208ebbbddeea071fc0b", size = 1194860, upload-time = "2025-10-06T20:22:24.602Z" }, + { url = "https://files.pythonhosted.org/packages/b3/1e/b651ec3059474dab649b8d5b69f5c65cd8fcd8918568c1935bd4136c9392/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8f317e8530bb3a222547b85a58583238c8f74fd7a7408305f9f63246d1a0958b", size = 1254567, upload-time = "2025-10-06T20:22:25.671Z" }, + { url = "https://files.pythonhosted.org/packages/80/57/ce64fd16ac390fafde001268c364d559447ba09b509181b2808622420eec/tiktoken-0.12.0-cp314-cp314-win_amd64.whl", hash = "sha256:399c3dd672a6406719d84442299a490420b458c44d3ae65516302a99675888f3", size = 921067, upload-time = "2025-10-06T20:22:26.753Z" }, + { url = "https://files.pythonhosted.org/packages/ac/a4/72eed53e8976a099539cdd5eb36f241987212c29629d0a52c305173e0a68/tiktoken-0.12.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c2c714c72bc00a38ca969dae79e8266ddec999c7ceccd603cc4f0d04ccd76365", size = 1050473, upload-time = "2025-10-06T20:22:27.775Z" }, + { url = "https://files.pythonhosted.org/packages/e6/d7/0110b8f54c008466b19672c615f2168896b83706a6611ba6e47313dbc6e9/tiktoken-0.12.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cbb9a3ba275165a2cb0f9a83f5d7025afe6b9d0ab01a22b50f0e74fee2ad253e", size = 993855, upload-time = "2025-10-06T20:22:28.799Z" }, + { url = "https://files.pythonhosted.org/packages/5f/77/4f268c41a3957c418b084dd576ea2fad2e95da0d8e1ab705372892c2ca22/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:dfdfaa5ffff8993a3af94d1125870b1d27aed7cb97aa7eb8c1cefdbc87dbee63", size = 1129022, upload-time = "2025-10-06T20:22:29.981Z" }, + { url = 
"https://files.pythonhosted.org/packages/4e/2b/fc46c90fe5028bd094cd6ee25a7db321cb91d45dc87531e2bdbb26b4867a/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:584c3ad3d0c74f5269906eb8a659c8bfc6144a52895d9261cdaf90a0ae5f4de0", size = 1150736, upload-time = "2025-10-06T20:22:30.996Z" }, + { url = "https://files.pythonhosted.org/packages/28/c0/3c7a39ff68022ddfd7d93f3337ad90389a342f761c4d71de99a3ccc57857/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:54c891b416a0e36b8e2045b12b33dd66fb34a4fe7965565f1b482da50da3e86a", size = 1194908, upload-time = "2025-10-06T20:22:32.073Z" }, + { url = "https://files.pythonhosted.org/packages/ab/0d/c1ad6f4016a3968c048545f5d9b8ffebf577774b2ede3e2e352553b685fe/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5edb8743b88d5be814b1a8a8854494719080c28faaa1ccbef02e87354fe71ef0", size = 1253706, upload-time = "2025-10-06T20:22:33.385Z" }, + { url = "https://files.pythonhosted.org/packages/af/df/c7891ef9d2712ad774777271d39fdef63941ffba0a9d59b7ad1fd2765e57/tiktoken-0.12.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f61c0aea5565ac82e2ec50a05e02a6c44734e91b51c10510b084ea1b8e633a71", size = 920667, upload-time = "2025-10-06T20:22:34.444Z" }, ] [[package]]