From 5d3fe04e2aca835f5492e85570458e7e60513c52 Mon Sep 17 00:00:00 2001 From: ericcccliu Date: Fri, 26 Apr 2024 15:54:15 -0500 Subject: [PATCH] fix alllama usage (remove usage tracking) --- api/utils/llm_providers/alllama.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/api/utils/llm_providers/alllama.py b/api/utils/llm_providers/alllama.py index a565c6c..3df89e7 100644 --- a/api/utils/llm_providers/alllama.py +++ b/api/utils/llm_providers/alllama.py @@ -1,11 +1,7 @@ # api/utils/llm_providers/alllama.py - -import tiktoken from openai import AsyncOpenAI from starlette.config import Config -from api.utils.conversation_utils import update_user_usage - config = Config(".env") client = AsyncOpenAI( base_url=config("ALLLAMA_API_BASE_URL"), api_key=config("ALLLAMA_API_KEY") @@ -17,10 +13,6 @@ async def alllama_generate_response(conversation): for message in conversation.messages ] - # Count the input tokens - encoding = tiktoken.encoding_for_model(conversation.model.name) - input_tokens = sum(len(encoding.encode(message["content"])) for message in messages) - stream = await client.chat.completions.create( max_tokens=1500, model=conversation.model.name, @@ -29,16 +21,10 @@ async def alllama_generate_response(conversation): ) collected_chunks = [] - output_tokens = 0 async for chunk in stream: content = chunk.choices[0].delta.content if content is None: content = "" collected_chunks.append(content) - output_tokens += len(encoding.encode(content)) yield content - # Update the user's usage - await update_user_usage( - conversation.user_email, conversation.model.name, input_tokens, output_tokens - )