Merge pull request #65 from it-at-m/fix-max-tokens
Fix max tokens and version
pilitz authored Sep 11, 2024
2 parents 28d613b + 445bb6a commit 2b96e14
Showing 24 changed files with 172 additions and 101 deletions.
13 changes: 7 additions & 6 deletions app/backend/app.py
@@ -101,12 +101,12 @@ async def chat_stream():
try:
impl = cfg["chat_approaches"]
temperature=request_json['temperature'] or 0.7
-max_tokens=request_json['max_tokens'] or 4096
+max_output_tokens=request_json['max_output_tokens'] or 4096
system_message = request_json['system_message'] or None
model = request_json['model'] or "gpt-4o-mini"
response_generator = impl.run_with_streaming(history= request_json["history"],
temperature=temperature,
-max_tokens=max_tokens,
+max_output_tokens=max_output_tokens,
system_message=system_message,
model=model,
department= department)
@@ -128,13 +128,13 @@ async def chat():
try:
impl = cfg["chat_approaches"]
temperature=request_json['temperature'] or 0.7
-max_tokens=request_json['max_tokens'] or 4096
+max_output_tokens=request_json['max_output_tokens'] or 4096
model_name=request_json['model'] or "gpt-4o-mini"
system_message = request_json['system_message'] or None
history = request_json["history"]
chatResult = impl.run_without_streaming(history= history,
temperature=temperature,
-max_tokens=max_tokens,
+max_output_tokens=max_output_tokens,
system_message=system_message,
department= department,
model_name= model_name)
@@ -150,11 +150,12 @@ async def getConfig():
models= cast(List[ModelsConfig], cfg["configuration_features"]["backend"]["models"])
models_dto_list = []
for model in models:
-dto = ModelsDTO(model_name=model["model_name"], max_tokens=model["max_tokens"], description=model["description"])
+dto = ModelsDTO(model_name=model["model_name"], max_output_tokens=model["max_output_tokens"], max_input_tokens=model["max_input_tokens"], description=model["description"])
models_dto_list.append(dto)
return jsonify({
"frontend": frontend_features,
"models": models_dto_list
"models": models_dto_list,
"version": cfg["configuration_features"]["version"]
})

@bp.route("/statistics", methods=["GET"])
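A side note on the request parsing shown above: the `or` fallbacks only replace values that are present but falsy (null, 0, empty string); a missing key would still raise a `KeyError`. A minimal, self-contained sketch of that behavior — only the key names come from the diff, the payload values are made up for illustration:

```python
# Sketch of the `or`-based defaults used in chat() and chat_stream() above.
# The payload is hypothetical; only the key names appear in the diff.
request_json = {
    "temperature": None,      # client sent null -> default applies
    "max_output_tokens": 0,   # falsy, so this also falls back to 4096
    "model": "",              # empty string falls back to "gpt-4o-mini"
}

temperature = request_json["temperature"] or 0.7
max_output_tokens = request_json["max_output_tokens"] or 4096
model = request_json["model"] or "gpt-4o-mini"

print(temperature, max_output_tokens, model)  # 0.7 4096 gpt-4o-mini
```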
12 changes: 6 additions & 6 deletions app/backend/chat/chat.py
@@ -21,12 +21,12 @@ def __init__(self, llm: RunnableSerializable, config: ApproachConfig, repo: Repo
self.config = config
self.repo = repo

-async def run_with_streaming(self, history: 'list[dict[str, str]]',max_tokens: int, temperature: float, system_message: Optional[str], model: str, department: Optional[str]) -> AsyncGenerator[Chunk, None]:
+async def run_with_streaming(self, history: 'list[dict[str, str]]',max_output_tokens: int, temperature: float, system_message: Optional[str], model: str, department: Optional[str]) -> AsyncGenerator[Chunk, None]:
"""call the llm in streaming mode
Args:
history (list[dict[str, str]]): the history,user and ai messages
-max_tokens (int): max_tokens to generate
+max_output_tokens (int): max_output_tokens to generate
temperature (float): temperature of the llm
system_message (Optional[str]): the system message
department (Optional[str]): from which department comes the call
@@ -40,7 +40,7 @@ async def run_with_streaming(self, history: 'list[dict[str, str]]',max_tokens: i
"""
# configure
config: LlmConfigs = {
"llm_max_tokens": max_tokens,
"llm_max_tokens": max_output_tokens,
"llm_temperature": temperature,
"llm_streaming": True,
"llm": model
@@ -74,12 +74,12 @@ async def run_with_streaming(self, history: 'list[dict[str, str]]',max_tokens: i
info = ChunkInfo(requesttokens=num_tokens_from_messages([msgs[-1]],model), streamedtokens=num_tokens_from_messages([HumanMessage(result)], model))
yield Chunk(type="I", message=info, order=position)

-def run_without_streaming(self, history: "Sequence[dict[str, str]]", max_tokens: int, temperature: float, system_message: Optional[str], department: Optional[str], model_name:str) -> ChatResult:
+def run_without_streaming(self, history: "Sequence[dict[str, str]]", max_output_tokens: int, temperature: float, system_message: Optional[str], department: Optional[str], model_name:str) -> ChatResult:
"""calls the llm in blocking mode, returns the full result
Args:
history (list[dict[str, str]]): the history,user and ai messages
-max_tokens (int): max_tokens to generate
+max_output_tokens (int): max_output_tokens to generate
temperature (float): temperature of the llm
system_message (Optional[str]): the system message
department (Optional[str]): from which department comes the call
@@ -88,7 +88,7 @@ def run_without_streaming(self, history: "Sequence[dict[str, str]]", max_tokens:
ChatResult: the generated text from the llm
"""
config: LlmConfigs = {
"llm_max_tokens": max_tokens,
"llm_max_tokens": max_output_tokens,
"llm_temperature": temperature,
"llm_streaming": False,
}
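For orientation, a hedged sketch of how the renamed streaming path might be consumed. The approach object (`impl`, taken from `cfg["chat_approaches"]` as in app.py) and the history key names are assumptions; only the keyword names come from the new signature above:

```python
# Hypothetical consumer of run_with_streaming; it needs a configured approach
# object, so the final call is left commented out.
import asyncio

async def stream_demo(impl) -> None:
    async for chunk in impl.run_with_streaming(
        history=[{"user": "Hallo", "bot": "Hallo, wie kann ich helfen?"}],  # key names assumed
        max_output_tokens=1024,   # renamed from max_tokens in this PR
        temperature=0.7,
        system_message=None,
        model="gpt-4o-mini",
        department=None,
    ):
        print(chunk)  # Chunk objects as yielded by the generator above

# asyncio.run(stream_demo(impl))
```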
56 changes: 46 additions & 10 deletions app/backend/core/llmhelper.py
@@ -13,7 +13,7 @@ class ModelsConfigurationException(Exception):


def getModel(models: List[ModelsConfig],
-max_tokens: int,
+max_output_tokens: int,
n: int,
temperature: float,
streaming: bool) -> RunnableSerializable:
@@ -31,7 +31,7 @@ def getModel(models: List[ModelsConfig],
openai_api_key=default_model["api_key"],
azure_endpoint=default_model["endpoint"],
openai_api_version=default_model["api_version"],
-max_tokens=max_tokens,
+max_tokens=max_output_tokens,
n=n,
streaming=streaming,
temperature=temperature,
@@ -42,7 +42,7 @@ def getModel(models: List[ModelsConfig],
model=default_model["model_name"],
api_key=default_model["api_key"],
base_url=default_model["endpoint"],
-max_tokens=max_tokens,
+max_tokens=max_output_tokens,
n=n,
streaming=streaming,
temperature=temperature,
@@ -59,20 +59,60 @@ def getModel(models: List[ModelsConfig],
azure_endpoint=model["endpoint"],
openai_api_version=model["api_version"],
openai_api_type="azure",
-max_tokens=max_tokens,
+max_tokens=max_output_tokens,
n=n,
streaming=streaming,
temperature=temperature,
-)
+).configurable_fields(
+temperature=ConfigurableField(
+id="llm_temperature",
+name="LLM Temperature",
+description="The temperature of the LLM",
+),
+max_tokens= ConfigurableField(
+id="llm_max_tokens",
+name="LLM max Tokens",
+description="The token Limit of the LLM",
+),
+streaming = ConfigurableField(
+id="llm_streaming",
+name="Streaming",
+description="Should the LLM Stream"),
+callbacks = ConfigurableField(
+id="llm_callbacks",
+name="Callbacks",
+description="Callbacks for the llm")
+
+)
elif model["type"] == "OPENAI":
alternative = ChatOpenAI(
model=model["model_name"],
api_key=model["api_key"],
base_url=model["endpoint"],
-max_tokens=max_tokens,
+max_tokens=max_output_tokens,
n=n,
streaming=streaming,
temperature=temperature,
+).configurable_fields(
+temperature=ConfigurableField(
+id="llm_temperature",
+name="LLM Temperature",
+description="The temperature of the LLM",
+),
+max_tokens= ConfigurableField(
+id="llm_max_tokens",
+name="LLM max Tokens",
+description="The token Limit of the LLM",
+),
+streaming = ConfigurableField(
+id="llm_streaming",
+name="Streaming",
+description="Should the LLM Stream"),
+callbacks = ConfigurableField(
+id="llm_callbacks",
+name="Callbacks",
+description="Callbacks for the llm")
+
)
alternatives[model["model_name"]] = alternative
llm = llm.configurable_fields(
@@ -86,10 +86,6 @@ def getModel(models: List[ModelsConfig],
name="LLM max Tokens",
description="The token Limit of the LLM",
),
-openai_api_key = ConfigurableField(
-id="llm_api_key",
-name="The api key",
-description="The api key"),
streaming = ConfigurableField(
id="llm_streaming",
name="Streaming",
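The `ConfigurableField` ids registered above (`llm_max_tokens`, `llm_temperature`, `llm_streaming`, `llm_callbacks`) are what the `LlmConfigs` dict in chat.py refers to. A minimal standalone sketch of that LangChain pattern — the model name and placeholder API key are assumptions, and nothing is sent to any API:

```python
# Sketch of the configurable-fields pattern used in llmhelper.py.
from langchain_core.runnables import ConfigurableField
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(
    model="gpt-4o-mini",        # assumed model name
    api_key="sk-placeholder",   # placeholder; no request is made here
    max_tokens=256,
    temperature=0.7,
    streaming=False,
).configurable_fields(
    max_tokens=ConfigurableField(
        id="llm_max_tokens",
        name="LLM max Tokens",
        description="The token Limit of the LLM",
    ),
    temperature=ConfigurableField(
        id="llm_temperature",
        name="LLM Temperature",
        description="The temperature of the LLM",
    ),
)

# Per-request overrides address the fields by id, which is what the
# "llm_max_tokens" entry in chat.py's LlmConfigs maps onto:
configured = llm.with_config(configurable={"llm_max_tokens": 1024, "llm_temperature": 0.2})
```

This is also why the constructor calls above keep the keyword `max_tokens=max_output_tokens`: only the helper's own parameter was renamed, not the underlying client argument.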
6 changes: 4 additions & 2 deletions app/backend/core/types/Config.py
@@ -11,11 +11,13 @@ class ModelsConfig(TypedDict):
endpoint: str
api_key: str
api_version: str
-max_tokens: int
+max_output_tokens: int
+max_input_tokens: int

class ModelsDTO(TypedDict):
model_name: str
-max_tokens: int
+max_output_tokens: int
+max_input_tokens: int
description: str

class SSOConfig(TypedDict):
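The split into `max_output_tokens` and `max_input_tokens` is what the frontend changes below rely on: the settings drawer caps its token slider with `max_output_tokens`, while the input components count words against `max_input_tokens`. A self-contained sketch of the per-model DTO shape now served by `getConfig()` — the numbers and description are illustrative only:

```python
# Mirrors ModelsDTO from Config.py after this PR; the values are made up.
from typing import TypedDict

class ModelsDTO(TypedDict):
    model_name: str
    max_output_tokens: int
    max_input_tokens: int
    description: str

dto: ModelsDTO = {
    "model_name": "gpt-4o-mini",
    "max_output_tokens": 4096,
    "max_input_tokens": 128000,
    "description": "illustrative entry",
}
print(dto)
```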
1 change: 0 additions & 1 deletion app/backend/core/types/LlmConfigs.py
@@ -10,6 +10,5 @@ class LlmConfigs(TypedDict, total=False):
llm: NotRequired[str] # one of the SupportedModels
llm_max_tokens: NotRequired[int]
llm_temperature: NotRequired[float]
-llm_api_key: NotRequired[str]
llm_streaming: NotRequired[bool]
llm_callbacks: NotRequired[List]
6 changes: 3 additions & 3 deletions app/backend/init_app.py
@@ -24,19 +24,19 @@ def initApproaches(cfg: BackendConfig, repoHelper: Repository) -> Tuple[Chat, Br
"""
brainstormllm = getModel(
models=cfg["models"],
-max_tokens = 4000,
+max_output_tokens = 4000,
n = 1,
streaming=False,
temperature=0.9)
sumllm = getModel(
models=cfg["models"],
-max_tokens = 2000,
+max_output_tokens = 2000,
n = 1,
streaming=False,
temperature=0)
chatlllm = getModel(
models=cfg["models"],
-max_tokens=4000,
+max_output_tokens=4000,
n = 1,
streaming=True,
temperature=0.7)
2 changes: 1 addition & 1 deletion app/frontend/package.json
@@ -1,7 +1,7 @@
{
"name": "mucgpt",
"private": true,
"version": "1.1.3",
"version": "1.1.4",
"type": "module",
"engines": {
"node": ">=16.0.0"
2 changes: 1 addition & 1 deletion app/frontend/src/api/api.ts
@@ -14,7 +14,7 @@ export async function chatApi(options: ChatRequest): Promise<Response> {
temperature: options.temperature,
language: options.language,
system_message: options.system_message,
-max_tokens: options.max_tokens,
+max_output_tokens: options.max_output_tokens,
model: options.model
})
});
5 changes: 3 additions & 2 deletions app/frontend/src/api/models.ts
@@ -18,7 +18,7 @@ export type ChatRequest = {
history: ChatTurn[];
temperature?: number;
language?: string;
-max_tokens?: number;
+max_output_tokens?: number;
system_message?: string;
shouldStream?: boolean;
model?: string;
@@ -50,7 +50,8 @@ export interface Frontend {
}

export interface Model {
-max_tokens: number;
+max_output_tokens: number;
+max_input_tokens: number;
model_name: string;
description: string;
}
app/frontend/src/components/ChatsettingsDrawer/ChatsettingsDrawer.tsx
@@ -20,14 +20,14 @@ import { LLMContext } from "../LLMSelector/LLMContextProvider";
interface Props {
temperature: number;
setTemperature: (temp: number, id: number) => void;
-max_tokens: number;
+max_output_tokens: number;
setMaxTokens: (maxTokens: number, id: number) => void;
systemPrompt: string;
setSystemPrompt: (systemPrompt: string, id: number) => void;
current_id: number;
}

-export const ChatsettingsDrawer = ({ temperature, setTemperature, max_tokens, setMaxTokens, systemPrompt, setSystemPrompt, current_id }: Props) => {
+export const ChatsettingsDrawer = ({ temperature, setTemperature, max_output_tokens, setMaxTokens, systemPrompt, setSystemPrompt, current_id }: Props) => {
const [isOpen, setIsOpen] = useState<boolean>(false);
const { t, i18n } = useTranslation();
const { LLM } = useContext(LLMContext)
@@ -41,11 +41,10 @@ export const ChatsettingsDrawer = ({ temperature, setTemperature, max_tokens, se
const max_tokensID = useId("input-max_tokens");

const min_max_tokens = 10;
-const max_max_tokens = LLM.max_tokens;
+const max_max_tokens = LLM.max_output_tokens;
const min_temp = 0;
const max_temp = 1;


const isEmptySystemPrompt = systemPrompt.trim() === "";

const onTemperatureChange: SliderProps["onChange"] = (_, data) =>
@@ -144,12 +143,12 @@ export const ChatsettingsDrawer = ({ temperature, setTemperature, max_tokens, se
defaultValue={20}
onChange={onMaxtokensChange}
aria-valuetext={t('components.chattsettingsdrawer.max_lenght') + ` ist ${max_tokensID}`}
-value={max_tokens}
+value={max_output_tokens}
aria-labelledby={max_tokens_headerID}
id={max_tokensID} />
<br></br>
<Label htmlFor={max_tokensID} aria-hidden>
-{max_tokens} Tokens
+{max_output_tokens} Tokens
</Label>
</div>
</div>
app/frontend/src/components/LLMSelector/LLMContextProvider.tsx
@@ -8,10 +8,10 @@ interface ILLMProvider {
}

export const DEFAULTLLM = "gpt-4o-mini";
-export const LLMContext = React.createContext<ILLMProvider>({ LLM: { model_name: DEFAULTLLM, max_tokens: 0, description: "" }, setLLM: () => { } });
+export const LLMContext = React.createContext<ILLMProvider>({ LLM: { model_name: DEFAULTLLM, max_output_tokens: 0, max_input_tokens: 0, description: "" }, setLLM: () => { } });

export const LLMContextProvider = (props: React.PropsWithChildren<{}>) => {
-const [LLM, setLLM] = useState<Model>({ model_name: DEFAULTLLM, max_tokens: 0, description: "" });
+const [LLM, setLLM] = useState<Model>({ model_name: DEFAULTLLM, max_output_tokens: 0, max_input_tokens: 0, description: "" });

return (
<LLMContext.Provider value={{ LLM, setLLM }}>
app/frontend/src/components/QuestionInput/QuestionInput.tsx
@@ -21,7 +21,7 @@ interface Props {
export const QuestionInput = ({ onSend, disabled, placeholder, clearOnSend, tokens_used, token_limit_tracking = true, question, setQuestion }: Props) => {
const { t, i18n } = useTranslation();
const { LLM } = useContext(LLMContext)
-const wordCount = LLM.max_tokens;
+const wordCount = LLM.max_input_tokens;
const getDescription = () => {
let actual = countWords(question) + tokens_used;
let text;
2 changes: 1 addition & 1 deletion app/frontend/src/components/SumInput/SumInput.tsx
@@ -24,7 +24,7 @@ export const SumInput = ({ onSend, disabled, placeholder, clearOnSend, tokens_us
const [dragging, setDragging] = useState(false);
const [file, setFile] = useState<File | undefined>(undefined);
const { LLM } = useContext(LLMContext)
-const wordCount = LLM.max_tokens;
+const wordCount = LLM.max_input_tokens;
const getDescription = () => {
let actual = countWords(question) + tokens_used;
let text;