From 3fee797904a3f50551acdf8059266ad825eff538 Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Wed, 11 Sep 2024 11:52:27 +0200 Subject: [PATCH 1/6] :bug: fixed bug with max_tokens --- app/backend/app.py | 10 +- app/backend/backend.py | 260 ++++++++++++++++++ app/backend/chat/chat.py | 12 +- app/backend/core/llmhelper.py | 10 +- app/backend/core/types/Config.py | 6 +- app/backend/init_app.py | 6 +- app/frontend/src/api/api.ts | 2 +- app/frontend/src/api/models.ts | 5 +- .../ChatsettingsDrawer/ChatsettingsDrawer.tsx | 11 +- .../LLMSelector/LLMContextProvider.tsx | 4 +- .../QuestionInput/QuestionInput.tsx | 2 +- .../src/components/SumInput/SumInput.tsx | 2 +- app/frontend/src/pages/chat/Chat.tsx | 21 +- app/frontend/src/pages/layout/Layout.tsx | 6 +- app/frontend/src/service/storage.ts | 14 +- config/default.json | 6 +- config/mucgpt_config.schema.json | 39 ++- tests/integration/test_app.py | 13 +- tests/integration/test_config.json | 3 +- tests/unit/test_llmhelper.py | 23 +- 20 files changed, 367 insertions(+), 88 deletions(-) create mode 100644 app/backend/backend.py diff --git a/app/backend/app.py b/app/backend/app.py index db6b7789..28205749 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -101,12 +101,12 @@ async def chat_stream(): try: impl = cfg["chat_approaches"] temperature=request_json['temperature'] or 0.7 - max_tokens=request_json['max_tokens'] or 4096 + max_output_tokens=request_json['max_output_tokens'] or 4096 system_message = request_json['system_message'] or None model = request_json['model'] or "gpt-4o-mini" response_generator = impl.run_with_streaming(history= request_json["history"], temperature=temperature, - max_tokens=max_tokens, + max_output_tokens=max_output_tokens, system_message=system_message, model=model, department= department) @@ -128,13 +128,13 @@ async def chat(): try: impl = cfg["chat_approaches"] temperature=request_json['temperature'] or 0.7 - max_tokens=request_json['max_tokens'] or 4096 + max_output_tokens=request_json['max_output_tokens'] or 4096 model_name=request_json['model'] or "gpt-4o-mini" system_message = request_json['system_message'] or None history = request_json["history"] chatResult = impl.run_without_streaming(history= history, temperature=temperature, - max_tokens=max_tokens, + max_output_tokens=max_output_tokens, system_message=system_message, department= department, model_name= model_name) @@ -150,7 +150,7 @@ async def getConfig(): models= cast(List[ModelsConfig], cfg["configuration_features"]["backend"]["models"]) models_dto_list = [] for model in models: - dto = ModelsDTO(model_name=model["model_name"], max_tokens=model["max_tokens"], description=model["description"]) + dto = ModelsDTO(model_name=model["model_name"], max_output_tokens=model["max_output_tokens"], max_input_tokens=model["max_input_tokens"], description=model["description"]) models_dto_list.append(dto) return jsonify({ "frontend": frontend_features, diff --git a/app/backend/backend.py b/app/backend/backend.py new file mode 100644 index 00000000..4e5bae6c --- /dev/null +++ b/app/backend/backend.py @@ -0,0 +1,260 @@ +import io +import logging +from contextlib import asynccontextmanager +from typing import List, cast + +from fastapi import FastAPI, File, Form, Header, HTTPException, Request, UploadFile +from fastapi.responses import ( + FileResponse, + HTMLResponse, + JSONResponse, + RedirectResponse, + StreamingResponse, +) +from fastapi.staticfiles import StaticFiles +from langchain_core.messages.human import HumanMessage 
+from pydantic_core import from_json + +from core.authentification import AuthentificationHelper, AuthError +from core.helper import format_as_ndjson +from core.modelhelper import num_tokens_from_messages +from core.types.AppConfig import AppConfig +from core.types.ChatRequest import ChatRequest +from core.types.Config import ModelsConfig, ModelsDTO +from core.types.countresult import CountResult +from core.types.SumRequest import SumRequest +from init_app import initApp + + +@asynccontextmanager +async def lifespan(backend: FastAPI): + backend.state.app_config = await initApp() + yield + + +backend = FastAPI(title="MUCGPT", lifespan=lifespan) +backend.mount("/static", StaticFiles(directory="static"), name="static") +backend.state.app_config = None + + +@backend.exception_handler(AuthError) +async def handleAuthError(error: AuthError): + return error.error, error.status_code + + +@backend.get("/", include_in_schema=False) +async def index(request: Request): + get_config_and_authentificate(request) + with open("static/index.html") as f: + return HTMLResponse(content=f.read()) + + +@backend.get("/favicon.ico", include_in_schema=False) +async def favicon(): + return RedirectResponse(url="/static/favicon.ico") + + +@backend.get("/assets/{path}") +async def assets(request: Request, path: str): + get_config_and_authentificate(request) + return RedirectResponse(url="/static/assets/" + path) + +@backend.post("/sum") +async def sum( + body: str = Form(...), + file: UploadFile = File(None), + id_token: str = Header(None, alias= "X-Ms-Token-Lhmsso-Id-Token"), + access_token: str = Header(None, alias="X-Ms-Token-Lhmsso-Access-Token") +): + cfg = get_config_and_authentificate(access_token=access_token) + department = get_department(id_token=id_token) + sumRequest = SumRequest.model_validate(from_json(body)) + text =sumRequest.text if file is None else None + if(file is not None): + file_content = io.BytesIO(await file.read()) + else: + file_content = None + try: + impl = cfg["sum_approaches"] + splits = impl.split(detaillevel=sumRequest.detaillevel, file=file_content, text=text) + r = await impl.summarize( + splits=splits, + department=department, + language=sumRequest.language, + model_name=sumRequest.model, + ) + return JSONResponse(content=r) + except Exception as e: + logging.exception("Exception in /sum") + return JSONResponse({"error": str(e)}, status_code=500) +#TODO remove for prod +@backend.exception_handler(HTTPException) +async def http_exception_handler(request: Request, exc: HTTPException): + logging.error(f"Error processing request: {exc.detail}") + return JSONResponse(status_code=exc.status_code, content={"detail": exc.detail}) + + + +@backend.post("/brainstorm") +async def brainstorm(request: Request, + id_token: str = Header(None, alias= "X-Ms-Token-Lhmsso-Id-Token"), + access_token: str = Header(None, alias="X-Ms-Token-Lhmsso-Access-Token")): + cfg = get_config_and_authentificate(access_token=access_token) + try: + request_json = await request.json() + except ValueError: + return JSONResponse(content={"error": "request must be json"}, status_code=415) + department = get_department(id_token=id_token) + + try: + impl = cfg["brainstorm_approaches"] + r = await impl.brainstorm( + topic=request_json["topic"], + language=request_json["language"] or "Deutsch", + department=department, + model_name=request_json["model"] or "gpt-4o-mini", + ) + return JSONResponse(r) + except Exception as e: + logging.exception("Exception in /brainstorm") + msg = ( + "Momentan liegt eine starke Auslastung vor. 
Bitte in einigen Sekunden erneut versuchen." + if "Rate limit" in str(e) + else str(e) + ) + return JSONResponse({"error": msg}), 500 + + +@backend.post("/chat_stream") +async def chat_stream(request: ChatRequest, + access_token: str = Header(None, alias="X-Ms-Token-Lhmsso-Access-Token"), + id_token: str = Header(None, alias= "X-Ms-Token-Lhmsso-Id-Token")): + cfg = get_config_and_authentificate(access_token=access_token) + department = get_department(id_token=id_token) + + try: + impl = cfg["chat_approaches"] + response_generator = impl.run_with_streaming( + history=request.history, + temperature=request.temperature, + max_tokens=request.max_tokens, + system_message=request.system_message, + model=request.model, + department=department, + ) + response = StreamingResponse(format_as_ndjson(response_generator)) + response.timeout = None # type: ignore + return response + except Exception as e: + logging.exception("Exception in /chat") + return JSONResponse({"error": str(e)}), 500 + + +@backend.post("/chat") +async def chat(request: ChatRequest, + access_token: str = Header(None, alias="X-Ms-Token-Lhmsso-Access-Token"), + id_token: str = Header(None, alias= "X-Ms-Token-Lhmsso-Id-Token")): + cfg = get_config_and_authentificate(access_token=access_token) + department = get_department(id_token=id_token) + try: + impl = cfg["chat_approaches"] + chatResult = impl.run_without_streaming( + history=request.history, + temperature=request.temperature, + max_tokens=request.max_tokens, + system_message=request.system_message, + department=department, + model_name=request.model, + ) + return JSONResponse(chatResult) + except Exception as e: + logging.exception("Exception in /chat") + return JSONResponse({"error": str(e)}), 500 + + +@backend.get("/config") +async def getConfig(access_token: str = Header(None, alias="X-Ms-Token-Lhmsso-Access-Token")): + cfg = get_config_and_authentificate(access_token) + frontend_features = cfg["configuration_features"]["frontend"] + models = cast( + List[ModelsConfig], cfg["configuration_features"]["backend"]["models"] + ) + models_dto_list = [] + for model in models: + dto = ModelsDTO( + model_name=model["model_name"], + max_tokens=model["max_tokens"], + description=model["description"], + ) + models_dto_list.append(dto) + return JSONResponse({"frontend": frontend_features, "models": models_dto_list}) + + +@backend.get("/statistics") +async def getStatistics(access_token: str = Header(None, alias="X-Ms-Token-Lhmsso-Access-Token")): + try: + cfg = get_config_and_authentificate(access_token) + repo = cfg["repository"] + sum_by_department = repo.sumByDepartment() + avg_by_department = repo.avgByDepartment() + return JSONResponse({"sum": float(sum_by_department), "avg": float(avg_by_department)}) + except Exception as e: + return JSONResponse(content={"error": e}, status_code=404) + + +@backend.post("/counttokens") +async def counttokens(request: Request, access_token: str = Header(None, alias="X-Ms-Token-Lhmsso-Access-Token") ): + get_config_and_authentificate(access_token) + if not request.json(): + return JSONResponse({"error": "request must be json"}, status_code=415) + request_json = await request.json() + message = request_json["text"] or "" + model = request_json["model"]["model_name"] or "gpt-4o-mini" + counted_tokens = num_tokens_from_messages([HumanMessage(message)], model) + return JSONResponse(CountResult(count=counted_tokens)) + + +@backend.get("/statistics/export") +async def getStatisticsCSV(request: Request, + access_token: str = Header(None, 
alias="X-Ms-Token-Lhmsso-Access-Token")): + try: + cfg = get_config_and_authentificate(access_token) + repo = cfg["repository"] + export = repo.export() + return FileResponse(export, filename="statistics.csv", as_attachment=True) + except Exception as e: + return JSONResponse(content={"error": e}, status_code=404) + + +@backend.get("/health") +def health_check(): + return "OK" + + +def get_config(): + return cast(AppConfig, backend.state.app_config) + + +def get_config_and_authentificate(access_token): + cfg = get_config() + if cfg["configuration_features"]["backend"]["enable_auth"]: + ensure_authentification(access_token=access_token) + return cfg + + +def ensure_authentification(access_token): + cfg = get_config() + auth_client: AuthentificationHelper = cfg["authentification_client"] + claims = auth_client.authentificate(accesstoken=access_token) + return auth_client, claims + + +def get_department(id_token): + cfg = get_config() + + if cfg["configuration_features"]["backend"]["enable_auth"]: + auth_client: AuthentificationHelper = cfg["authentification_client"] + id_claims = auth_client.decode(id_token) + return auth_client.getDepartment(claims=id_claims) + else: + return None diff --git a/app/backend/chat/chat.py b/app/backend/chat/chat.py index 25a43c64..70376d26 100644 --- a/app/backend/chat/chat.py +++ b/app/backend/chat/chat.py @@ -21,12 +21,12 @@ def __init__(self, llm: RunnableSerializable, config: ApproachConfig, repo: Repo self.config = config self.repo = repo - async def run_with_streaming(self, history: 'list[dict[str, str]]',max_tokens: int, temperature: float, system_message: Optional[str], model: str, department: Optional[str]) -> AsyncGenerator[Chunk, None]: + async def run_with_streaming(self, history: 'list[dict[str, str]]',max_output_tokens: int, temperature: float, system_message: Optional[str], model: str, department: Optional[str]) -> AsyncGenerator[Chunk, None]: """call the llm in streaming mode Args: history (list[dict[str, str]]): the history,user and ai messages - max_tokens (int): max_tokens to generate + max_output_tokens (int): max_output_tokens to generate temperature (float): temperature of the llm system_message (Optional[str]): the system message department (Optional[str]): from which department comes the call @@ -40,7 +40,7 @@ async def run_with_streaming(self, history: 'list[dict[str, str]]',max_tokens: i """ # configure config: LlmConfigs = { - "llm_max_tokens": max_tokens, + "llm_max_tokens": max_output_tokens, "llm_temperature": temperature, "llm_streaming": True, "llm": model @@ -74,12 +74,12 @@ async def run_with_streaming(self, history: 'list[dict[str, str]]',max_tokens: i info = ChunkInfo(requesttokens=num_tokens_from_messages([msgs[-1]],model), streamedtokens=num_tokens_from_messages([HumanMessage(result)], model)) yield Chunk(type="I", message=info, order=position) - def run_without_streaming(self, history: "Sequence[dict[str, str]]", max_tokens: int, temperature: float, system_message: Optional[str], department: Optional[str], model_name:str) -> ChatResult: + def run_without_streaming(self, history: "Sequence[dict[str, str]]", max_output_tokens: int, temperature: float, system_message: Optional[str], department: Optional[str], model_name:str) -> ChatResult: """calls the llm in blocking mode, returns the full result Args: history (list[dict[str, str]]): the history,user and ai messages - max_tokens (int): max_tokens to generate + max_output_tokens (int): max_output_tokens to generate temperature (float): temperature of the llm system_message 
(Optional[str]): the system message department (Optional[str]): from which department comes the call @@ -88,7 +88,7 @@ def run_without_streaming(self, history: "Sequence[dict[str, str]]", max_tokens: ChatResult: the generated text from the llm """ config: LlmConfigs = { - "llm_max_tokens": max_tokens, + "llm_max_tokens": max_output_tokens, "llm_temperature": temperature, "llm_streaming": False, } diff --git a/app/backend/core/llmhelper.py b/app/backend/core/llmhelper.py index 65dffb05..09d2d56a 100644 --- a/app/backend/core/llmhelper.py +++ b/app/backend/core/llmhelper.py @@ -13,7 +13,7 @@ class ModelsConfigurationException(Exception): def getModel(models: List[ModelsConfig], - max_tokens: int, + max_output_tokens: int, n: int, temperature: float, streaming: bool) -> RunnableSerializable: @@ -31,7 +31,7 @@ def getModel(models: List[ModelsConfig], openai_api_key=default_model["api_key"], azure_endpoint=default_model["endpoint"], openai_api_version=default_model["api_version"], - max_tokens=max_tokens, + max_tokens=max_output_tokens, n=n, streaming=streaming, temperature=temperature, @@ -42,7 +42,7 @@ def getModel(models: List[ModelsConfig], model=default_model["model_name"], api_key=default_model["api_key"], base_url=default_model["endpoint"], - max_tokens=max_tokens, + max_tokens=max_output_tokens, n=n, streaming=streaming, temperature=temperature, @@ -59,7 +59,7 @@ def getModel(models: List[ModelsConfig], azure_endpoint=model["endpoint"], openai_api_version=model["api_version"], openai_api_type="azure", - max_tokens=max_tokens, + max_tokens=max_output_tokens, n=n, streaming=streaming, temperature=temperature, @@ -69,7 +69,7 @@ def getModel(models: List[ModelsConfig], model=model["model_name"], api_key=model["api_key"], base_url=model["endpoint"], - max_tokens=max_tokens, + max_tokens=max_output_tokens, n=n, streaming=streaming, temperature=temperature, diff --git a/app/backend/core/types/Config.py b/app/backend/core/types/Config.py index 552faba4..bfa3cc46 100644 --- a/app/backend/core/types/Config.py +++ b/app/backend/core/types/Config.py @@ -11,11 +11,13 @@ class ModelsConfig(TypedDict): endpoint: str api_key: str api_version: str - max_tokens: int + max_output_tokens: int + max_input_tokens: int class ModelsDTO(TypedDict): model_name: str - max_tokens: int + max_output_tokens: int + max_input_tokens: int description: str class SSOConfig(TypedDict): diff --git a/app/backend/init_app.py b/app/backend/init_app.py index 90ebed16..821f49a6 100644 --- a/app/backend/init_app.py +++ b/app/backend/init_app.py @@ -24,19 +24,19 @@ def initApproaches(cfg: BackendConfig, repoHelper: Repository) -> Tuple[Chat, Br """ brainstormllm = getModel( models=cfg["models"], - max_tokens = 4000, + max_output_tokens = 4000, n = 1, streaming=False, temperature=0.9) sumllm = getModel( models=cfg["models"], - max_tokens = 2000, + max_output_tokens = 2000, n = 1, streaming=False, temperature=0) chatlllm = getModel( models=cfg["models"], - max_tokens=4000, + max_output_tokens=4000, n = 1, streaming=True, temperature=0.7) diff --git a/app/frontend/src/api/api.ts b/app/frontend/src/api/api.ts index 5c379c92..e21241b5 100644 --- a/app/frontend/src/api/api.ts +++ b/app/frontend/src/api/api.ts @@ -14,7 +14,7 @@ export async function chatApi(options: ChatRequest): Promise { temperature: options.temperature, language: options.language, system_message: options.system_message, - max_tokens: options.max_tokens, + max_output_tokens: options.max_output_tokens, model: options.model }) }); diff --git 
a/app/frontend/src/api/models.ts b/app/frontend/src/api/models.ts index 430ad10a..2f7642ee 100644 --- a/app/frontend/src/api/models.ts +++ b/app/frontend/src/api/models.ts @@ -18,7 +18,7 @@ export type ChatRequest = { history: ChatTurn[]; temperature?: number; language?: string; - max_tokens?: number; + max_output_tokens?: number; system_message?: string; shouldStream?: boolean; model?: string; @@ -50,7 +50,8 @@ export interface Frontend { } export interface Model { - max_tokens: number; + max_output_tokens: number; + max_input_tokens: number; model_name: string; description: string; } diff --git a/app/frontend/src/components/ChatsettingsDrawer/ChatsettingsDrawer.tsx b/app/frontend/src/components/ChatsettingsDrawer/ChatsettingsDrawer.tsx index c7041a85..db550ce1 100644 --- a/app/frontend/src/components/ChatsettingsDrawer/ChatsettingsDrawer.tsx +++ b/app/frontend/src/components/ChatsettingsDrawer/ChatsettingsDrawer.tsx @@ -20,14 +20,14 @@ import { LLMContext } from "../LLMSelector/LLMContextProvider"; interface Props { temperature: number; setTemperature: (temp: number, id: number) => void; - max_tokens: number; + max_output_tokens: number; setMaxTokens: (maxTokens: number, id: number) => void; systemPrompt: string; setSystemPrompt: (systemPrompt: string, id: number) => void; current_id: number; } -export const ChatsettingsDrawer = ({ temperature, setTemperature, max_tokens, setMaxTokens, systemPrompt, setSystemPrompt, current_id }: Props) => { +export const ChatsettingsDrawer = ({ temperature, setTemperature, max_output_tokens, setMaxTokens, systemPrompt, setSystemPrompt, current_id }: Props) => { const [isOpen, setIsOpen] = useState(false); const { t, i18n } = useTranslation(); const { LLM } = useContext(LLMContext) @@ -41,11 +41,10 @@ export const ChatsettingsDrawer = ({ temperature, setTemperature, max_tokens, se const max_tokensID = useId("input-max_tokens"); const min_max_tokens = 10; - const max_max_tokens = LLM.max_tokens; + const max_max_tokens = LLM.max_output_tokens; const min_temp = 0; const max_temp = 1; - const isEmptySystemPrompt = systemPrompt.trim() === ""; const onTemperatureChange: SliderProps["onChange"] = (_, data) => @@ -144,12 +143,12 @@ export const ChatsettingsDrawer = ({ temperature, setTemperature, max_tokens, se defaultValue={20} onChange={onMaxtokensChange} aria-valuetext={t('components.chattsettingsdrawer.max_lenght') + ` ist ${max_tokensID}`} - value={max_tokens} + value={max_output_tokens} aria-labelledby={max_tokens_headerID} id={max_tokensID} />

diff --git a/app/frontend/src/components/LLMSelector/LLMContextProvider.tsx b/app/frontend/src/components/LLMSelector/LLMContextProvider.tsx index d291e69d..e1f5913f 100644 --- a/app/frontend/src/components/LLMSelector/LLMContextProvider.tsx +++ b/app/frontend/src/components/LLMSelector/LLMContextProvider.tsx @@ -8,10 +8,10 @@ interface ILLMProvider { } export const DEFAULTLLM = "gpt-4o-mini"; -export const LLMContext = React.createContext({ LLM: { model_name: DEFAULTLLM, max_tokens: 0, description: "" }, setLLM: () => { } }); +export const LLMContext = React.createContext({ LLM: { model_name: DEFAULTLLM, max_output_tokens: 0, max_input_tokens: 0, description: "" }, setLLM: () => { } }); export const LLMContextProvider = (props: React.PropsWithChildren<{}>) => { - const [LLM, setLLM] = useState({ model_name: DEFAULTLLM, max_tokens: 0, description: "" }); + const [LLM, setLLM] = useState({ model_name: DEFAULTLLM, max_output_tokens: 0, max_input_tokens: 0, description: "" }); return ( diff --git a/app/frontend/src/components/QuestionInput/QuestionInput.tsx b/app/frontend/src/components/QuestionInput/QuestionInput.tsx index 7787220e..801e2195 100644 --- a/app/frontend/src/components/QuestionInput/QuestionInput.tsx +++ b/app/frontend/src/components/QuestionInput/QuestionInput.tsx @@ -21,7 +21,7 @@ interface Props { export const QuestionInput = ({ onSend, disabled, placeholder, clearOnSend, tokens_used, token_limit_tracking = true, question, setQuestion }: Props) => { const { t, i18n } = useTranslation(); const { LLM } = useContext(LLMContext) - const wordCount = LLM.max_tokens; + const wordCount = LLM.max_input_tokens; const getDescription = () => { let actual = countWords(question) + tokens_used; let text; diff --git a/app/frontend/src/components/SumInput/SumInput.tsx b/app/frontend/src/components/SumInput/SumInput.tsx index ad8348a8..8ee9bd30 100644 --- a/app/frontend/src/components/SumInput/SumInput.tsx +++ b/app/frontend/src/components/SumInput/SumInput.tsx @@ -24,7 +24,7 @@ export const SumInput = ({ onSend, disabled, placeholder, clearOnSend, tokens_us const [dragging, setDragging] = useState(false); const [file, setFile] = useState(undefined); const { LLM } = useContext(LLMContext) - const wordCount = LLM.max_tokens; + const wordCount = LLM.max_input_tokens; const getDescription = () => { let actual = countWords(question) + tokens_used; let text; diff --git a/app/frontend/src/pages/chat/Chat.tsx b/app/frontend/src/pages/chat/Chat.tsx index 7b913a71..b747c3fe 100644 --- a/app/frontend/src/pages/chat/Chat.tsx +++ b/app/frontend/src/pages/chat/Chat.tsx @@ -41,12 +41,12 @@ const Chat = () => { const [answers, setAnswers] = useState<[user: string, response: AskResponse, user_tokens: number][]>([]); const [question, setQuestion] = useState(""); - const temperature_pref = Number(localStorage.getItem(STORAGE_KEYS.CHAT_TEMPERATURE)) || 0.7; - const max_tokens_pref = Number(localStorage.getItem(STORAGE_KEYS.CHAT_MAX_TOKENS)) || 4000; + const temperature_pref = Number(localStorage.getItem(STORAGE_KEYS.CHAT_TEMPERATURE) || 0.7); + const max_output_tokens_pref = Number(localStorage.getItem(STORAGE_KEYS.CHAT_MAX_TOKENS)) || 4000; const systemPrompt_pref = localStorage.getItem(STORAGE_KEYS.CHAT_SYSTEM_PROMPT) || ""; const [temperature, setTemperature] = useState(temperature_pref); - const [max_tokens, setMaxTokens] = useState(max_tokens_pref); + const [max_output_tokens, setMaxOutputTokens] = useState(max_output_tokens_pref); const [systemPrompt, setSystemPrompt] = useState(systemPrompt_pref); const 
storage: indexedDBStorage = { @@ -69,6 +69,9 @@ const Chat = () => { useEffect(() => { makeTokenCountRequest(); + if (max_output_tokens > LLM.max_output_tokens && LLM.max_output_tokens != 0) { + onMaxTokensChanged(LLM.max_output_tokens, currentId) + } }, [debouncedSystemPrompt, LLM, makeTokenCountRequest]); useEffect(() => { @@ -119,7 +122,7 @@ const Chat = () => { error && setError(undefined); setIsLoading(true); let askResponse: AskResponse = {} as AskResponse; - saveToDB([question, { ...askResponse, answer: "", tokens: 0 }, 0], storage, startId, idCounter, setCurrentId, setIdCounter, language, temperature, system ? system : "", max_tokens, LLM.model_name) + saveToDB([question, { ...askResponse, answer: "", tokens: 0 }, 0], storage, startId, idCounter, setCurrentId, setIdCounter, language, temperature, system ? system : "", max_output_tokens, LLM.model_name) try { const history: ChatTurn[] = answers.map(a => ({ user: a[0], bot: a[1].answer })); const request: ChatRequest = { @@ -128,7 +131,7 @@ const Chat = () => { language: language, temperature: temperature, system_message: system ? system : "", - max_tokens: max_tokens, + max_output_tokens: max_output_tokens, model: LLM.model_name }; @@ -166,7 +169,7 @@ const Chat = () => { } } if (startId == currentId) { - saveToDB([question, latestResponse, user_tokens], storage, startId, idCounter, setCurrentId, setIdCounter, language, temperature, system ? system : "", max_tokens, LLM.model_name) + saveToDB([question, latestResponse, user_tokens], storage, startId, idCounter, setCurrentId, setIdCounter, language, temperature, system ? system : "", max_output_tokens, LLM.model_name) } } else { const parsedResponse: AskResponse = await response.json(); @@ -175,7 +178,7 @@ const Chat = () => { } setAnswers([...answers, [question, parsedResponse, 0]]); if (startId == currentId) { - saveToDB([question, parsedResponse, 0], storage, currentId, idCounter, setCurrentId, setIdCounter, language, temperature, system ? system : "", max_tokens, LLM.model_name) + saveToDB([question, parsedResponse, 0], storage, currentId, idCounter, setCurrentId, setIdCounter, language, temperature, system ? 
system : "", max_output_tokens, LLM.model_name) } } } catch (e) { @@ -221,7 +224,7 @@ const Chat = () => { }; const onMaxTokensChanged = (maxTokens: number, id: number) => { - setMaxTokens(maxTokens); + setMaxOutputTokens(maxTokens); localStorage.setItem(STORAGE_KEYS.CHAT_MAX_TOKENS, maxTokens.toString()); changeMaxTokensInDb(maxTokens, id, storage); }; @@ -251,7 +254,7 @@ const Chat = () => { { const [config, setConfig] = useState({ models: [{ "model_name": "KICC GPT", - "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, "description": "" }, { "model_name": "Unknown GPT", - "max_tokens": 100, + "max_input_tokens": 128000, + "max_output_tokens": 128000, "description": "" }], frontend: { diff --git a/app/frontend/src/service/storage.ts b/app/frontend/src/service/storage.ts index 1a87518d..456262c9 100644 --- a/app/frontend/src/service/storage.ts +++ b/app/frontend/src/service/storage.ts @@ -34,7 +34,7 @@ export async function saveToDB( language?: string, temperature?: number, system_message?: string, - max_tokens?: number, + max_output_tokens?: number, model?: string ) { let openRequest = indexedDB.open(storage.db_name, storage.db_version); @@ -59,7 +59,7 @@ export async function saveToDB( result.Data.LastEdited = Date.now(); if (storage.objectStore_name === "chat") { result.Options.system = system_message; - result.Options.maxTokens = max_tokens; + result.Options.maxTokens = max_output_tokens; result.Options.temperature = temperature; } data = result; @@ -67,8 +67,8 @@ export async function saveToDB( // if the chat does not exist in the DB let name: string = ""; let new_idcounter = id_counter; - if (language != undefined && temperature != undefined && system_message != undefined && max_tokens != undefined && model != undefined) { - name = await (await getChatName(a, language, temperature, system_message, max_tokens, model)).content; + if (language != undefined && temperature != undefined && system_message != undefined && max_output_tokens != undefined && model != undefined) { + name = await (await getChatName(a, language, temperature, system_message, max_output_tokens, model)).content; name = name.replaceAll('"', "").replaceAll(".", ""); } if (storage.objectStore_name === "chat") { @@ -78,7 +78,7 @@ export async function saveToDB( data = { Data: { Answers: [a], Name: name, LastEdited: Date.now() }, id: new_idcounter, - Options: { favorite: false, system: system_message, maxTokens: max_tokens, temperature: temperature } + Options: { favorite: false, system: system_message, maxTokens: max_output_tokens, temperature: temperature } }; } else { data = { @@ -100,7 +100,7 @@ export async function saveToDB( }; } -export async function getChatName(answers: any, language: string, temperature: number, system_message: string, max_tokens: number, model: string) { +export async function getChatName(answers: any, language: string, temperature: number, system_message: string, max_output_tokens: number, model: string) { const history: ChatTurn[] = [{ user: answers[0], bot: answers[1].answer }]; const request: ChatRequest = { history: [ @@ -114,7 +114,7 @@ export async function getChatName(answers: any, language: string, temperature: n language: language, temperature: temperature, system_message: system_message, - max_tokens: max_tokens, + max_output_tokens: max_output_tokens, model: model }; const response = await chatApi(request); diff --git a/config/default.json b/config/default.json index e1167fc7..87ff5c3c 100644 --- a/config/default.json +++ b/config/default.json @@ 
-34,7 +34,8 @@ "model_name": "TODO", "endpoint": "TODO", "api_key": "TODO", - "max_tokens": 128000 + "max_input_tokens": 128000, + "max_output_tokens": 3000 }, { "type": "AZURE", @@ -43,7 +44,8 @@ "endpoint": "TODO", "api_key": "TODO", "api_version": "TODO", - "max_tokens": 0 + "max_input_tokens": 128000, + "max_output_tokens": 4000 } ] } diff --git a/config/mucgpt_config.schema.json b/config/mucgpt_config.schema.json index ff4d1c89..526b53b5 100644 --- a/config/mucgpt_config.schema.json +++ b/config/mucgpt_config.schema.json @@ -243,7 +243,8 @@ "model_name", "endpoint", "api_key", - "max_tokens", + "max_input_tokens", + "max_output_tokens", "deployment", "api_version" ], @@ -277,14 +278,22 @@ "BLABLUBLAUBLAUBLA" ] }, - "max_tokens": { + "max_input_tokens": { "type": "integer", - "title": "The context length of the LLM", + "title": "The input context length of the LLM", "examples": [ 128000, 0 ] }, + "max_output_tokens": { + "type": "integer", + "title": "The output context length of the LLM", + "examples": [ + 8192, + 0 + ] + }, "deployment": { "type": "string", "default": "", @@ -307,7 +316,8 @@ "model_name": "mucgpt-mini", "endpoint": "mucgptmini.openai.azure.com", "api_key": "BALBLBLABUALB", - "max_tokens": 128000 + "max_input_tokens": 128000, + "max_output_tokens": 8192 }, { "type": "AZURE", @@ -316,7 +326,8 @@ "endpoint": "mucgpt-maxi.openai.azure.com", "api_key": "BALBABUALB", "api_version": "2024-01", - "max_tokens": 1000000 + "max_input_tokens": 128000, + "max_output_tokens": 8192 }] }, "examples": [ @@ -325,7 +336,8 @@ "model_name": "mucgpt-mini", "endpoint": "mucgptmini.openai.azure.com", "api_key": "BALBLBLABUALB", - "max_tokens": 128000 + "max_input_tokens": 128000, + "max_output_tokens": 8192 }, { "type": "AZURE", @@ -334,7 +346,8 @@ "endpoint": "mucgpt-maxi.openai.azure.com", "api_key": "BALBABUALB", "api_version": "2024-01", - "max_tokens": 1000000 + "max_input_tokens": 128000, + "max_output_tokens": 8192 }] ] } @@ -366,7 +379,8 @@ "model_name": "mucgpt-mini", "endpoint": "mucgptmini.openai.azure.com", "api_key": "BALBLBLABUALB", - "max_tokens": 128000 + "max_input_tokens": 128000, + "max_output_tokens": 8192 }, { "type": "AZURE", @@ -375,7 +389,8 @@ "endpoint": "mucgpt-maxi.openai.azure.com", "api_key": "BALBABUALB", "api_version": "2024-01", - "max_tokens": 1000000 + "max_input_tokens": 128000, + "max_output_tokens": 8192 }] }] } @@ -414,7 +429,8 @@ "model_name": "mucgpt-mini", "endpoint": "mucgptmini.openai.azure.com", "api_key": "BALBLBLABUALB", - "max_tokens": 128000 + "max_input_tokens": 128000, + "max_output_tokens": 8192 }, { "type": "AZURE", @@ -423,7 +439,8 @@ "endpoint": "mucgpt-maxi.openai.azure.com", "api_key": "BALBABUALB", "api_version": "2024-01", - "max_tokens": 1000000 + "max_input_tokens": 128000, + "max_output_tokens": 8192 }] } }] diff --git a/tests/integration/test_app.py b/tests/integration/test_app.py index f921ff39..51ff83e4 100644 --- a/tests/integration/test_app.py +++ b/tests/integration/test_app.py @@ -45,17 +45,6 @@ def fake_response(http_code): ) - -@pytest.mark.asyncio -@pytest.mark.integration -async def test_missing_env_vars(): - quart_app = app.create_app() - - with pytest.raises(quart.testing.app.LifespanError) as exc_info: - async with quart_app.test_app() as test_app: - test_app.test_client() - assert str(exc_info.value) == "Lifespan failure in startup. 
''AZURE_OPENAI_EMB_DEPLOYMENT''" - @pytest.mark.asyncio @pytest.mark.integration async def test_index(client): @@ -195,7 +184,7 @@ async def test_chatstream(client, mocker): mocker.patch("chat.chat.Chat.run_without_streaming", mock.AsyncMock(return_value=streaming_generator)) data = { "temperature": 0.1, - "max_tokens": 2400, + "max_output_tokens": 2400, "system_message": "", "model": "TEST_MODEL", "history": [{"user": "hi"}] diff --git a/tests/integration/test_config.json b/tests/integration/test_config.json index f33398e5..0a5445bd 100644 --- a/tests/integration/test_config.json +++ b/tests/integration/test_config.json @@ -35,7 +35,8 @@ "deployment": "NOT USED", "endpoint": "NOT USED", "api_key":"NOT USED", - "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 8192, "api_version": "NOT USED", "description": "NOT USED" } diff --git a/tests/unit/test_llmhelper.py b/tests/unit/test_llmhelper.py index 7ec597ec..ad020373 100644 --- a/tests/unit/test_llmhelper.py +++ b/tests/unit/test_llmhelper.py @@ -14,7 +14,8 @@ def setUp(self): "model_name": "model1", "endpoint": "TODO", "api_key": "TODO", - "max_tokens": 128000 + "max_input_tokens": 128000, + "max_output_tokens": 8192 } self.model2 ={ "type": "AZURE", @@ -23,14 +24,16 @@ def setUp(self): "api_version": "preview", "endpoint": "TODO", "api_key": "TODO", - "max_tokens": 128000 + "max_input_tokens": 128000, + "max_output_tokens": 8192 } self.model3 ={ "type": "TODO", "model_name": "model2", "endpoint": "TODO", "api_key": "TODO", - "max_tokens": 128000 + "max_input_tokens": 128000, + "max_output_tokens": 8192 } @pytest.mark.asyncio @@ -39,7 +42,7 @@ def test_getModel_returns_llm(self): model = getModel(models=[self.model1, self.model2], - max_tokens=10, + max_output_tokens=10, n=1, temperature=0.5, streaming=True) @@ -50,7 +53,7 @@ def test_getModel_returns_llm(self): def test_getModel_wrong_type(self): with self.assertRaises(ModelsConfigurationException): getModel(models=[self.model3], - max_tokens=10, + max_output_tokens=10, n=1, temperature=0.5, streaming=True) @@ -59,7 +62,7 @@ def test_getModel_wrong_type(self): @pytest.mark.unit def test_getModel_azure_first(self): model = getModel(models=[self.model2, self.model1], - max_tokens=10, + max_output_tokens=10, n=1, temperature=0.5, streaming=True) @@ -70,7 +73,7 @@ def test_getModel_azure_first(self): def test_getModel_no_model(self): with self.assertRaises(ModelsConfigurationException): getModel(models=[], - max_tokens=10, + max_output_tokens=10, n=1, temperature=0.5, streaming=True) @@ -79,7 +82,7 @@ def test_getModel_no_model(self): @pytest.mark.unit def test_getModel_configurable_fields(self): model = getModel(models=[self.model1, self.model2], - max_tokens=10, + max_output_tokens=10, n=1, temperature=0.5, streaming=True) @@ -91,7 +94,7 @@ def test_getModel_configurable_fields(self): @pytest.mark.unit def test_getModel_configurable_alternatives(self): model = getModel(models=[self.model1, self.model2], - max_tokens=10, + max_output_tokens=10, n=1, temperature=0.5, streaming=True) @@ -101,7 +104,7 @@ def test_getModel_configurable_alternatives(self): @pytest.mark.unit def test_getModel_fake_llm(self): model = getModel(models=[self.model1, self.model2], - max_tokens=10, + max_output_tokens=10, n=1, temperature=0.5, streaming=True) From e8d339d5a603b21050b9f65be6fdf407fdb5fc88 Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Wed, 11 Sep 2024 14:53:24 +0200 Subject: [PATCH 2/6] :bug: added version to /config --- app/backend/app.py 
| 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/backend/app.py b/app/backend/app.py index 28205749..5c54baa7 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -154,7 +154,8 @@ async def getConfig(): models_dto_list.append(dto) return jsonify({ "frontend": frontend_features, - "models": models_dto_list + "models": models_dto_list, + "version": cfg["configuration_features"]["version"] }) @bp.route("/statistics", methods=["GET"]) From e2b898774e19b8c6add8d4871e350da9ae66d2de Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Wed, 11 Sep 2024 16:06:55 +0200 Subject: [PATCH 3/6] :bookmark: Version 1.1.4 --- app/frontend/package.json | 2 +- app/frontend/src/pages/version/Version.tsx | 20 +++++++++++++++++++- config/base.json | 2 +- tests/integration/base.json | 2 +- 4 files changed, 22 insertions(+), 4 deletions(-) diff --git a/app/frontend/package.json b/app/frontend/package.json index fb1e576a..72293ef3 100644 --- a/app/frontend/package.json +++ b/app/frontend/package.json @@ -1,7 +1,7 @@ { "name": "mucgpt", "private": true, - "version": "1.1.3", + "version": "1.1.4", "type": "module", "engines": { "node": ">=16.0.0" diff --git a/app/frontend/src/pages/version/Version.tsx b/app/frontend/src/pages/version/Version.tsx index 56b304cb..31fbb101 100644 --- a/app/frontend/src/pages/version/Version.tsx +++ b/app/frontend/src/pages/version/Version.tsx @@ -32,7 +32,25 @@ const Version = () => {

{t('version.header')}

-
+                    [1.1.4] 11.09.2024
+
+                    {t('version.added')}
+
+                    {t('version.fixed')}
+                      • Version wird wieder richtig gespeichert und in den Einstellungen angezeigt.
+                      • Maximale Tokens aus der Config aufgeteilt in Input- und Output- Tokens. Dadurch laufen Modelle mit kleineren Contextfenster (wie z.B. Mistral) nicht mehr in einen Fehler.
+
+                    {t('version.changed')}
+
[1.1.3] 28.08.2024 diff --git a/config/base.json b/config/base.json index 15b52047..ccd05e57 100644 --- a/config/base.json +++ b/config/base.json @@ -1,3 +1,3 @@ { - "version": "1.1.3" + "version": "1.1.4" } \ No newline at end of file diff --git a/tests/integration/base.json b/tests/integration/base.json index 15b52047..ccd05e57 100644 --- a/tests/integration/base.json +++ b/tests/integration/base.json @@ -1,3 +1,3 @@ { - "version": "1.1.3" + "version": "1.1.4" } \ No newline at end of file From c6b7ec7cc4907dc032a035fcb24a6a1e49c58ebc Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Wed, 11 Sep 2024 16:19:35 +0200 Subject: [PATCH 4/6] :pencil: Typo --- app/backend/backend.py | 260 --------------------- app/frontend/src/pages/version/Version.tsx | 4 +- 2 files changed, 2 insertions(+), 262 deletions(-) delete mode 100644 app/backend/backend.py diff --git a/app/backend/backend.py b/app/backend/backend.py deleted file mode 100644 index 4e5bae6c..00000000 --- a/app/backend/backend.py +++ /dev/null @@ -1,260 +0,0 @@ -import io -import logging -from contextlib import asynccontextmanager -from typing import List, cast - -from fastapi import FastAPI, File, Form, Header, HTTPException, Request, UploadFile -from fastapi.responses import ( - FileResponse, - HTMLResponse, - JSONResponse, - RedirectResponse, - StreamingResponse, -) -from fastapi.staticfiles import StaticFiles -from langchain_core.messages.human import HumanMessage -from pydantic_core import from_json - -from core.authentification import AuthentificationHelper, AuthError -from core.helper import format_as_ndjson -from core.modelhelper import num_tokens_from_messages -from core.types.AppConfig import AppConfig -from core.types.ChatRequest import ChatRequest -from core.types.Config import ModelsConfig, ModelsDTO -from core.types.countresult import CountResult -from core.types.SumRequest import SumRequest -from init_app import initApp - - -@asynccontextmanager -async def lifespan(backend: FastAPI): - backend.state.app_config = await initApp() - yield - - -backend = FastAPI(title="MUCGPT", lifespan=lifespan) -backend.mount("/static", StaticFiles(directory="static"), name="static") -backend.state.app_config = None - - -@backend.exception_handler(AuthError) -async def handleAuthError(error: AuthError): - return error.error, error.status_code - - -@backend.get("/", include_in_schema=False) -async def index(request: Request): - get_config_and_authentificate(request) - with open("static/index.html") as f: - return HTMLResponse(content=f.read()) - - -@backend.get("/favicon.ico", include_in_schema=False) -async def favicon(): - return RedirectResponse(url="/static/favicon.ico") - - -@backend.get("/assets/{path}") -async def assets(request: Request, path: str): - get_config_and_authentificate(request) - return RedirectResponse(url="/static/assets/" + path) - -@backend.post("/sum") -async def sum( - body: str = Form(...), - file: UploadFile = File(None), - id_token: str = Header(None, alias= "X-Ms-Token-Lhmsso-Id-Token"), - access_token: str = Header(None, alias="X-Ms-Token-Lhmsso-Access-Token") -): - cfg = get_config_and_authentificate(access_token=access_token) - department = get_department(id_token=id_token) - sumRequest = SumRequest.model_validate(from_json(body)) - text =sumRequest.text if file is None else None - if(file is not None): - file_content = io.BytesIO(await file.read()) - else: - file_content = None - try: - impl = cfg["sum_approaches"] - splits = 
impl.split(detaillevel=sumRequest.detaillevel, file=file_content, text=text) - r = await impl.summarize( - splits=splits, - department=department, - language=sumRequest.language, - model_name=sumRequest.model, - ) - return JSONResponse(content=r) - except Exception as e: - logging.exception("Exception in /sum") - return JSONResponse({"error": str(e)}, status_code=500) -#TODO remove for prod -@backend.exception_handler(HTTPException) -async def http_exception_handler(request: Request, exc: HTTPException): - logging.error(f"Error processing request: {exc.detail}") - return JSONResponse(status_code=exc.status_code, content={"detail": exc.detail}) - - - -@backend.post("/brainstorm") -async def brainstorm(request: Request, - id_token: str = Header(None, alias= "X-Ms-Token-Lhmsso-Id-Token"), - access_token: str = Header(None, alias="X-Ms-Token-Lhmsso-Access-Token")): - cfg = get_config_and_authentificate(access_token=access_token) - try: - request_json = await request.json() - except ValueError: - return JSONResponse(content={"error": "request must be json"}, status_code=415) - department = get_department(id_token=id_token) - - try: - impl = cfg["brainstorm_approaches"] - r = await impl.brainstorm( - topic=request_json["topic"], - language=request_json["language"] or "Deutsch", - department=department, - model_name=request_json["model"] or "gpt-4o-mini", - ) - return JSONResponse(r) - except Exception as e: - logging.exception("Exception in /brainstorm") - msg = ( - "Momentan liegt eine starke Auslastung vor. Bitte in einigen Sekunden erneut versuchen." - if "Rate limit" in str(e) - else str(e) - ) - return JSONResponse({"error": msg}), 500 - - -@backend.post("/chat_stream") -async def chat_stream(request: ChatRequest, - access_token: str = Header(None, alias="X-Ms-Token-Lhmsso-Access-Token"), - id_token: str = Header(None, alias= "X-Ms-Token-Lhmsso-Id-Token")): - cfg = get_config_and_authentificate(access_token=access_token) - department = get_department(id_token=id_token) - - try: - impl = cfg["chat_approaches"] - response_generator = impl.run_with_streaming( - history=request.history, - temperature=request.temperature, - max_tokens=request.max_tokens, - system_message=request.system_message, - model=request.model, - department=department, - ) - response = StreamingResponse(format_as_ndjson(response_generator)) - response.timeout = None # type: ignore - return response - except Exception as e: - logging.exception("Exception in /chat") - return JSONResponse({"error": str(e)}), 500 - - -@backend.post("/chat") -async def chat(request: ChatRequest, - access_token: str = Header(None, alias="X-Ms-Token-Lhmsso-Access-Token"), - id_token: str = Header(None, alias= "X-Ms-Token-Lhmsso-Id-Token")): - cfg = get_config_and_authentificate(access_token=access_token) - department = get_department(id_token=id_token) - try: - impl = cfg["chat_approaches"] - chatResult = impl.run_without_streaming( - history=request.history, - temperature=request.temperature, - max_tokens=request.max_tokens, - system_message=request.system_message, - department=department, - model_name=request.model, - ) - return JSONResponse(chatResult) - except Exception as e: - logging.exception("Exception in /chat") - return JSONResponse({"error": str(e)}), 500 - - -@backend.get("/config") -async def getConfig(access_token: str = Header(None, alias="X-Ms-Token-Lhmsso-Access-Token")): - cfg = get_config_and_authentificate(access_token) - frontend_features = cfg["configuration_features"]["frontend"] - models = cast( - List[ModelsConfig], 
cfg["configuration_features"]["backend"]["models"] - ) - models_dto_list = [] - for model in models: - dto = ModelsDTO( - model_name=model["model_name"], - max_tokens=model["max_tokens"], - description=model["description"], - ) - models_dto_list.append(dto) - return JSONResponse({"frontend": frontend_features, "models": models_dto_list}) - - -@backend.get("/statistics") -async def getStatistics(access_token: str = Header(None, alias="X-Ms-Token-Lhmsso-Access-Token")): - try: - cfg = get_config_and_authentificate(access_token) - repo = cfg["repository"] - sum_by_department = repo.sumByDepartment() - avg_by_department = repo.avgByDepartment() - return JSONResponse({"sum": float(sum_by_department), "avg": float(avg_by_department)}) - except Exception as e: - return JSONResponse(content={"error": e}, status_code=404) - - -@backend.post("/counttokens") -async def counttokens(request: Request, access_token: str = Header(None, alias="X-Ms-Token-Lhmsso-Access-Token") ): - get_config_and_authentificate(access_token) - if not request.json(): - return JSONResponse({"error": "request must be json"}, status_code=415) - request_json = await request.json() - message = request_json["text"] or "" - model = request_json["model"]["model_name"] or "gpt-4o-mini" - counted_tokens = num_tokens_from_messages([HumanMessage(message)], model) - return JSONResponse(CountResult(count=counted_tokens)) - - -@backend.get("/statistics/export") -async def getStatisticsCSV(request: Request, - access_token: str = Header(None, alias="X-Ms-Token-Lhmsso-Access-Token")): - try: - cfg = get_config_and_authentificate(access_token) - repo = cfg["repository"] - export = repo.export() - return FileResponse(export, filename="statistics.csv", as_attachment=True) - except Exception as e: - return JSONResponse(content={"error": e}, status_code=404) - - -@backend.get("/health") -def health_check(): - return "OK" - - -def get_config(): - return cast(AppConfig, backend.state.app_config) - - -def get_config_and_authentificate(access_token): - cfg = get_config() - if cfg["configuration_features"]["backend"]["enable_auth"]: - ensure_authentification(access_token=access_token) - return cfg - - -def ensure_authentification(access_token): - cfg = get_config() - auth_client: AuthentificationHelper = cfg["authentification_client"] - claims = auth_client.authentificate(accesstoken=access_token) - return auth_client, claims - - -def get_department(id_token): - cfg = get_config() - - if cfg["configuration_features"]["backend"]["enable_auth"]: - auth_client: AuthentificationHelper = cfg["authentification_client"] - id_claims = auth_client.decode(id_token) - return auth_client.getDepartment(claims=id_claims) - else: - return None diff --git a/app/frontend/src/pages/version/Version.tsx b/app/frontend/src/pages/version/Version.tsx index 31fbb101..c3b7078d 100644 --- a/app/frontend/src/pages/version/Version.tsx +++ b/app/frontend/src/pages/version/Version.tsx @@ -41,10 +41,10 @@ const Version = () => {

{t('version.fixed')}

-                      • Version wird wieder richtig gespeichert und in den Einstellungen angezeigt.
+                      • Versionsnummer wird wieder richtig gespeichert und in den Einstellungen angezeigt.
-                      • Maximale Tokens aus der Config aufgeteilt in Input- und Output- Tokens. Dadurch laufen Modelle mit kleineren Contextfenster (wie z.B. Mistral) nicht mehr in einen Fehler.
+                      • Maximale Tokens aus der Config aufgeteilt in Input- und Output-Tokens. Dadurch laufen Modelle mit kleineren Contextfenster (wie z.B. Mistral) nicht mehr in einen Fehler.

{t('version.changed')}

From da4a38a7d6323bad1a66244efb830554e5abc7d8 Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Wed, 11 Sep 2024 16:25:31 +0200 Subject: [PATCH 5/6] :shirt: make ruff happy --- tests/integration/test_app.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/integration/test_app.py b/tests/integration/test_app.py index 51ff83e4..fdcbe0f0 100644 --- a/tests/integration/test_app.py +++ b/tests/integration/test_app.py @@ -4,12 +4,10 @@ import PyPDF2 import pytest -import quart.testing.app from httpx import Request, Response from openai import BadRequestError from quart.datastructures import FileStorage -import app from brainstorm.brainstormresult import BrainstormResult from core.types.Chunk import Chunk from summarize.summarizeresult import SummarizeResult From 445bb6a073a858c934d3f50f6fcf53e32b7ec957 Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Wed, 11 Sep 2024 17:25:26 +0200 Subject: [PATCH 6/6] =?UTF-8?q?=F0=9F=9A=91=20fix=20max=20tokens=20not=20w?= =?UTF-8?q?orking,=20on=20other=20then=20default=20llm?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/backend/core/llmhelper.py | 46 +++++++++++++++++++++++++--- app/backend/core/types/LlmConfigs.py | 1 - 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/app/backend/core/llmhelper.py b/app/backend/core/llmhelper.py index 09d2d56a..3bc107c6 100644 --- a/app/backend/core/llmhelper.py +++ b/app/backend/core/llmhelper.py @@ -63,7 +63,27 @@ def getModel(models: List[ModelsConfig], n=n, streaming=streaming, temperature=temperature, - ) + ).configurable_fields( + temperature=ConfigurableField( + id="llm_temperature", + name="LLM Temperature", + description="The temperature of the LLM", + ), + max_tokens= ConfigurableField( + id="llm_max_tokens", + name="LLM max Tokens", + description="The token Limit of the LLM", + ), + streaming = ConfigurableField( + id="llm_streaming", + name="Streaming", + description="Should the LLM Stream"), + callbacks = ConfigurableField( + id="llm_callbacks", + name="Callbacks", + description="Callbacks for the llm") + + ) elif model["type"] == "OPENAI": alternative = ChatOpenAI( model=model["model_name"], @@ -73,6 +93,26 @@ def getModel(models: List[ModelsConfig], n=n, streaming=streaming, temperature=temperature, + ).configurable_fields( + temperature=ConfigurableField( + id="llm_temperature", + name="LLM Temperature", + description="The temperature of the LLM", + ), + max_tokens= ConfigurableField( + id="llm_max_tokens", + name="LLM max Tokens", + description="The token Limit of the LLM", + ), + streaming = ConfigurableField( + id="llm_streaming", + name="Streaming", + description="Should the LLM Stream"), + callbacks = ConfigurableField( + id="llm_callbacks", + name="Callbacks", + description="Callbacks for the llm") + ) alternatives[model["model_name"]] = alternative llm = llm.configurable_fields( @@ -86,10 +126,6 @@ def getModel(models: List[ModelsConfig], name="LLM max Tokens", description="The token Limit of the LLM", ), - openai_api_key = ConfigurableField( - id="llm_api_key", - name="The api key", - description="The api key"), streaming = ConfigurableField( id="llm_streaming", name="Streaming", diff --git a/app/backend/core/types/LlmConfigs.py b/app/backend/core/types/LlmConfigs.py index 9d23d498..7326f320 100644 --- a/app/backend/core/types/LlmConfigs.py +++ b/app/backend/core/types/LlmConfigs.py @@ -10,6 +10,5 @@ class LlmConfigs(TypedDict, total=False): llm: NotRequired[str] # one of the 
SupportedModels llm_max_tokens: NotRequired[int] llm_temperature: NotRequired[float] - llm_api_key: NotRequired[str] llm_streaming: NotRequired[bool] llm_callbacks: NotRequired[List] \ No newline at end of file
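
Taken together, patches 1 and 6 route the renamed max_output_tokens request value into LangChain's configurable "llm_max_tokens" field so the token limit also reaches models other than the default one. The following is a minimal sketch of that pattern, assuming langchain_openai is installed; the model name, endpoint and API key are placeholders, not values from the configs above:

    from langchain_core.runnables import ConfigurableField
    from langchain_openai import ChatOpenAI

    # Build a base LLM and expose max_tokens/temperature as configurable fields,
    # mirroring what getModel() in llmhelper.py now does for every alternative model.
    llm = ChatOpenAI(
        model="gpt-4o-mini",   # placeholder model name
        api_key="<api-key>",   # placeholder credentials
        base_url="<endpoint>", # placeholder endpoint
        max_tokens=4000,
        temperature=0.7,
        streaming=True,
    ).configurable_fields(
        max_tokens=ConfigurableField(id="llm_max_tokens", name="LLM max Tokens"),
        temperature=ConfigurableField(id="llm_temperature", name="LLM Temperature"),
    )

    # Per request, apply the client-supplied max_output_tokens as an override,
    # the same way chat.py builds its LlmConfigs and calls with_config().
    configured_llm = llm.with_config(
        configurable={"llm_max_tokens": 1024, "llm_temperature": 0.2}
    )

Because the override is keyed by field id rather than by model, exposing the same ids on every alternative (the patch 6 change) is what makes the limit take effect when a non-default model is selected.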