From 3fee797904a3f50551acdf8059266ad825eff538 Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Wed, 11 Sep 2024 11:52:27 +0200 Subject: [PATCH 1/6] :bug: fixed bug with max_tokens --- app/backend/app.py | 10 +- app/backend/backend.py | 260 ++++++++++++++++++ app/backend/chat/chat.py | 12 +- app/backend/core/llmhelper.py | 10 +- app/backend/core/types/Config.py | 6 +- app/backend/init_app.py | 6 +- app/frontend/src/api/api.ts | 2 +- app/frontend/src/api/models.ts | 5 +- .../ChatsettingsDrawer/ChatsettingsDrawer.tsx | 11 +- .../LLMSelector/LLMContextProvider.tsx | 4 +- .../QuestionInput/QuestionInput.tsx | 2 +- .../src/components/SumInput/SumInput.tsx | 2 +- app/frontend/src/pages/chat/Chat.tsx | 21 +- app/frontend/src/pages/layout/Layout.tsx | 6 +- app/frontend/src/service/storage.ts | 14 +- config/default.json | 6 +- config/mucgpt_config.schema.json | 39 ++- tests/integration/test_app.py | 13 +- tests/integration/test_config.json | 3 +- tests/unit/test_llmhelper.py | 23 +- 20 files changed, 367 insertions(+), 88 deletions(-) create mode 100644 app/backend/backend.py diff --git a/app/backend/app.py b/app/backend/app.py index db6b7789..28205749 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -101,12 +101,12 @@ async def chat_stream(): try: impl = cfg["chat_approaches"] temperature=request_json['temperature'] or 0.7 - max_tokens=request_json['max_tokens'] or 4096 + max_output_tokens=request_json['max_output_tokens'] or 4096 system_message = request_json['system_message'] or None model = request_json['model'] or "gpt-4o-mini" response_generator = impl.run_with_streaming(history= request_json["history"], temperature=temperature, - max_tokens=max_tokens, + max_output_tokens=max_output_tokens, system_message=system_message, model=model, department= department) @@ -128,13 +128,13 @@ async def chat(): try: impl = cfg["chat_approaches"] temperature=request_json['temperature'] or 0.7 - max_tokens=request_json['max_tokens'] or 4096 + max_output_tokens=request_json['max_output_tokens'] or 4096 model_name=request_json['model'] or "gpt-4o-mini" system_message = request_json['system_message'] or None history = request_json["history"] chatResult = impl.run_without_streaming(history= history, temperature=temperature, - max_tokens=max_tokens, + max_output_tokens=max_output_tokens, system_message=system_message, department= department, model_name= model_name) @@ -150,7 +150,7 @@ async def getConfig(): models= cast(List[ModelsConfig], cfg["configuration_features"]["backend"]["models"]) models_dto_list = [] for model in models: - dto = ModelsDTO(model_name=model["model_name"], max_tokens=model["max_tokens"], description=model["description"]) + dto = ModelsDTO(model_name=model["model_name"], max_output_tokens=model["max_output_tokens"], max_input_tokens=model["max_input_tokens"], description=model["description"]) models_dto_list.append(dto) return jsonify({ "frontend": frontend_features, diff --git a/app/backend/backend.py b/app/backend/backend.py new file mode 100644 index 00000000..4e5bae6c --- /dev/null +++ b/app/backend/backend.py @@ -0,0 +1,260 @@ +import io +import logging +from contextlib import asynccontextmanager +from typing import List, cast + +from fastapi import FastAPI, File, Form, Header, HTTPException, Request, UploadFile +from fastapi.responses import ( + FileResponse, + HTMLResponse, + JSONResponse, + RedirectResponse, + StreamingResponse, +) +from fastapi.staticfiles import StaticFiles +from langchain_core.messages.human import HumanMessage 
+from pydantic_core import from_json + +from core.authentification import AuthentificationHelper, AuthError +from core.helper import format_as_ndjson +from core.modelhelper import num_tokens_from_messages +from core.types.AppConfig import AppConfig +from core.types.ChatRequest import ChatRequest +from core.types.Config import ModelsConfig, ModelsDTO +from core.types.countresult import CountResult +from core.types.SumRequest import SumRequest +from init_app import initApp + + +@asynccontextmanager +async def lifespan(backend: FastAPI): + backend.state.app_config = await initApp() + yield + + +backend = FastAPI(title="MUCGPT", lifespan=lifespan) +backend.mount("/static", StaticFiles(directory="static"), name="static") +backend.state.app_config = None + + +@backend.exception_handler(AuthError) +async def handleAuthError(error: AuthError): + return error.error, error.status_code + + +@backend.get("/", include_in_schema=False) +async def index(request: Request): + get_config_and_authentificate(request) + with open("static/index.html") as f: + return HTMLResponse(content=f.read()) + + +@backend.get("/favicon.ico", include_in_schema=False) +async def favicon(): + return RedirectResponse(url="/static/favicon.ico") + + +@backend.get("/assets/{path}") +async def assets(request: Request, path: str): + get_config_and_authentificate(request) + return RedirectResponse(url="/static/assets/" + path) + +@backend.post("/sum") +async def sum( + body: str = Form(...), + file: UploadFile = File(None), + id_token: str = Header(None, alias= "X-Ms-Token-Lhmsso-Id-Token"), + access_token: str = Header(None, alias="X-Ms-Token-Lhmsso-Access-Token") +): + cfg = get_config_and_authentificate(access_token=access_token) + department = get_department(id_token=id_token) + sumRequest = SumRequest.model_validate(from_json(body)) + text =sumRequest.text if file is None else None + if(file is not None): + file_content = io.BytesIO(await file.read()) + else: + file_content = None + try: + impl = cfg["sum_approaches"] + splits = impl.split(detaillevel=sumRequest.detaillevel, file=file_content, text=text) + r = await impl.summarize( + splits=splits, + department=department, + language=sumRequest.language, + model_name=sumRequest.model, + ) + return JSONResponse(content=r) + except Exception as e: + logging.exception("Exception in /sum") + return JSONResponse({"error": str(e)}, status_code=500) +#TODO remove for prod +@backend.exception_handler(HTTPException) +async def http_exception_handler(request: Request, exc: HTTPException): + logging.error(f"Error processing request: {exc.detail}") + return JSONResponse(status_code=exc.status_code, content={"detail": exc.detail}) + + + +@backend.post("/brainstorm") +async def brainstorm(request: Request, + id_token: str = Header(None, alias= "X-Ms-Token-Lhmsso-Id-Token"), + access_token: str = Header(None, alias="X-Ms-Token-Lhmsso-Access-Token")): + cfg = get_config_and_authentificate(access_token=access_token) + try: + request_json = await request.json() + except ValueError: + return JSONResponse(content={"error": "request must be json"}, status_code=415) + department = get_department(id_token=id_token) + + try: + impl = cfg["brainstorm_approaches"] + r = await impl.brainstorm( + topic=request_json["topic"], + language=request_json["language"] or "Deutsch", + department=department, + model_name=request_json["model"] or "gpt-4o-mini", + ) + return JSONResponse(r) + except Exception as e: + logging.exception("Exception in /brainstorm") + msg = ( + "Momentan liegt eine starke Auslastung vor. 
Bitte in einigen Sekunden erneut versuchen." + if "Rate limit" in str(e) + else str(e) + ) + return JSONResponse({"error": msg}), 500 + + +@backend.post("/chat_stream") +async def chat_stream(request: ChatRequest, + access_token: str = Header(None, alias="X-Ms-Token-Lhmsso-Access-Token"), + id_token: str = Header(None, alias= "X-Ms-Token-Lhmsso-Id-Token")): + cfg = get_config_and_authentificate(access_token=access_token) + department = get_department(id_token=id_token) + + try: + impl = cfg["chat_approaches"] + response_generator = impl.run_with_streaming( + history=request.history, + temperature=request.temperature, + max_tokens=request.max_tokens, + system_message=request.system_message, + model=request.model, + department=department, + ) + response = StreamingResponse(format_as_ndjson(response_generator)) + response.timeout = None # type: ignore + return response + except Exception as e: + logging.exception("Exception in /chat") + return JSONResponse({"error": str(e)}), 500 + + +@backend.post("/chat") +async def chat(request: ChatRequest, + access_token: str = Header(None, alias="X-Ms-Token-Lhmsso-Access-Token"), + id_token: str = Header(None, alias= "X-Ms-Token-Lhmsso-Id-Token")): + cfg = get_config_and_authentificate(access_token=access_token) + department = get_department(id_token=id_token) + try: + impl = cfg["chat_approaches"] + chatResult = impl.run_without_streaming( + history=request.history, + temperature=request.temperature, + max_tokens=request.max_tokens, + system_message=request.system_message, + department=department, + model_name=request.model, + ) + return JSONResponse(chatResult) + except Exception as e: + logging.exception("Exception in /chat") + return JSONResponse({"error": str(e)}), 500 + + +@backend.get("/config") +async def getConfig(access_token: str = Header(None, alias="X-Ms-Token-Lhmsso-Access-Token")): + cfg = get_config_and_authentificate(access_token) + frontend_features = cfg["configuration_features"]["frontend"] + models = cast( + List[ModelsConfig], cfg["configuration_features"]["backend"]["models"] + ) + models_dto_list = [] + for model in models: + dto = ModelsDTO( + model_name=model["model_name"], + max_tokens=model["max_tokens"], + description=model["description"], + ) + models_dto_list.append(dto) + return JSONResponse({"frontend": frontend_features, "models": models_dto_list}) + + +@backend.get("/statistics") +async def getStatistics(access_token: str = Header(None, alias="X-Ms-Token-Lhmsso-Access-Token")): + try: + cfg = get_config_and_authentificate(access_token) + repo = cfg["repository"] + sum_by_department = repo.sumByDepartment() + avg_by_department = repo.avgByDepartment() + return JSONResponse({"sum": float(sum_by_department), "avg": float(avg_by_department)}) + except Exception as e: + return JSONResponse(content={"error": e}, status_code=404) + + +@backend.post("/counttokens") +async def counttokens(request: Request, access_token: str = Header(None, alias="X-Ms-Token-Lhmsso-Access-Token") ): + get_config_and_authentificate(access_token) + if not request.json(): + return JSONResponse({"error": "request must be json"}, status_code=415) + request_json = await request.json() + message = request_json["text"] or "" + model = request_json["model"]["model_name"] or "gpt-4o-mini" + counted_tokens = num_tokens_from_messages([HumanMessage(message)], model) + return JSONResponse(CountResult(count=counted_tokens)) + + +@backend.get("/statistics/export") +async def getStatisticsCSV(request: Request, + access_token: str = Header(None, 
alias="X-Ms-Token-Lhmsso-Access-Token")): + try: + cfg = get_config_and_authentificate(access_token) + repo = cfg["repository"] + export = repo.export() + return FileResponse(export, filename="statistics.csv", as_attachment=True) + except Exception as e: + return JSONResponse(content={"error": e}, status_code=404) + + +@backend.get("/health") +def health_check(): + return "OK" + + +def get_config(): + return cast(AppConfig, backend.state.app_config) + + +def get_config_and_authentificate(access_token): + cfg = get_config() + if cfg["configuration_features"]["backend"]["enable_auth"]: + ensure_authentification(access_token=access_token) + return cfg + + +def ensure_authentification(access_token): + cfg = get_config() + auth_client: AuthentificationHelper = cfg["authentification_client"] + claims = auth_client.authentificate(accesstoken=access_token) + return auth_client, claims + + +def get_department(id_token): + cfg = get_config() + + if cfg["configuration_features"]["backend"]["enable_auth"]: + auth_client: AuthentificationHelper = cfg["authentification_client"] + id_claims = auth_client.decode(id_token) + return auth_client.getDepartment(claims=id_claims) + else: + return None diff --git a/app/backend/chat/chat.py b/app/backend/chat/chat.py index 25a43c64..70376d26 100644 --- a/app/backend/chat/chat.py +++ b/app/backend/chat/chat.py @@ -21,12 +21,12 @@ def __init__(self, llm: RunnableSerializable, config: ApproachConfig, repo: Repo self.config = config self.repo = repo - async def run_with_streaming(self, history: 'list[dict[str, str]]',max_tokens: int, temperature: float, system_message: Optional[str], model: str, department: Optional[str]) -> AsyncGenerator[Chunk, None]: + async def run_with_streaming(self, history: 'list[dict[str, str]]',max_output_tokens: int, temperature: float, system_message: Optional[str], model: str, department: Optional[str]) -> AsyncGenerator[Chunk, None]: """call the llm in streaming mode Args: history (list[dict[str, str]]): the history,user and ai messages - max_tokens (int): max_tokens to generate + max_output_tokens (int): max_output_tokens to generate temperature (float): temperature of the llm system_message (Optional[str]): the system message department (Optional[str]): from which department comes the call @@ -40,7 +40,7 @@ async def run_with_streaming(self, history: 'list[dict[str, str]]',max_tokens: i """ # configure config: LlmConfigs = { - "llm_max_tokens": max_tokens, + "llm_max_tokens": max_output_tokens, "llm_temperature": temperature, "llm_streaming": True, "llm": model @@ -74,12 +74,12 @@ async def run_with_streaming(self, history: 'list[dict[str, str]]',max_tokens: i info = ChunkInfo(requesttokens=num_tokens_from_messages([msgs[-1]],model), streamedtokens=num_tokens_from_messages([HumanMessage(result)], model)) yield Chunk(type="I", message=info, order=position) - def run_without_streaming(self, history: "Sequence[dict[str, str]]", max_tokens: int, temperature: float, system_message: Optional[str], department: Optional[str], model_name:str) -> ChatResult: + def run_without_streaming(self, history: "Sequence[dict[str, str]]", max_output_tokens: int, temperature: float, system_message: Optional[str], department: Optional[str], model_name:str) -> ChatResult: """calls the llm in blocking mode, returns the full result Args: history (list[dict[str, str]]): the history,user and ai messages - max_tokens (int): max_tokens to generate + max_output_tokens (int): max_output_tokens to generate temperature (float): temperature of the llm system_message 
(Optional[str]): the system message department (Optional[str]): from which department comes the call @@ -88,7 +88,7 @@ def run_without_streaming(self, history: "Sequence[dict[str, str]]", max_tokens: ChatResult: the generated text from the llm """ config: LlmConfigs = { - "llm_max_tokens": max_tokens, + "llm_max_tokens": max_output_tokens, "llm_temperature": temperature, "llm_streaming": False, } diff --git a/app/backend/core/llmhelper.py b/app/backend/core/llmhelper.py index 65dffb05..09d2d56a 100644 --- a/app/backend/core/llmhelper.py +++ b/app/backend/core/llmhelper.py @@ -13,7 +13,7 @@ class ModelsConfigurationException(Exception): def getModel(models: List[ModelsConfig], - max_tokens: int, + max_output_tokens: int, n: int, temperature: float, streaming: bool) -> RunnableSerializable: @@ -31,7 +31,7 @@ def getModel(models: List[ModelsConfig], openai_api_key=default_model["api_key"], azure_endpoint=default_model["endpoint"], openai_api_version=default_model["api_version"], - max_tokens=max_tokens, + max_tokens=max_output_tokens, n=n, streaming=streaming, temperature=temperature, @@ -42,7 +42,7 @@ def getModel(models: List[ModelsConfig], model=default_model["model_name"], api_key=default_model["api_key"], base_url=default_model["endpoint"], - max_tokens=max_tokens, + max_tokens=max_output_tokens, n=n, streaming=streaming, temperature=temperature, @@ -59,7 +59,7 @@ def getModel(models: List[ModelsConfig], azure_endpoint=model["endpoint"], openai_api_version=model["api_version"], openai_api_type="azure", - max_tokens=max_tokens, + max_tokens=max_output_tokens, n=n, streaming=streaming, temperature=temperature, @@ -69,7 +69,7 @@ def getModel(models: List[ModelsConfig], model=model["model_name"], api_key=model["api_key"], base_url=model["endpoint"], - max_tokens=max_tokens, + max_tokens=max_output_tokens, n=n, streaming=streaming, temperature=temperature, diff --git a/app/backend/core/types/Config.py b/app/backend/core/types/Config.py index 552faba4..bfa3cc46 100644 --- a/app/backend/core/types/Config.py +++ b/app/backend/core/types/Config.py @@ -11,11 +11,13 @@ class ModelsConfig(TypedDict): endpoint: str api_key: str api_version: str - max_tokens: int + max_output_tokens: int + max_input_tokens: int class ModelsDTO(TypedDict): model_name: str - max_tokens: int + max_output_tokens: int + max_input_tokens: int description: str class SSOConfig(TypedDict): diff --git a/app/backend/init_app.py b/app/backend/init_app.py index 90ebed16..821f49a6 100644 --- a/app/backend/init_app.py +++ b/app/backend/init_app.py @@ -24,19 +24,19 @@ def initApproaches(cfg: BackendConfig, repoHelper: Repository) -> Tuple[Chat, Br """ brainstormllm = getModel( models=cfg["models"], - max_tokens = 4000, + max_output_tokens = 4000, n = 1, streaming=False, temperature=0.9) sumllm = getModel( models=cfg["models"], - max_tokens = 2000, + max_output_tokens = 2000, n = 1, streaming=False, temperature=0) chatlllm = getModel( models=cfg["models"], - max_tokens=4000, + max_output_tokens=4000, n = 1, streaming=True, temperature=0.7) diff --git a/app/frontend/src/api/api.ts b/app/frontend/src/api/api.ts index 5c379c92..e21241b5 100644 --- a/app/frontend/src/api/api.ts +++ b/app/frontend/src/api/api.ts @@ -14,7 +14,7 @@ export async function chatApi(options: ChatRequest): Promise { temperature: options.temperature, language: options.language, system_message: options.system_message, - max_tokens: options.max_tokens, + max_output_tokens: options.max_output_tokens, model: options.model }) }); diff --git 
a/app/frontend/src/api/models.ts b/app/frontend/src/api/models.ts index 430ad10a..2f7642ee 100644 --- a/app/frontend/src/api/models.ts +++ b/app/frontend/src/api/models.ts @@ -18,7 +18,7 @@ export type ChatRequest = { history: ChatTurn[]; temperature?: number; language?: string; - max_tokens?: number; + max_output_tokens?: number; system_message?: string; shouldStream?: boolean; model?: string; @@ -50,7 +50,8 @@ export interface Frontend { } export interface Model { - max_tokens: number; + max_output_tokens: number; + max_input_tokens: number; model_name: string; description: string; } diff --git a/app/frontend/src/components/ChatsettingsDrawer/ChatsettingsDrawer.tsx b/app/frontend/src/components/ChatsettingsDrawer/ChatsettingsDrawer.tsx index c7041a85..db550ce1 100644 --- a/app/frontend/src/components/ChatsettingsDrawer/ChatsettingsDrawer.tsx +++ b/app/frontend/src/components/ChatsettingsDrawer/ChatsettingsDrawer.tsx @@ -20,14 +20,14 @@ import { LLMContext } from "../LLMSelector/LLMContextProvider"; interface Props { temperature: number; setTemperature: (temp: number, id: number) => void; - max_tokens: number; + max_output_tokens: number; setMaxTokens: (maxTokens: number, id: number) => void; systemPrompt: string; setSystemPrompt: (systemPrompt: string, id: number) => void; current_id: number; } -export const ChatsettingsDrawer = ({ temperature, setTemperature, max_tokens, setMaxTokens, systemPrompt, setSystemPrompt, current_id }: Props) => { +export const ChatsettingsDrawer = ({ temperature, setTemperature, max_output_tokens, setMaxTokens, systemPrompt, setSystemPrompt, current_id }: Props) => { const [isOpen, setIsOpen] = useState(false); const { t, i18n } = useTranslation(); const { LLM } = useContext(LLMContext) @@ -41,11 +41,10 @@ export const ChatsettingsDrawer = ({ temperature, setTemperature, max_tokens, se const max_tokensID = useId("input-max_tokens"); const min_max_tokens = 10; - const max_max_tokens = LLM.max_tokens; + const max_max_tokens = LLM.max_output_tokens; const min_temp = 0; const max_temp = 1; - const isEmptySystemPrompt = systemPrompt.trim() === ""; const onTemperatureChange: SliderProps["onChange"] = (_, data) => @@ -144,12 +143,12 @@ export const ChatsettingsDrawer = ({ temperature, setTemperature, max_tokens, se defaultValue={20} onChange={onMaxtokensChange} aria-valuetext={t('components.chattsettingsdrawer.max_lenght') + ` ist ${max_tokensID}`} - value={max_tokens} + value={max_output_tokens} aria-labelledby={max_tokens_headerID} id={max_tokensID} />

diff --git a/app/frontend/src/components/LLMSelector/LLMContextProvider.tsx b/app/frontend/src/components/LLMSelector/LLMContextProvider.tsx index d291e69d..e1f5913f 100644 --- a/app/frontend/src/components/LLMSelector/LLMContextProvider.tsx +++ b/app/frontend/src/components/LLMSelector/LLMContextProvider.tsx @@ -8,10 +8,10 @@ interface ILLMProvider { } export const DEFAULTLLM = "gpt-4o-mini"; -export const LLMContext = React.createContext({ LLM: { model_name: DEFAULTLLM, max_tokens: 0, description: "" }, setLLM: () => { } }); +export const LLMContext = React.createContext({ LLM: { model_name: DEFAULTLLM, max_output_tokens: 0, max_input_tokens: 0, description: "" }, setLLM: () => { } }); export const LLMContextProvider = (props: React.PropsWithChildren<{}>) => { - const [LLM, setLLM] = useState({ model_name: DEFAULTLLM, max_tokens: 0, description: "" }); + const [LLM, setLLM] = useState({ model_name: DEFAULTLLM, max_output_tokens: 0, max_input_tokens: 0, description: "" }); return ( diff --git a/app/frontend/src/components/QuestionInput/QuestionInput.tsx b/app/frontend/src/components/QuestionInput/QuestionInput.tsx index 7787220e..801e2195 100644 --- a/app/frontend/src/components/QuestionInput/QuestionInput.tsx +++ b/app/frontend/src/components/QuestionInput/QuestionInput.tsx @@ -21,7 +21,7 @@ interface Props { export const QuestionInput = ({ onSend, disabled, placeholder, clearOnSend, tokens_used, token_limit_tracking = true, question, setQuestion }: Props) => { const { t, i18n } = useTranslation(); const { LLM } = useContext(LLMContext) - const wordCount = LLM.max_tokens; + const wordCount = LLM.max_input_tokens; const getDescription = () => { let actual = countWords(question) + tokens_used; let text; diff --git a/app/frontend/src/components/SumInput/SumInput.tsx b/app/frontend/src/components/SumInput/SumInput.tsx index ad8348a8..8ee9bd30 100644 --- a/app/frontend/src/components/SumInput/SumInput.tsx +++ b/app/frontend/src/components/SumInput/SumInput.tsx @@ -24,7 +24,7 @@ export const SumInput = ({ onSend, disabled, placeholder, clearOnSend, tokens_us const [dragging, setDragging] = useState(false); const [file, setFile] = useState(undefined); const { LLM } = useContext(LLMContext) - const wordCount = LLM.max_tokens; + const wordCount = LLM.max_input_tokens; const getDescription = () => { let actual = countWords(question) + tokens_used; let text; diff --git a/app/frontend/src/pages/chat/Chat.tsx b/app/frontend/src/pages/chat/Chat.tsx index 7b913a71..b747c3fe 100644 --- a/app/frontend/src/pages/chat/Chat.tsx +++ b/app/frontend/src/pages/chat/Chat.tsx @@ -41,12 +41,12 @@ const Chat = () => { const [answers, setAnswers] = useState<[user: string, response: AskResponse, user_tokens: number][]>([]); const [question, setQuestion] = useState(""); - const temperature_pref = Number(localStorage.getItem(STORAGE_KEYS.CHAT_TEMPERATURE)) || 0.7; - const max_tokens_pref = Number(localStorage.getItem(STORAGE_KEYS.CHAT_MAX_TOKENS)) || 4000; + const temperature_pref = Number(localStorage.getItem(STORAGE_KEYS.CHAT_TEMPERATURE) || 0.7); + const max_output_tokens_pref = Number(localStorage.getItem(STORAGE_KEYS.CHAT_MAX_TOKENS)) || 4000; const systemPrompt_pref = localStorage.getItem(STORAGE_KEYS.CHAT_SYSTEM_PROMPT) || ""; const [temperature, setTemperature] = useState(temperature_pref); - const [max_tokens, setMaxTokens] = useState(max_tokens_pref); + const [max_output_tokens, setMaxOutputTokens] = useState(max_output_tokens_pref); const [systemPrompt, setSystemPrompt] = useState(systemPrompt_pref); const 
storage: indexedDBStorage = { @@ -69,6 +69,9 @@ const Chat = () => { useEffect(() => { makeTokenCountRequest(); + if (max_output_tokens > LLM.max_output_tokens && LLM.max_output_tokens != 0) { + onMaxTokensChanged(LLM.max_output_tokens, currentId) + } }, [debouncedSystemPrompt, LLM, makeTokenCountRequest]); useEffect(() => { @@ -119,7 +122,7 @@ const Chat = () => { error && setError(undefined); setIsLoading(true); let askResponse: AskResponse = {} as AskResponse; - saveToDB([question, { ...askResponse, answer: "", tokens: 0 }, 0], storage, startId, idCounter, setCurrentId, setIdCounter, language, temperature, system ? system : "", max_tokens, LLM.model_name) + saveToDB([question, { ...askResponse, answer: "", tokens: 0 }, 0], storage, startId, idCounter, setCurrentId, setIdCounter, language, temperature, system ? system : "", max_output_tokens, LLM.model_name) try { const history: ChatTurn[] = answers.map(a => ({ user: a[0], bot: a[1].answer })); const request: ChatRequest = { @@ -128,7 +131,7 @@ const Chat = () => { language: language, temperature: temperature, system_message: system ? system : "", - max_tokens: max_tokens, + max_output_tokens: max_output_tokens, model: LLM.model_name }; @@ -166,7 +169,7 @@ const Chat = () => { } } if (startId == currentId) { - saveToDB([question, latestResponse, user_tokens], storage, startId, idCounter, setCurrentId, setIdCounter, language, temperature, system ? system : "", max_tokens, LLM.model_name) + saveToDB([question, latestResponse, user_tokens], storage, startId, idCounter, setCurrentId, setIdCounter, language, temperature, system ? system : "", max_output_tokens, LLM.model_name) } } else { const parsedResponse: AskResponse = await response.json(); @@ -175,7 +178,7 @@ const Chat = () => { } setAnswers([...answers, [question, parsedResponse, 0]]); if (startId == currentId) { - saveToDB([question, parsedResponse, 0], storage, currentId, idCounter, setCurrentId, setIdCounter, language, temperature, system ? system : "", max_tokens, LLM.model_name) + saveToDB([question, parsedResponse, 0], storage, currentId, idCounter, setCurrentId, setIdCounter, language, temperature, system ? 
system : "", max_output_tokens, LLM.model_name) } } } catch (e) { @@ -221,7 +224,7 @@ const Chat = () => { }; const onMaxTokensChanged = (maxTokens: number, id: number) => { - setMaxTokens(maxTokens); + setMaxOutputTokens(maxTokens); localStorage.setItem(STORAGE_KEYS.CHAT_MAX_TOKENS, maxTokens.toString()); changeMaxTokensInDb(maxTokens, id, storage); }; @@ -251,7 +254,7 @@ const Chat = () => { { const [config, setConfig] = useState({ models: [{ "model_name": "KICC GPT", - "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, "description": "" }, { "model_name": "Unknown GPT", - "max_tokens": 100, + "max_input_tokens": 128000, + "max_output_tokens": 128000, "description": "" }], frontend: { diff --git a/app/frontend/src/service/storage.ts b/app/frontend/src/service/storage.ts index 1a87518d..456262c9 100644 --- a/app/frontend/src/service/storage.ts +++ b/app/frontend/src/service/storage.ts @@ -34,7 +34,7 @@ export async function saveToDB( language?: string, temperature?: number, system_message?: string, - max_tokens?: number, + max_output_tokens?: number, model?: string ) { let openRequest = indexedDB.open(storage.db_name, storage.db_version); @@ -59,7 +59,7 @@ export async function saveToDB( result.Data.LastEdited = Date.now(); if (storage.objectStore_name === "chat") { result.Options.system = system_message; - result.Options.maxTokens = max_tokens; + result.Options.maxTokens = max_output_tokens; result.Options.temperature = temperature; } data = result; @@ -67,8 +67,8 @@ export async function saveToDB( // if the chat does not exist in the DB let name: string = ""; let new_idcounter = id_counter; - if (language != undefined && temperature != undefined && system_message != undefined && max_tokens != undefined && model != undefined) { - name = await (await getChatName(a, language, temperature, system_message, max_tokens, model)).content; + if (language != undefined && temperature != undefined && system_message != undefined && max_output_tokens != undefined && model != undefined) { + name = await (await getChatName(a, language, temperature, system_message, max_output_tokens, model)).content; name = name.replaceAll('"', "").replaceAll(".", ""); } if (storage.objectStore_name === "chat") { @@ -78,7 +78,7 @@ export async function saveToDB( data = { Data: { Answers: [a], Name: name, LastEdited: Date.now() }, id: new_idcounter, - Options: { favorite: false, system: system_message, maxTokens: max_tokens, temperature: temperature } + Options: { favorite: false, system: system_message, maxTokens: max_output_tokens, temperature: temperature } }; } else { data = { @@ -100,7 +100,7 @@ export async function saveToDB( }; } -export async function getChatName(answers: any, language: string, temperature: number, system_message: string, max_tokens: number, model: string) { +export async function getChatName(answers: any, language: string, temperature: number, system_message: string, max_output_tokens: number, model: string) { const history: ChatTurn[] = [{ user: answers[0], bot: answers[1].answer }]; const request: ChatRequest = { history: [ @@ -114,7 +114,7 @@ export async function getChatName(answers: any, language: string, temperature: n language: language, temperature: temperature, system_message: system_message, - max_tokens: max_tokens, + max_output_tokens: max_output_tokens, model: model }; const response = await chatApi(request); diff --git a/config/default.json b/config/default.json index e1167fc7..87ff5c3c 100644 --- a/config/default.json +++ b/config/default.json @@ 
-34,7 +34,8 @@ "model_name": "TODO", "endpoint": "TODO", "api_key": "TODO", - "max_tokens": 128000 + "max_input_tokens": 128000, + "max_output_tokens": 3000 }, { "type": "AZURE", @@ -43,7 +44,8 @@ "endpoint": "TODO", "api_key": "TODO", "api_version": "TODO", - "max_tokens": 0 + "max_input_tokens": 128000, + "max_output_tokens": 4000 } ] } diff --git a/config/mucgpt_config.schema.json b/config/mucgpt_config.schema.json index ff4d1c89..526b53b5 100644 --- a/config/mucgpt_config.schema.json +++ b/config/mucgpt_config.schema.json @@ -243,7 +243,8 @@ "model_name", "endpoint", "api_key", - "max_tokens", + "max_input_tokens", + "max_output_tokens", "deployment", "api_version" ], @@ -277,14 +278,22 @@ "BLABLUBLAUBLAUBLA" ] }, - "max_tokens": { + "max_input_tokens": { "type": "integer", - "title": "The context length of the LLM", + "title": "The input context length of the LLM", "examples": [ 128000, 0 ] }, + "max_output_tokens": { + "type": "integer", + "title": "The output context length of the LLM", + "examples": [ + 8192, + 0 + ] + }, "deployment": { "type": "string", "default": "", @@ -307,7 +316,8 @@ "model_name": "mucgpt-mini", "endpoint": "mucgptmini.openai.azure.com", "api_key": "BALBLBLABUALB", - "max_tokens": 128000 + "max_input_tokens": 128000, + "max_output_tokens": 8192 }, { "type": "AZURE", @@ -316,7 +326,8 @@ "endpoint": "mucgpt-maxi.openai.azure.com", "api_key": "BALBABUALB", "api_version": "2024-01", - "max_tokens": 1000000 + "max_input_tokens": 128000, + "max_output_tokens": 8192 }] }, "examples": [ @@ -325,7 +336,8 @@ "model_name": "mucgpt-mini", "endpoint": "mucgptmini.openai.azure.com", "api_key": "BALBLBLABUALB", - "max_tokens": 128000 + "max_input_tokens": 128000, + "max_output_tokens": 8192 }, { "type": "AZURE", @@ -334,7 +346,8 @@ "endpoint": "mucgpt-maxi.openai.azure.com", "api_key": "BALBABUALB", "api_version": "2024-01", - "max_tokens": 1000000 + "max_input_tokens": 128000, + "max_output_tokens": 8192 }] ] } @@ -366,7 +379,8 @@ "model_name": "mucgpt-mini", "endpoint": "mucgptmini.openai.azure.com", "api_key": "BALBLBLABUALB", - "max_tokens": 128000 + "max_input_tokens": 128000, + "max_output_tokens": 8192 }, { "type": "AZURE", @@ -375,7 +389,8 @@ "endpoint": "mucgpt-maxi.openai.azure.com", "api_key": "BALBABUALB", "api_version": "2024-01", - "max_tokens": 1000000 + "max_input_tokens": 128000, + "max_output_tokens": 8192 }] }] } @@ -414,7 +429,8 @@ "model_name": "mucgpt-mini", "endpoint": "mucgptmini.openai.azure.com", "api_key": "BALBLBLABUALB", - "max_tokens": 128000 + "max_input_tokens": 128000, + "max_output_tokens": 8192 }, { "type": "AZURE", @@ -423,7 +439,8 @@ "endpoint": "mucgpt-maxi.openai.azure.com", "api_key": "BALBABUALB", "api_version": "2024-01", - "max_tokens": 1000000 + "max_input_tokens": 128000, + "max_output_tokens": 8192 }] } }] diff --git a/tests/integration/test_app.py b/tests/integration/test_app.py index f921ff39..51ff83e4 100644 --- a/tests/integration/test_app.py +++ b/tests/integration/test_app.py @@ -45,17 +45,6 @@ def fake_response(http_code): ) - -@pytest.mark.asyncio -@pytest.mark.integration -async def test_missing_env_vars(): - quart_app = app.create_app() - - with pytest.raises(quart.testing.app.LifespanError) as exc_info: - async with quart_app.test_app() as test_app: - test_app.test_client() - assert str(exc_info.value) == "Lifespan failure in startup. 
''AZURE_OPENAI_EMB_DEPLOYMENT''" - @pytest.mark.asyncio @pytest.mark.integration async def test_index(client): @@ -195,7 +184,7 @@ async def test_chatstream(client, mocker): mocker.patch("chat.chat.Chat.run_without_streaming", mock.AsyncMock(return_value=streaming_generator)) data = { "temperature": 0.1, - "max_tokens": 2400, + "max_output_tokens": 2400, "system_message": "", "model": "TEST_MODEL", "history": [{"user": "hi"}] diff --git a/tests/integration/test_config.json b/tests/integration/test_config.json index f33398e5..0a5445bd 100644 --- a/tests/integration/test_config.json +++ b/tests/integration/test_config.json @@ -35,7 +35,8 @@ "deployment": "NOT USED", "endpoint": "NOT USED", "api_key":"NOT USED", - "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 8192, "api_version": "NOT USED", "description": "NOT USED" } diff --git a/tests/unit/test_llmhelper.py b/tests/unit/test_llmhelper.py index 7ec597ec..ad020373 100644 --- a/tests/unit/test_llmhelper.py +++ b/tests/unit/test_llmhelper.py @@ -14,7 +14,8 @@ def setUp(self): "model_name": "model1", "endpoint": "TODO", "api_key": "TODO", - "max_tokens": 128000 + "max_input_tokens": 128000, + "max_output_tokens": 8192 } self.model2 ={ "type": "AZURE", @@ -23,14 +24,16 @@ def setUp(self): "api_version": "preview", "endpoint": "TODO", "api_key": "TODO", - "max_tokens": 128000 + "max_input_tokens": 128000, + "max_output_tokens": 8192 } self.model3 ={ "type": "TODO", "model_name": "model2", "endpoint": "TODO", "api_key": "TODO", - "max_tokens": 128000 + "max_input_tokens": 128000, + "max_output_tokens": 8192 } @pytest.mark.asyncio @@ -39,7 +42,7 @@ def test_getModel_returns_llm(self): model = getModel(models=[self.model1, self.model2], - max_tokens=10, + max_output_tokens=10, n=1, temperature=0.5, streaming=True) @@ -50,7 +53,7 @@ def test_getModel_returns_llm(self): def test_getModel_wrong_type(self): with self.assertRaises(ModelsConfigurationException): getModel(models=[self.model3], - max_tokens=10, + max_output_tokens=10, n=1, temperature=0.5, streaming=True) @@ -59,7 +62,7 @@ def test_getModel_wrong_type(self): @pytest.mark.unit def test_getModel_azure_first(self): model = getModel(models=[self.model2, self.model1], - max_tokens=10, + max_output_tokens=10, n=1, temperature=0.5, streaming=True) @@ -70,7 +73,7 @@ def test_getModel_azure_first(self): def test_getModel_no_model(self): with self.assertRaises(ModelsConfigurationException): getModel(models=[], - max_tokens=10, + max_output_tokens=10, n=1, temperature=0.5, streaming=True) @@ -79,7 +82,7 @@ def test_getModel_no_model(self): @pytest.mark.unit def test_getModel_configurable_fields(self): model = getModel(models=[self.model1, self.model2], - max_tokens=10, + max_output_tokens=10, n=1, temperature=0.5, streaming=True) @@ -91,7 +94,7 @@ def test_getModel_configurable_fields(self): @pytest.mark.unit def test_getModel_configurable_alternatives(self): model = getModel(models=[self.model1, self.model2], - max_tokens=10, + max_output_tokens=10, n=1, temperature=0.5, streaming=True) @@ -101,7 +104,7 @@ def test_getModel_configurable_alternatives(self): @pytest.mark.unit def test_getModel_fake_llm(self): model = getModel(models=[self.model1, self.model2], - max_tokens=10, + max_output_tokens=10, n=1, temperature=0.5, streaming=True) From e8d339d5a603b21050b9f65be6fdf407fdb5fc88 Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Wed, 11 Sep 2024 14:53:24 +0200 Subject: [PATCH 2/6] :bug: added version to /config --- app/backend/app.py 
| 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/backend/app.py b/app/backend/app.py index 28205749..5c54baa7 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -154,7 +154,8 @@ async def getConfig(): models_dto_list.append(dto) return jsonify({ "frontend": frontend_features, - "models": models_dto_list + "models": models_dto_list, + "version": cfg["configuration_features"]["version"] }) @bp.route("/statistics", methods=["GET"]) From e2b898774e19b8c6add8d4871e350da9ae66d2de Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Wed, 11 Sep 2024 16:06:55 +0200 Subject: [PATCH 3/6] :bookmark: Version 1.1.4 --- app/frontend/package.json | 2 +- app/frontend/src/pages/version/Version.tsx | 20 +++++++++++++++++++- config/base.json | 2 +- tests/integration/base.json | 2 +- 4 files changed, 22 insertions(+), 4 deletions(-) diff --git a/app/frontend/package.json b/app/frontend/package.json index fb1e576a..72293ef3 100644 --- a/app/frontend/package.json +++ b/app/frontend/package.json @@ -1,7 +1,7 @@ { "name": "mucgpt", "private": true, - "version": "1.1.3", + "version": "1.1.4", "type": "module", "engines": { "node": ">=16.0.0" diff --git a/app/frontend/src/pages/version/Version.tsx b/app/frontend/src/pages/version/Version.tsx index 56b304cb..31fbb101 100644 --- a/app/frontend/src/pages/version/Version.tsx +++ b/app/frontend/src/pages/version/Version.tsx @@ -32,7 +32,25 @@ const Version = () => {

{t('version.header')}

-
+                    [1.1.4] 11.09.2024
+
+                    {t('version.added')}
+
+                    {t('version.fixed')}
+                      • Version wird wieder richtig gespeichert und in den Einstellungen angezeigt.
+                      • Maximale Tokens aus der Config aufgeteilt in Input- und Output- Tokens. Dadurch laufen Modelle mit kleineren Contextfenster (wie z.B. Mistral) nicht mehr in einen Fehler.
+
+                    {t('version.changed')}
+
[1.1.3] 28.08.2024 diff --git a/config/base.json b/config/base.json index 15b52047..ccd05e57 100644 --- a/config/base.json +++ b/config/base.json @@ -1,3 +1,3 @@ { - "version": "1.1.3" + "version": "1.1.4" } \ No newline at end of file diff --git a/tests/integration/base.json b/tests/integration/base.json index 15b52047..ccd05e57 100644 --- a/tests/integration/base.json +++ b/tests/integration/base.json @@ -1,3 +1,3 @@ { - "version": "1.1.3" + "version": "1.1.4" } \ No newline at end of file From c6b7ec7cc4907dc032a035fcb24a6a1e49c58ebc Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Wed, 11 Sep 2024 16:19:35 +0200 Subject: [PATCH 4/6] :pencil: Typo --- app/backend/backend.py | 260 --------------------- app/frontend/src/pages/version/Version.tsx | 4 +- 2 files changed, 2 insertions(+), 262 deletions(-) delete mode 100644 app/backend/backend.py diff --git a/app/backend/backend.py b/app/backend/backend.py deleted file mode 100644 index 4e5bae6c..00000000 --- a/app/backend/backend.py +++ /dev/null @@ -1,260 +0,0 @@ -import io -import logging -from contextlib import asynccontextmanager -from typing import List, cast - -from fastapi import FastAPI, File, Form, Header, HTTPException, Request, UploadFile -from fastapi.responses import ( - FileResponse, - HTMLResponse, - JSONResponse, - RedirectResponse, - StreamingResponse, -) -from fastapi.staticfiles import StaticFiles -from langchain_core.messages.human import HumanMessage -from pydantic_core import from_json - -from core.authentification import AuthentificationHelper, AuthError -from core.helper import format_as_ndjson -from core.modelhelper import num_tokens_from_messages -from core.types.AppConfig import AppConfig -from core.types.ChatRequest import ChatRequest -from core.types.Config import ModelsConfig, ModelsDTO -from core.types.countresult import CountResult -from core.types.SumRequest import SumRequest -from init_app import initApp - - -@asynccontextmanager -async def lifespan(backend: FastAPI): - backend.state.app_config = await initApp() - yield - - -backend = FastAPI(title="MUCGPT", lifespan=lifespan) -backend.mount("/static", StaticFiles(directory="static"), name="static") -backend.state.app_config = None - - -@backend.exception_handler(AuthError) -async def handleAuthError(error: AuthError): - return error.error, error.status_code - - -@backend.get("/", include_in_schema=False) -async def index(request: Request): - get_config_and_authentificate(request) - with open("static/index.html") as f: - return HTMLResponse(content=f.read()) - - -@backend.get("/favicon.ico", include_in_schema=False) -async def favicon(): - return RedirectResponse(url="/static/favicon.ico") - - -@backend.get("/assets/{path}") -async def assets(request: Request, path: str): - get_config_and_authentificate(request) - return RedirectResponse(url="/static/assets/" + path) - -@backend.post("/sum") -async def sum( - body: str = Form(...), - file: UploadFile = File(None), - id_token: str = Header(None, alias= "X-Ms-Token-Lhmsso-Id-Token"), - access_token: str = Header(None, alias="X-Ms-Token-Lhmsso-Access-Token") -): - cfg = get_config_and_authentificate(access_token=access_token) - department = get_department(id_token=id_token) - sumRequest = SumRequest.model_validate(from_json(body)) - text =sumRequest.text if file is None else None - if(file is not None): - file_content = io.BytesIO(await file.read()) - else: - file_content = None - try: - impl = cfg["sum_approaches"] - splits = 
impl.split(detaillevel=sumRequest.detaillevel, file=file_content, text=text) - r = await impl.summarize( - splits=splits, - department=department, - language=sumRequest.language, - model_name=sumRequest.model, - ) - return JSONResponse(content=r) - except Exception as e: - logging.exception("Exception in /sum") - return JSONResponse({"error": str(e)}, status_code=500) -#TODO remove for prod -@backend.exception_handler(HTTPException) -async def http_exception_handler(request: Request, exc: HTTPException): - logging.error(f"Error processing request: {exc.detail}") - return JSONResponse(status_code=exc.status_code, content={"detail": exc.detail}) - - - -@backend.post("/brainstorm") -async def brainstorm(request: Request, - id_token: str = Header(None, alias= "X-Ms-Token-Lhmsso-Id-Token"), - access_token: str = Header(None, alias="X-Ms-Token-Lhmsso-Access-Token")): - cfg = get_config_and_authentificate(access_token=access_token) - try: - request_json = await request.json() - except ValueError: - return JSONResponse(content={"error": "request must be json"}, status_code=415) - department = get_department(id_token=id_token) - - try: - impl = cfg["brainstorm_approaches"] - r = await impl.brainstorm( - topic=request_json["topic"], - language=request_json["language"] or "Deutsch", - department=department, - model_name=request_json["model"] or "gpt-4o-mini", - ) - return JSONResponse(r) - except Exception as e: - logging.exception("Exception in /brainstorm") - msg = ( - "Momentan liegt eine starke Auslastung vor. Bitte in einigen Sekunden erneut versuchen." - if "Rate limit" in str(e) - else str(e) - ) - return JSONResponse({"error": msg}), 500 - - -@backend.post("/chat_stream") -async def chat_stream(request: ChatRequest, - access_token: str = Header(None, alias="X-Ms-Token-Lhmsso-Access-Token"), - id_token: str = Header(None, alias= "X-Ms-Token-Lhmsso-Id-Token")): - cfg = get_config_and_authentificate(access_token=access_token) - department = get_department(id_token=id_token) - - try: - impl = cfg["chat_approaches"] - response_generator = impl.run_with_streaming( - history=request.history, - temperature=request.temperature, - max_tokens=request.max_tokens, - system_message=request.system_message, - model=request.model, - department=department, - ) - response = StreamingResponse(format_as_ndjson(response_generator)) - response.timeout = None # type: ignore - return response - except Exception as e: - logging.exception("Exception in /chat") - return JSONResponse({"error": str(e)}), 500 - - -@backend.post("/chat") -async def chat(request: ChatRequest, - access_token: str = Header(None, alias="X-Ms-Token-Lhmsso-Access-Token"), - id_token: str = Header(None, alias= "X-Ms-Token-Lhmsso-Id-Token")): - cfg = get_config_and_authentificate(access_token=access_token) - department = get_department(id_token=id_token) - try: - impl = cfg["chat_approaches"] - chatResult = impl.run_without_streaming( - history=request.history, - temperature=request.temperature, - max_tokens=request.max_tokens, - system_message=request.system_message, - department=department, - model_name=request.model, - ) - return JSONResponse(chatResult) - except Exception as e: - logging.exception("Exception in /chat") - return JSONResponse({"error": str(e)}), 500 - - -@backend.get("/config") -async def getConfig(access_token: str = Header(None, alias="X-Ms-Token-Lhmsso-Access-Token")): - cfg = get_config_and_authentificate(access_token) - frontend_features = cfg["configuration_features"]["frontend"] - models = cast( - List[ModelsConfig], 
cfg["configuration_features"]["backend"]["models"] - ) - models_dto_list = [] - for model in models: - dto = ModelsDTO( - model_name=model["model_name"], - max_tokens=model["max_tokens"], - description=model["description"], - ) - models_dto_list.append(dto) - return JSONResponse({"frontend": frontend_features, "models": models_dto_list}) - - -@backend.get("/statistics") -async def getStatistics(access_token: str = Header(None, alias="X-Ms-Token-Lhmsso-Access-Token")): - try: - cfg = get_config_and_authentificate(access_token) - repo = cfg["repository"] - sum_by_department = repo.sumByDepartment() - avg_by_department = repo.avgByDepartment() - return JSONResponse({"sum": float(sum_by_department), "avg": float(avg_by_department)}) - except Exception as e: - return JSONResponse(content={"error": e}, status_code=404) - - -@backend.post("/counttokens") -async def counttokens(request: Request, access_token: str = Header(None, alias="X-Ms-Token-Lhmsso-Access-Token") ): - get_config_and_authentificate(access_token) - if not request.json(): - return JSONResponse({"error": "request must be json"}, status_code=415) - request_json = await request.json() - message = request_json["text"] or "" - model = request_json["model"]["model_name"] or "gpt-4o-mini" - counted_tokens = num_tokens_from_messages([HumanMessage(message)], model) - return JSONResponse(CountResult(count=counted_tokens)) - - -@backend.get("/statistics/export") -async def getStatisticsCSV(request: Request, - access_token: str = Header(None, alias="X-Ms-Token-Lhmsso-Access-Token")): - try: - cfg = get_config_and_authentificate(access_token) - repo = cfg["repository"] - export = repo.export() - return FileResponse(export, filename="statistics.csv", as_attachment=True) - except Exception as e: - return JSONResponse(content={"error": e}, status_code=404) - - -@backend.get("/health") -def health_check(): - return "OK" - - -def get_config(): - return cast(AppConfig, backend.state.app_config) - - -def get_config_and_authentificate(access_token): - cfg = get_config() - if cfg["configuration_features"]["backend"]["enable_auth"]: - ensure_authentification(access_token=access_token) - return cfg - - -def ensure_authentification(access_token): - cfg = get_config() - auth_client: AuthentificationHelper = cfg["authentification_client"] - claims = auth_client.authentificate(accesstoken=access_token) - return auth_client, claims - - -def get_department(id_token): - cfg = get_config() - - if cfg["configuration_features"]["backend"]["enable_auth"]: - auth_client: AuthentificationHelper = cfg["authentification_client"] - id_claims = auth_client.decode(id_token) - return auth_client.getDepartment(claims=id_claims) - else: - return None diff --git a/app/frontend/src/pages/version/Version.tsx b/app/frontend/src/pages/version/Version.tsx index 31fbb101..c3b7078d 100644 --- a/app/frontend/src/pages/version/Version.tsx +++ b/app/frontend/src/pages/version/Version.tsx @@ -41,10 +41,10 @@ const Version = () => {

{t('version.fixed')}

-                      • Version wird wieder richtig gespeichert und in den Einstellungen angezeigt.
+                      • Versionsnummer wird wieder richtig gespeichert und in den Einstellungen angezeigt.
-                      • Maximale Tokens aus der Config aufgeteilt in Input- und Output- Tokens. Dadurch laufen Modelle mit kleineren Contextfenster (wie z.B. Mistral) nicht mehr in einen Fehler.
+                      • Maximale Tokens aus der Config aufgeteilt in Input- und Output-Tokens. Dadurch laufen Modelle mit kleineren Contextfenster (wie z.B. Mistral) nicht mehr in einen Fehler.

{t('version.changed')}

From da4a38a7d6323bad1a66244efb830554e5abc7d8 Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Wed, 11 Sep 2024 16:25:31 +0200 Subject: [PATCH 5/6] :shirt: make ruff happy --- tests/integration/test_app.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/integration/test_app.py b/tests/integration/test_app.py index 51ff83e4..fdcbe0f0 100644 --- a/tests/integration/test_app.py +++ b/tests/integration/test_app.py @@ -4,12 +4,10 @@ import PyPDF2 import pytest -import quart.testing.app from httpx import Request, Response from openai import BadRequestError from quart.datastructures import FileStorage -import app from brainstorm.brainstormresult import BrainstormResult from core.types.Chunk import Chunk from summarize.summarizeresult import SummarizeResult From 445bb6a073a858c934d3f50f6fcf53e32b7ec957 Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Wed, 11 Sep 2024 17:25:26 +0200 Subject: [PATCH 6/6] =?UTF-8?q?=F0=9F=9A=91=20fix=20max=20tokens=20not=20w?= =?UTF-8?q?orking,=20on=20other=20then=20default=20llm?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/backend/core/llmhelper.py | 46 +++++++++++++++++++++++++--- app/backend/core/types/LlmConfigs.py | 1 - 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/app/backend/core/llmhelper.py b/app/backend/core/llmhelper.py index 09d2d56a..3bc107c6 100644 --- a/app/backend/core/llmhelper.py +++ b/app/backend/core/llmhelper.py @@ -63,7 +63,27 @@ def getModel(models: List[ModelsConfig], n=n, streaming=streaming, temperature=temperature, - ) + ).configurable_fields( + temperature=ConfigurableField( + id="llm_temperature", + name="LLM Temperature", + description="The temperature of the LLM", + ), + max_tokens= ConfigurableField( + id="llm_max_tokens", + name="LLM max Tokens", + description="The token Limit of the LLM", + ), + streaming = ConfigurableField( + id="llm_streaming", + name="Streaming", + description="Should the LLM Stream"), + callbacks = ConfigurableField( + id="llm_callbacks", + name="Callbacks", + description="Callbacks for the llm") + + ) elif model["type"] == "OPENAI": alternative = ChatOpenAI( model=model["model_name"], @@ -73,6 +93,26 @@ def getModel(models: List[ModelsConfig], n=n, streaming=streaming, temperature=temperature, + ).configurable_fields( + temperature=ConfigurableField( + id="llm_temperature", + name="LLM Temperature", + description="The temperature of the LLM", + ), + max_tokens= ConfigurableField( + id="llm_max_tokens", + name="LLM max Tokens", + description="The token Limit of the LLM", + ), + streaming = ConfigurableField( + id="llm_streaming", + name="Streaming", + description="Should the LLM Stream"), + callbacks = ConfigurableField( + id="llm_callbacks", + name="Callbacks", + description="Callbacks for the llm") + ) alternatives[model["model_name"]] = alternative llm = llm.configurable_fields( @@ -86,10 +126,6 @@ def getModel(models: List[ModelsConfig], name="LLM max Tokens", description="The token Limit of the LLM", ), - openai_api_key = ConfigurableField( - id="llm_api_key", - name="The api key", - description="The api key"), streaming = ConfigurableField( id="llm_streaming", name="Streaming", diff --git a/app/backend/core/types/LlmConfigs.py b/app/backend/core/types/LlmConfigs.py index 9d23d498..7326f320 100644 --- a/app/backend/core/types/LlmConfigs.py +++ b/app/backend/core/types/LlmConfigs.py @@ -10,6 +10,5 @@ class LlmConfigs(TypedDict, total=False): llm: NotRequired[str] # one of the 
SupportedModels llm_max_tokens: NotRequired[int] llm_temperature: NotRequired[float] - llm_api_key: NotRequired[str] llm_streaming: NotRequired[bool] llm_callbacks: NotRequired[List] \ No newline at end of file
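
Taken together, patches 1 and 6 route the renamed max_output_tokens request value into LangChain's configurable "llm_max_tokens" field so the token limit also reaches models other than the default one. The following is a minimal sketch of that pattern, assuming langchain_openai is installed; the model name, endpoint and API key are placeholders, not values from the configs above:

    from langchain_core.runnables import ConfigurableField
    from langchain_openai import ChatOpenAI

    # Build a base LLM and expose max_tokens/temperature as configurable fields,
    # mirroring what getModel() in llmhelper.py now does for every alternative model.
    llm = ChatOpenAI(
        model="gpt-4o-mini",   # placeholder model name
        api_key="<api-key>",   # placeholder credentials
        base_url="<endpoint>", # placeholder endpoint
        max_tokens=4000,
        temperature=0.7,
        streaming=True,
    ).configurable_fields(
        max_tokens=ConfigurableField(id="llm_max_tokens", name="LLM max Tokens"),
        temperature=ConfigurableField(id="llm_temperature", name="LLM Temperature"),
    )

    # Per request, apply the client-supplied max_output_tokens as an override,
    # the same way chat.py builds its LlmConfigs and calls with_config().
    configured_llm = llm.with_config(
        configurable={"llm_max_tokens": 1024, "llm_temperature": 0.2}
    )

Because the override is keyed by field id rather than by model, exposing the same ids on every alternative (the patch 6 change) is what makes the limit take effect when a non-default model is selected.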