From 81d5da1ebe34148a7973f9c4a0b92fe0ead06aa1 Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Wed, 31 Jul 2024 17:29:19 +0200 Subject: [PATCH 01/34] =?UTF-8?q?=F0=9F=8E=86=20first=20local=20running=20?= =?UTF-8?q?version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .dockerignore | 8 ++ Dockerfile | 16 +++ app/backend/app.py | 16 +-- app/backend/brainstorm/brainstorm.py | 5 +- app/backend/chat/chat.py | 13 +- app/backend/core/llmhelper.py | 109 ++++++++++------- app/backend/core/types/AppConfig.py | 6 - app/backend/core/types/AzureChatGPTConfig.py | 14 --- app/backend/core/types/Config.py | 21 ++++ app/backend/core/types/SupportedModels.py | 7 -- app/backend/gunicorn.conf.py | 2 +- app/backend/init_app.py | 120 +++++-------------- app/backend/requirements.txt | 4 +- app/backend/ressources/test.json | 21 ---- app/backend/summarize/summarize.py | 5 +- app/backend/text.py | 2 - docker-compose.yml | 13 ++ 17 files changed, 160 insertions(+), 222 deletions(-) create mode 100644 .dockerignore create mode 100644 Dockerfile delete mode 100644 app/backend/core/types/AzureChatGPTConfig.py delete mode 100644 app/backend/core/types/SupportedModels.py delete mode 100644 app/backend/ressources/test.json delete mode 100644 app/backend/text.py create mode 100644 docker-compose.yml diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..3a0dd85d --- /dev/null +++ b/.dockerignore @@ -0,0 +1,8 @@ +.git* +**/*.pyc +.venv/ +/tests +/notebooks +/infra +*.ipynb +**/node_modules \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..4b7176f4 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,16 @@ +# syntax=docker/dockerfile:1 +FROM node:19-alpine AS builder + +WORKDIR /build +COPY app/ . +WORKDIR /build/frontend +RUN npm install +RUN npm run build + +FROM python:3.11 +WORKDIR /code +COPY --from=builder /build/backend . 
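+# Two-stage build: the node stage above compiles the React frontend, and only the
+# backend folder is copied into the runtime image (assuming the frontend build emits
+# its bundle under app/backend), so Node sources and node_modules never ship.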
+RUN pip install --no-cache-dir --upgrade -r requirements.txt +EXPOSE 8000 + +CMD ["python","-m","gunicorn","main:app"] \ No newline at end of file diff --git a/app/backend/app.py b/app/backend/app.py index 9d50ce57..7a2b5449 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -1,7 +1,6 @@ import json import logging import os -import time from typing import cast from azure.monitor.opentelemetry import configure_azure_monitor @@ -162,10 +161,9 @@ async def counttokens(): if not request.is_json: return jsonify({"error": "request must be json"}), 415 - model = cfg["model_info"]["model"] request_json = await request.get_json() message=request_json['text'] or "" - counted_tokens = num_tokens_from_message(message,model) + counted_tokens = num_tokens_from_message(message,"gpt-35-turbo") #TODO use correct model return jsonify(CountResult(count=counted_tokens)) @bp.route("/statistics/export", methods=["GET"]) @@ -207,18 +205,6 @@ def get_department(request: Request): else: return None - - -@bp.before_request -async def ensure_openai_token(): - cfg = get_config() - openai_token = cfg["model_info"]["openai_token"] - if openai_token.expires_on < time.time() + 60: - openai_token = await cfg["azure_credential"].get_token("https://cognitiveservices.azure.com/.default") - # updates tokens, the approaches should get the newest version of the token via reference - cfg["model_info"]["openai_token"] = openai_token - cfg["model_info"]["openai_api_key"] = openai_token.token - @bp.before_app_serving async def setup_clients(): current_app.config[APPCONFIG_KEY] = await initApp() diff --git a/app/backend/brainstorm/brainstorm.py b/app/backend/brainstorm/brainstorm.py index 9a642516..79acc0ee 100644 --- a/app/backend/brainstorm/brainstorm.py +++ b/app/backend/brainstorm/brainstorm.py @@ -7,7 +7,6 @@ from brainstorm.brainstormresult import BrainstormResult from core.datahelper import Repository, Requestinfo -from core.types.AzureChatGPTConfig import AzureChatGPTConfig from core.types.Config import ApproachConfig from core.types.LlmConfigs import LlmConfigs @@ -56,10 +55,9 @@ class Brainstorm: Text: {brainstorm}""" - def __init__(self, llm: RunnableSerializable, config: ApproachConfig, model_info: AzureChatGPTConfig, repo: Repository): + def __init__(self, llm: RunnableSerializable, config: ApproachConfig, repo: Repository): self.llm = llm self.config = config - self.model_info = model_info self.repo = repo def getBrainstormPrompt(self) -> PromptTemplate: @@ -91,7 +89,6 @@ async def brainstorm(self, topic: str, language: str, department: Optional[str]) """ # configure config: LlmConfigs = { - "llm_api_key": self.model_info["openai_api_key"] } llm = self.llm.with_config(configurable=config) diff --git a/app/backend/chat/chat.py b/app/backend/chat/chat.py index 7e17fbc6..dba2f2dc 100644 --- a/app/backend/chat/chat.py +++ b/app/backend/chat/chat.py @@ -17,7 +17,6 @@ from chat.chatresult import ChatResult from core.datahelper import Repository, Requestinfo from core.modelhelper import num_tokens_from_message, num_tokens_from_messages -from core.types.AzureChatGPTConfig import AzureChatGPTConfig from core.types.Chunk import Chunk, ChunkInfo from core.types.Config import ApproachConfig from core.types.LlmConfigs import LlmConfigs @@ -27,12 +26,10 @@ class Chat: """Chat with a llm via multiple steps. 
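+
+    Wraps a configurable LangChain runnable; per-request options (max_tokens,
+    temperature, streaming, callbacks) are applied at call time via LlmConfigs.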
""" - def __init__(self, llm: RunnableSerializable, config: ApproachConfig, model_info: AzureChatGPTConfig, repo: Repository, chatgpt_model: str): + def __init__(self, llm: RunnableSerializable, config: ApproachConfig, repo: Repository): self.llm = llm self.config = config - self.model_info = model_info self.repo = repo - self.chatgpt_model = chatgpt_model async def create_coroutine(self, history: "Sequence[dict[str, str]]", llm: RunnableSerializable, system_message: Optional[str]) -> Any: """Calls the llm in streaming mode @@ -69,7 +66,6 @@ async def run_with_streaming(self, history: 'list[dict[str, str]]',max_tokens: i handler = AsyncIteratorCallbackHandler() config: LlmConfigs = { "llm_max_tokens": max_tokens, - "llm_api_key": self.model_info["openai_api_key"], "llm_temperature": temperature, "llm_streaming": True, "llm_callbacks": [handler], @@ -102,15 +98,15 @@ async def run_with_streaming(self, history: 'list[dict[str, str]]',max_tokens: i history[-1]["bot"] = result system_message_tokens = 0 if(system_message and system_message.strip() !=""): - system_message_tokens = num_tokens_from_message(system_message,self.chatgpt_model) + system_message_tokens = num_tokens_from_message(system_message,"gpt-35-turbo") #TODO if self.config["log_tokens"]: self.repo.addInfo(Requestinfo( - tokencount = num_tokens_from_messages(history,self.chatgpt_model) + system_message_tokens, + tokencount = num_tokens_from_messages(history,"gpt-35-turbo") + system_message_tokens, #TODO richtiges Modell und tokenizer auswählen department = department, messagecount= len(history), method = "Chat")) - info = ChunkInfo(requesttokens=num_tokens_from_message(history[-1]["user"],self.chatgpt_model), streamedtokens=num_tokens_from_message(result,self.chatgpt_model)) + info = ChunkInfo(requesttokens=num_tokens_from_message(history[-1]["user"],"gpt-35-turbo"), streamedtokens=num_tokens_from_message(result,"gpt-35-turbo")) #TODO yield Chunk(type="I", message=info, order=position) def run_without_streaming(self, history: "Sequence[dict[str, str]]", max_tokens: int, temperature: float, system_message: Optional[str], department: Optional[str]) -> ChatResult: @@ -128,7 +124,6 @@ def run_without_streaming(self, history: "Sequence[dict[str, str]]", max_tokens: """ config: LlmConfigs = { "llm_max_tokens": max_tokens, - "llm_api_key": self.model_info["openai_api_key"], "llm_temperature": temperature, "llm_streaming": False, } diff --git a/app/backend/core/llmhelper.py b/app/backend/core/llmhelper.py index 14378179..16e1b4d8 100644 --- a/app/backend/core/llmhelper.py +++ b/app/backend/core/llmhelper.py @@ -1,18 +1,17 @@ from langchain_community.llms.fake import FakeListLLM from langchain_core.runnables import ConfigurableField from langchain_core.runnables.base import RunnableSerializable -from langchain_openai import AzureChatOpenAI +from langchain_openai import AzureChatOpenAI, ChatOpenAI +from typing import List +from core.types.Config import ModelsConfig -from core.types.SupportedModels import SupportedModels +class ModelsConfigurationException(Exception): + pass -def getModel(chatgpt_model: str, +def getModel(models: List[ModelsConfig], max_tokens: int, n: int, - api_key: str, - api_base: str, - api_version: str, - api_type: str, temperature: float, streaming: bool) -> RunnableSerializable: """returns a configured llm, which can be later be parametrized during runtime @@ -20,43 +19,61 @@ def getModel(chatgpt_model: str, Returns: RunnableSerializable: the configured llm """ - llm = AzureChatOpenAI( - model=chatgpt_model, - 
max_tokens=max_tokens, - n=n, - deployment_name= "chat", - openai_api_key=api_key, - azure_endpoint=api_base, - openai_api_version=api_version, - openai_api_type=api_type, - temperature=temperature, - streaming=streaming, - ).configurable_fields( - temperature=ConfigurableField( - id="llm_temperature", - name="LLM Temperature", - description="The temperature of the LLM", - ), - max_tokens= ConfigurableField( - id="llm_max_tokens", - name="LLM max Tokens", - description="The token Limit of the LLM", - ), - openai_api_key = ConfigurableField( - id="llm_api_key", - name="The api key", - description="The api key"), - streaming = ConfigurableField( - id="llm_streaming", - name="Streaming", - description="Should the LLM Stream"), - callbacks = ConfigurableField( - id="llm_callbacks", - name="Callbacks", - description="Callbacks for the llm") - - ).configurable_alternatives( - ConfigurableField(id="llm"), - default_key=SupportedModels.AZURE_CHATGPT.value, - fake= FakeListLLM(responses=["Hi diggi"])) + if len(models) == 0: + raise ModelsConfigurationException("No models found in the configuration.json") + default_model = models[0] + if default_model["type"] == "AZURE": + llm = AzureChatOpenAI( + model=default_model["model_name"], + deployment_name= default_model["deployment"], + openai_api_key=default_model["api_key"], + azure_endpoint=default_model["endpoint"], + openai_api_version=default_model["api_version"], + max_tokens=max_tokens, + n=n, + streaming=streaming, + temperature=temperature, + openai_api_type="azure", + ) + elif default_model["type"] == "OPENAI": + llm = ChatOpenAI( + model=default_model["model_name"], + api_key=default_model["api_key"], + base_url=default_model["endpoint"], + max_tokens=max_tokens, + n=n, + streaming=streaming, + temperature=temperature, + ) + else: + raise ModelsConfigurationException(f"Unknown model type: {default_model['type']}. 
Currently only `AZURE` and `OPENAI` are supported.") + + llm = llm.configurable_fields( + temperature=ConfigurableField( + id="llm_temperature", + name="LLM Temperature", + description="The temperature of the LLM", + ), + max_tokens= ConfigurableField( + id="llm_max_tokens", + name="LLM max Tokens", + description="The token Limit of the LLM", + ), + openai_api_key = ConfigurableField( + id="llm_api_key", + name="The api key", + description="The api key"), + streaming = ConfigurableField( + id="llm_streaming", + name="Streaming", + description="Should the LLM Stream"), + callbacks = ConfigurableField( + id="llm_callbacks", + name="Callbacks", + description="Callbacks for the llm") + + ).configurable_alternatives( + ConfigurableField(id="llm"), + default_key=models[0]["model_name"], + fake= FakeListLLM(responses=["Hi diggi"])) return llm diff --git a/app/backend/core/types/AppConfig.py b/app/backend/core/types/AppConfig.py index 56c4090d..88520801 100644 --- a/app/backend/core/types/AppConfig.py +++ b/app/backend/core/types/AppConfig.py @@ -1,12 +1,8 @@ from typing import TypedDict - -from azure.identity.aio import DefaultAzureCredential - from brainstorm.brainstorm import Brainstorm from chat.chat import Chat from core.authentification import AuthentificationHelper from core.datahelper import Repository -from core.types.AzureChatGPTConfig import AzureChatGPTConfig from core.types.Config import BackendConfig, Config from summarize.summarize import Summarize @@ -14,8 +10,6 @@ class AppConfig(TypedDict): """Config for the app, contains all clients and informations, that are needed """ - model_info: AzureChatGPTConfig - azure_credential: DefaultAzureCredential chat_approaches: Chat sum_approaches: Summarize brainstorm_approaches: Brainstorm diff --git a/app/backend/core/types/AzureChatGPTConfig.py b/app/backend/core/types/AzureChatGPTConfig.py deleted file mode 100644 index ef378289..00000000 --- a/app/backend/core/types/AzureChatGPTConfig.py +++ /dev/null @@ -1,14 +0,0 @@ -from typing import TypedDict - -from azure.core.credentials import AccessToken - - -class AzureChatGPTConfig(TypedDict): - """Contains all information, that describes an AzureOpenAI endpoint - """ - model: str - openai_token: AccessToken - openai_api_key: str - openai_api_base: str - openai_api_version: str - openai_api_type: str \ No newline at end of file diff --git a/app/backend/core/types/Config.py b/app/backend/core/types/Config.py index 85d71320..828a9e2f 100644 --- a/app/backend/core/types/Config.py +++ b/app/backend/core/types/Config.py @@ -4,11 +4,32 @@ class ApproachConfig(TypedDict): log_tokens: bool +class ModelsConfig(TypedDict): + type: str + model_name: str + deployment: str + endpoint: str + api_key: str + api_version: str + max_tokens: int + +class SSOConfig(TypedDict): + sso_issuer: str + role: str +class DatabaseConfig(TypedDict): + db_host: str + db_name: str + db_user: str + db_passwort: str class BackendConfig(TypedDict): + enable_auth: bool enable_database: bool + sso_config: SSOConfig + db_config: DatabaseConfig chat: ApproachConfig brainstorm: ApproachConfig sum: ApproachConfig + models: ModelsConfig class LabelsConfig(TypedDict): env_name: str diff --git a/app/backend/core/types/SupportedModels.py b/app/backend/core/types/SupportedModels.py deleted file mode 100644 index 932dc4e9..00000000 --- a/app/backend/core/types/SupportedModels.py +++ /dev/null @@ -1,7 +0,0 @@ -from enum import Enum, unique - - -@unique -class SupportedModels(Enum): - AZURE_CHATGPT = "AZURE_CHATGPT" - FAKE = "FAKE" \ No 
newline at end of file diff --git a/app/backend/gunicorn.conf.py b/app/backend/gunicorn.conf.py index b1aded06..86a6a912 100644 --- a/app/backend/gunicorn.conf.py +++ b/app/backend/gunicorn.conf.py @@ -3,7 +3,7 @@ max_requests = 1000 max_requests_jitter = 50 log_file = "-" -bind = "0.0.0.0" +bind = "0.0.0.0:8000" timeout = 230 # https://learn.microsoft.com/en-us/troubleshoot/azure/app-service/web-apps-performance-faqs#why-does-my-request-time-out-after-230-seconds diff --git a/app/backend/init_app.py b/app/backend/init_app.py index ca439183..dee97eb2 100644 --- a/app/backend/init_app.py +++ b/app/backend/init_app.py @@ -1,8 +1,5 @@ import os from typing import Tuple - -from azure.identity.aio import DefaultAzureCredential - from brainstorm.brainstorm import Brainstorm from chat.chat import Chat from core.authentification import AuthentificationHelper @@ -10,54 +7,16 @@ from core.datahelper import Base, Repository from core.llmhelper import getModel from core.types.AppConfig import AppConfig -from core.types.AzureChatGPTConfig import AzureChatGPTConfig -from core.types.Config import BackendConfig +from core.types.Config import BackendConfig, DatabaseConfig from summarize.summarize import Summarize -def read_env(): - """reads configured values from env - """ - AZURE_OPENAI_SERVICE = os.environ["AZURE_OPENAI_SERVICE"] - #AZURE_OPENAI_CHATGPT_DEPLOYMENT = os.environ["AZURE_OPENAI_CHATGPT_DEPLOYMENT"] - AZURE_OPENAI_CHATGPT_MODEL = os.environ["AZURE_OPENAI_CHATGPT_MODEL"] - SSO_ISSUER = os.environ["SSO_ISSUER"] - CONFIG_NAME = os.environ["CONFIG_NAME"] - DB_HOST = os.environ["DB_HOST"] - DB_NAME = os.environ["DB_NAME"] - DB_USER = os.environ["DB_USER"] - DB_PASSWORD = os.environ["DB_PASSWORD"] - return AZURE_OPENAI_SERVICE,AZURE_OPENAI_CHATGPT_MODEL,SSO_ISSUER,CONFIG_NAME,DB_HOST,DB_NAME,DB_USER,DB_PASSWORD - - -async def get_openai_params(AZURE_OPENAI_SERVICE: str): - """get current openai access token - - Args: - AZURE_OPENAI_SERVICE (str): the current openaiservice - """ - # Use the current user identity to authenticate with Azure OpenAI (no secrets needed, - # just use 'az login' locally, and managed identity when deployed on Azure). If you need to use keys, use separate AzureKeyCredential instances with the - # keys for each service - # If you encounter a blocking error during a DefaultAzureCredential resolution, you can exclude the problematic credential by using a parameter (ex. 
exclude_shared_token_cache_credential=True) - azure_credential = DefaultAzureCredential(exclude_shared_token_cache_credential = True) - - # Used by the OpenAI SDK - openai_api_base = f"https://{AZURE_OPENAI_SERVICE}.openai.azure.com" - openai_api_version = "2023-05-15" - openai_api_type = "azure_ad" - openai_token = await azure_credential.get_token( - "https://cognitiveservices.azure.com/.default" - ) - openai_api_key = openai_token.token - return azure_credential,openai_api_base,openai_api_version,openai_api_type,openai_token,openai_api_key -def initApproaches(model_info: AzureChatGPTConfig, cfg: BackendConfig, repoHelper: Repository) -> Tuple[Chat, Brainstorm, Summarize]: +def initApproaches(cfg: BackendConfig, repoHelper: Repository) -> Tuple[Chat, Brainstorm, Summarize]: """init different approaches Args: - model_info (AzureChatGPTConfig): defines access key for the current model, gets renewed over time cfg (BackendConfig): the config for the backend repoHelper (Repository): the repository to save request statistics @@ -65,38 +24,26 @@ def initApproaches(model_info: AzureChatGPTConfig, cfg: BackendConfig, repoHelpe Tuple[Chat, Brainstorm, Summarize]: the implementation behind chat, brainstorm and summarize """ brainstormllm = getModel( - chatgpt_model= model_info["model"], + models=cfg["models"], max_tokens = 4000, n = 1, - api_key = model_info["openai_api_key"], - api_base = model_info["openai_api_base"], - api_version = model_info["openai_api_version"], - api_type = model_info["openai_api_type"], streaming=False, temperature=0.9) sumllm = getModel( - chatgpt_model= model_info["model"], + models=cfg["models"], max_tokens = 1000, n = 1, - api_key = model_info["openai_api_key"], - api_base = model_info["openai_api_base"], - api_version = model_info["openai_api_version"], - api_type = model_info["openai_api_type"], streaming=False, - temperature=0.7) + temperature=0.2) chatlllm = getModel( - chatgpt_model= model_info["model"], + models=cfg["models"], max_tokens=4000, n = 1, - api_key = model_info["openai_api_key"], - api_base = model_info["openai_api_base"], - api_version = model_info["openai_api_version"], - api_type = model_info["openai_api_type"], streaming=True, temperature=0.7) - chat_approaches = Chat(llm=chatlllm, config=cfg["chat"], model_info=model_info, repo=repoHelper, chatgpt_model=model_info["model"]) - brainstorm_approaches = Brainstorm(llm=brainstormllm, model_info=model_info, config=cfg["brainstorm"], repo=repoHelper) - sum_approaches = Summarize(llm=sumllm, config=cfg["sum"],model_info=model_info, repo=repoHelper) + chat_approaches = Chat(llm=chatlllm, config=cfg["chat"], repo=repoHelper) + brainstorm_approaches = Brainstorm(llm=brainstormllm, config=cfg["brainstorm"], repo=repoHelper) + sum_approaches = Summarize(llm=sumllm, config=cfg["sum"], repo=repoHelper) return (chat_approaches, brainstorm_approaches, sum_approaches) async def initApp() -> AppConfig: @@ -105,43 +52,34 @@ async def initApp() -> AppConfig: Returns: AppConfig: contains the configuration for the webservice """ - # Replace these with your own values, either in environment variables or directly here - AZURE_OPENAI_SERVICE, AZURE_OPENAI_CHATGPT_MODEL, SSO_ISSUER, CONFIG_NAME, DB_HOST, DB_NAME, DB_USER, DB_PASSWORD = read_env() - - azure_credential, openai_api_base, openai_api_version, openai_api_type, openai_token, openai_api_key = await get_openai_params(AZURE_OPENAI_SERVICE) + + # read enviornment config + config_helper = ConfigHelper(base_path=os.path.dirname(os.path.realpath(__file__))+"/", 
env="config", base_config_name="base") + cfg = config_helper.loadData() # Set up authentication helper auth_helper = AuthentificationHelper( - issuer=SSO_ISSUER, - role="lhm-ab-mucgpt-user" + issuer=cfg["backend"]["sso_config"]["sso_issuer"], + role=cfg["backend"]["sso_config"]["role"] ) # set up repositorty - repoHelper = Repository( - username=DB_USER, - host=DB_HOST, - database=DB_NAME, - password=DB_PASSWORD - ) - # read enviornment config - config_helper = ConfigHelper(base_path=os.path.dirname(os.path.realpath(__file__))+"/ressources/", env=CONFIG_NAME, base_config_name="base") - cfg = config_helper.loadData() - - model_info = AzureChatGPTConfig( - model=AZURE_OPENAI_CHATGPT_MODEL, - openai_token = openai_token, - openai_api_key = openai_api_key, - openai_api_base = openai_api_base, - openai_api_version = openai_api_version, - openai_api_type = openai_api_type - ) + if(cfg["backend"]["enable_database"]): + db_config: DatabaseConfig = cfg["backend"]["db_config"] + repoHelper = Repository( + username=db_config["db_user"], + host=db_config["db_host"], + database=db_config["db_name"], + password=db_config["db_passwort"] + ) + repoHelper.setup_schema(base=Base) + else: + repoHelper = None - (chat_approaches, brainstorm_approaches, sum_approaches) = initApproaches(model_info=model_info, cfg=cfg["backend"], repoHelper=repoHelper) + (chat_approaches, brainstorm_approaches, sum_approaches) = initApproaches(cfg=cfg["backend"], repoHelper=repoHelper) - if cfg["backend"]["enable_database"]: - repoHelper.setup_schema(base=Base) + + return AppConfig( - model_info=model_info, - azure_credential=azure_credential, authentification_client=auth_helper, configuration_features=cfg, chat_approaches= chat_approaches, diff --git a/app/backend/requirements.txt b/app/backend/requirements.txt index cc01f61f..29086bf0 100644 --- a/app/backend/requirements.txt +++ b/app/backend/requirements.txt @@ -1,4 +1,3 @@ -azure-identity==1.17.1 quart==0.19.6 langchain==0.2.6 langchain_openai @@ -16,4 +15,5 @@ requests sqlalchemy==2.0.31 psycopg2==2.9.9 pypdf2==3.0.1 -tenacity==8.4.2 \ No newline at end of file +tenacity==8.4.2 +gunicorn \ No newline at end of file diff --git a/app/backend/ressources/test.json b/app/backend/ressources/test.json deleted file mode 100644 index ae81a7ab..00000000 --- a/app/backend/ressources/test.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "frontend": { - "labels": { - "env_name": "MUCGPT-Test" - }, - "alternative_logo": false - }, - "backend": { - "enable_auth": false, - "enable_database": false, - "chat":{ - "log_tokens": false - }, - "brainstorm": { - "log_tokens": false - }, - "sum": { - "log_tokens": false - } - } -} \ No newline at end of file diff --git a/app/backend/summarize/summarize.py b/app/backend/summarize/summarize.py index 55ee35d7..8f210b89 100644 --- a/app/backend/summarize/summarize.py +++ b/app/backend/summarize/summarize.py @@ -10,7 +10,6 @@ from core.datahelper import Repository, Requestinfo from core.textsplit import splitPDF, splitText -from core.types.AzureChatGPTConfig import AzureChatGPTConfig from core.types.Config import ApproachConfig from core.types.LlmConfigs import LlmConfigs from summarize.summarizeresult import SummarizeResult @@ -84,10 +83,9 @@ class Summarize: - def __init__(self, llm: RunnableSerializable, config: ApproachConfig, model_info: AzureChatGPTConfig, repo: Repository, short_split = 2100, medium_split = 1500, long_split = 700, use_last_n_summaries = -2): + def __init__(self, llm: RunnableSerializable, config: ApproachConfig, repo: Repository, 
short_split = 2100, medium_split = 1500, long_split = 700, use_last_n_summaries = -2): self.llm = llm self.config = config - self.model_info = model_info self.repo = repo self.switcher = { "short": short_split, @@ -109,7 +107,6 @@ def getTranslationCleanupPrompt(self) -> PromptTemplate: def setup(self) -> SequentialChain: config: LlmConfigs = { - "llm_api_key": self.model_info["openai_api_key"] } llm = self.llm.with_config(configurable=config) # setup model diff --git a/app/backend/text.py b/app/backend/text.py deleted file mode 100644 index 9209778c..00000000 --- a/app/backend/text.py +++ /dev/null @@ -1,2 +0,0 @@ -def nonewlines(s: str) -> str: - return s.replace('\n', ' ').replace('\r', ' ') diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 00000000..dba02738 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,13 @@ + version: "3.7" + services: + mucgpt: + image: mucgpt:latest + container_name: MUCGPT + ports: + - "8000:8000" + volumes: + - "./config/local.json:/code/config.json" + - "./config/base.json:/code/base.json" + environment: + https_proxy: "http://internet-proxy-client.muenchen.de:80" + http_proxy: "http://internet-proxy-client.muenchen.de:80" \ No newline at end of file From 98103fefd74b2937ea6323063d14c43fe0811fba Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Wed, 14 Aug 2024 14:00:35 +0200 Subject: [PATCH 02/34] =?UTF-8?q?First=20working=20terraform=20version=20?= =?UTF-8?q?=F0=9F=8E=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 14 ++++++-- infra/main.tf | 78 ++++++++++++++++++++++++++++++++++++++++++++ infra/provider.tf | 30 +++++++++++++++++ infra/variables.tf | 81 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 200 insertions(+), 3 deletions(-) create mode 100644 infra/main.tf create mode 100644 infra/provider.tf create mode 100644 infra/variables.tf diff --git a/Dockerfile b/Dockerfile index 4b7176f4..1d5c9ed0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,16 +1,24 @@ # syntax=docker/dockerfile:1 + FROM node:19-alpine AS builder +ENV GENERATE_SOURCEMAP=false +ENV NODE_OPTIONS=--max_old_space_size=4096 WORKDIR /build COPY app/ . WORKDIR /build/frontend RUN npm install RUN npm run build -FROM python:3.11 +FROM python:3.12 WORKDIR /code COPY --from=builder /build/backend . + +ARG fromconfig="./config/.local.json" +COPY $fromconfig /code/config.json +COPY "./config/base.json" /code/base.json + RUN pip install --no-cache-dir --upgrade -r requirements.txt -EXPOSE 8000 -CMD ["python","-m","gunicorn","main:app"] \ No newline at end of file +EXPOSE 8000 +CMD ["gunicorn","main:app"] \ No newline at end of file diff --git a/infra/main.tf b/infra/main.tf new file mode 100644 index 00000000..9cee9302 --- /dev/null +++ b/infra/main.tf @@ -0,0 +1,78 @@ + +resource "random_id" "server" { + byte_length = 16 +} + + + +resource "azurerm_container_registry" "acr" { + name = var.container_reg_name == "" ? "containerReg${random_id.server.hex}" : var.container_reg_name + resource_group_name = var.rg_name + location = var.location + sku = "Standard" + admin_enabled = true +} + +resource "azurerm_service_plan" "asp" { + name = var.service_plan_name == "" ? "${var.prefix}_serviceplan_${random_id.server.hex}" : var.service_plan_name + location = var.location + resource_group_name = var.rg_name + os_type = "Linux" + sku_name = var.service_plan_sku +} + +resource "azurerm_linux_web_app" "webapp" { + name = var.backend_name == "" ? 
"${var.prefix}-backend-${random_id.server.hex}" : var.backend_name + location = var.location + resource_group_name = var.rg_name + service_plan_id = azurerm_service_plan.asp.id + + app_settings = { + WEBSITES_ENABLE_APP_SERVICE_STORAGE = "false" + LHMSSO_PROVIDER_AUTHENTICATION_SECRET = var.sso_secret + WEBSITES_PORT = 8000 + WEBSITES_ENABLE_APP_SERVICE_STORAGE = false + DOCKER_ENABLE_CI = "true" + } + + site_config { + always_on = "true" + application_stack { + docker_image_name = var.image_name + docker_registry_url = "https://${azurerm_container_registry.acr.login_server}" + docker_registry_username = var.registry_username + docker_registry_password = var.registry_password + } + health_check_path = "/health" + } + auth_settings_v2 { + auth_enabled = true + require_authentication = true + unauthenticated_action = "RedirectToLoginPage" + default_provider = "LHMSSO" + excluded_paths=["/health"] + custom_oidc_v2 { + name = "LHMSSO" + client_id = "mucgpt" + openid_configuration_endpoint = var.sso_configuration_endpoint + scopes = ["openid"] + } + login { + + } + } + + + logs{ + application_logs{ + file_system_level = "Verbose" + + } + http_logs{ + file_system{ + retention_in_mb = 30 + retention_in_days = 7 + } + } + } +} diff --git a/infra/provider.tf b/infra/provider.tf new file mode 100644 index 00000000..584cdf1e --- /dev/null +++ b/infra/provider.tf @@ -0,0 +1,30 @@ +# Configure desired versions of terraform, azurerm provider +terraform { + required_version = ">= 1.1.7, < 2.0.0" + required_providers { + azurerm = { + version = "~>3.97.1" + source = "hashicorp/azurerm" + } + azurecaf = { + source = "aztfmod/azurecaf" + version = "~>1.2.24" + } + } +} + +# Enable features for azurerm +provider "azurerm" { + skip_provider_registration = "true" + features { + key_vault { + purge_soft_delete_on_destroy = false + } + resource_group { + prevent_deletion_if_contains_resources = false + } + } +} + +# Access client_id, tenant_id, subscription_id and object_id configuration values +data "azurerm_client_config" "current" {} diff --git a/infra/variables.tf b/infra/variables.tf new file mode 100644 index 00000000..0366dbf4 --- /dev/null +++ b/infra/variables.tf @@ -0,0 +1,81 @@ +variable "location" { + description = "(Required) The Azure location where the resource should be deployed" + type = string + default = "westeurope" +} + +variable "prefix" { + type = string + description = "The prefix used for all resources in this example" + default = "mucgpt" +} + + +variable "rg_name" { + description = "(Required) The RG_name. Please refer to the naming convention described in confluence." + type = string +} + +variable "container_reg_name" { + description = "The Container Registry Name. Please refer to the naming convention described in confluence." + type = string + default = "" +} + +variable "service_plan_name" { + description = "The Service Plan Name." 
+ type = string + default = "" +} + +variable "service_plan_sku" { + description = "The Service Plan Name" + type = string + default = "" +} + +variable "backend_name" { + description = "The Webapp Name for the App Service" + type = string + default = "" +} + +variable "registry_username" { + description = "The username for the container registry" + type = string +} + +variable "registry_password" { + description = "The password for the container registry" + type = string +} + +variable "image_name"{ + description = "The image name of the image in the appservice" + type = string +} + +variable "sso_secret"{ + description = "The secret for the sso" + type = string +} + +variable "sso_configuration_endpoint"{ + description = "The configuration endpoint for the openid-connect endpoint. Ends with .well-known/openid-configuration" + type = string +} + +variable "tags" { + description = "(Required) The necessary tags defined in the tagging concept are mandatory." + type = object({ + cce-businesscriticality: string + cce-costcenter: string + cce-businessunit: string + cce-expirydate: string + cce-requestnumber: string + cce-serviceid: string + cce-serviceowner: string + cce-shortname: string + cce-stage: string + }) +} From dbe1de16ec6c1aacb209fbbdafcb43d24670ff2d Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Tue, 20 Aug 2024 11:50:49 +0200 Subject: [PATCH 03/34] =?UTF-8?q?=F0=9F=9B=A0=20default=20config?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 2 +- config/base.json | 3 +++ config/default.json | 50 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 config/base.json create mode 100644 config/default.json diff --git a/Dockerfile b/Dockerfile index 1d5c9ed0..7cfedd23 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,7 +14,7 @@ FROM python:3.12 WORKDIR /code COPY --from=builder /build/backend . 
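+# The build argument below chooses which JSON file is baked into the image as
+# /code/config.json; it now defaults to the checked-in placeholder config/default.json
+# rather than a local, untracked .local.json.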
-ARG fromconfig="./config/.local.json" +ARG fromconfig="./config/default.json" COPY $fromconfig /code/config.json COPY "./config/base.json" /code/base.json diff --git a/config/base.json b/config/base.json new file mode 100644 index 00000000..cb7ad94b --- /dev/null +++ b/config/base.json @@ -0,0 +1,3 @@ +{ + "version": "1.1.1" +} \ No newline at end of file diff --git a/config/default.json b/config/default.json new file mode 100644 index 00000000..e1167fc7 --- /dev/null +++ b/config/default.json @@ -0,0 +1,50 @@ +{ + "frontend": { + "labels": { + "env_name": "MUCGPT" + }, + "alternative_logo": false + }, + "backend": { + "enable_auth": false, + "enable_database": false, + "sso_config": { + "sso_issuer": "TODO", + "role": "lhm-ab-mucgpt-user" + }, + "db_config": { + "db_host": "TODO", + "db_name": "postgres", + "db_user": "TODO", + "db_passwort": "TODO" + }, + "chat": { + "log_tokens": false + }, + "brainstorm": { + "log_tokens": false + }, + "sum": { + "log_tokens": false + }, + + "models": [ + { + "type": "OPENAI", + "model_name": "TODO", + "endpoint": "TODO", + "api_key": "TODO", + "max_tokens": 128000 + }, + { + "type": "AZURE", + "model_name": "TODO", + "deployment": "TODO", + "endpoint": "TODO", + "api_key": "TODO", + "api_version": "TODO", + "max_tokens": 0 + } + ] + } +} \ No newline at end of file From 5773c80ef8c3e877c4a09f67e2611b5bbfb4add2 Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Tue, 20 Aug 2024 11:55:14 +0200 Subject: [PATCH 04/34] :sparkles: Frontend selector of LLM --- .../LLMSelector/LLMContextProvider.tsx | 20 ++++++++ .../components/LLMSelector/LLMSelector.tsx | 48 +++++++++++++++++++ .../SettingsDrawer/SettingsDrawer.tsx | 12 ++++- app/frontend/src/i18n.ts | 9 ++-- app/frontend/src/pages/layout/Layout.tsx | 14 +++++- .../src/pages/layout/LayoutHelper.tsx | 1 + 6 files changed, 98 insertions(+), 6 deletions(-) create mode 100644 app/frontend/src/components/LLMSelector/LLMContextProvider.tsx create mode 100644 app/frontend/src/components/LLMSelector/LLMSelector.tsx diff --git a/app/frontend/src/components/LLMSelector/LLMContextProvider.tsx b/app/frontend/src/components/LLMSelector/LLMContextProvider.tsx new file mode 100644 index 00000000..72d59a71 --- /dev/null +++ b/app/frontend/src/components/LLMSelector/LLMContextProvider.tsx @@ -0,0 +1,20 @@ +// Context.js +import React, { Dispatch, SetStateAction, useState } from "react"; + +interface ILLMProvider { + LLM: string; + setLLM: Dispatch>; +} + +export const DEFAULTLLM = "GPT-4o-mini"; +export const LLMContext = React.createContext({ LLM: DEFAULTLLM, setLLM: () => { } }); + +export const LLMContextProvider = (props: React.PropsWithChildren<{}>) => { + const [LLM, setLLM] = useState(DEFAULTLLM); + + return ( + + {props.children} + + ); +}; \ No newline at end of file diff --git a/app/frontend/src/components/LLMSelector/LLMSelector.tsx b/app/frontend/src/components/LLMSelector/LLMSelector.tsx new file mode 100644 index 00000000..e78fa87f --- /dev/null +++ b/app/frontend/src/components/LLMSelector/LLMSelector.tsx @@ -0,0 +1,48 @@ +import { + Dropdown, + makeStyles, + Option, +} from "@fluentui/react-components"; +import { SelectionEvents, OptionOnSelectData } from "@fluentui/react-combobox"; + +const useStyles = makeStyles({ + root: { + // Stack the label above the field with a gap + + }, + option: { + } +}); + +interface Props { + onSelectionChange: (e: SelectionEvents, selection: OptionOnSelectData) => void; + defaultLLM: string; +} + + +export const LLMSelector = ({ 
onSelectionChange, defaultLLM }: Props) => { + const styles = useStyles(); + return ( +
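+        // Presumably a Fluent UI <Dropdown> whose <Option> entries list the selectable
+        // models; the chosen value is reported through onSelectionChange and the
+        // initial selection comes from defaultLLM.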
+ + + + + + +
+ ); +}; \ No newline at end of file diff --git a/app/frontend/src/components/SettingsDrawer/SettingsDrawer.tsx b/app/frontend/src/components/SettingsDrawer/SettingsDrawer.tsx index 99642be9..b9b013ca 100644 --- a/app/frontend/src/components/SettingsDrawer/SettingsDrawer.tsx +++ b/app/frontend/src/components/SettingsDrawer/SettingsDrawer.tsx @@ -17,6 +17,7 @@ import styles from "./SettingsDrawer.module.css"; import { ChangeEvent, useCallback, useState } from "react"; import { SelectionEvents, OptionOnSelectData } from "@fluentui/react-combobox"; import { LanguageSelector } from "../../components/LanguageSelector"; +import { LLMSelector } from "../LLMSelector/LLMSelector"; import { useTranslation } from 'react-i18next'; import cheetsheet from "../../assets/mucgpt_cheatsheet.pdf"; interface Props { @@ -27,9 +28,11 @@ interface Props { setFontscale: (fontscale: number) => void; isLight: boolean; setTheme: (isLight: boolean) => void; + onLLMSelectionChanged: (e: SelectionEvents, selection: OptionOnSelectData) => void; + defaultLLM: string; } -export const SettingsDrawer = ({ onLanguageSelectionChanged, defaultlang, version, fontscale, setFontscale, isLight, setTheme }: Props) => { +export const SettingsDrawer = ({ onLanguageSelectionChanged, defaultlang, version, fontscale, setFontscale, isLight, setTheme, onLLMSelectionChanged, defaultLLM }: Props) => { const [isOpen, setIsOpen] = useState(false); const { t, i18n } = useTranslation(); @@ -73,7 +76,12 @@ export const SettingsDrawer = ({ onLanguageSelectionChanged, defaultlang, versio
{t('components.settingsdrawer.fontsize')} - +
+
+ {t('components.settingsdrawer.llm')} +
+
+
diff --git a/app/frontend/src/i18n.ts b/app/frontend/src/i18n.ts index 20bc5f83..0a579ce7 100644 --- a/app/frontend/src/i18n.ts +++ b/app/frontend/src/i18n.ts @@ -106,7 +106,8 @@ i18n theme: "Design", change_theme: "Design wechseln", theme_light: "Hell", - theme_dark: "Dunkel" + theme_dark: "Dunkel", + llm: "Sprachmodell" }, questioninput: { tokensused: "Token verbraucht", @@ -261,7 +262,8 @@ i18n theme: "Theme", change_theme: "Switch theme", theme_light: "Light", - theme_dark: "Dark" + theme_dark: "Dark", + llm: "language model" }, questioninput: { tokensused: "Token used", @@ -416,7 +418,8 @@ i18n theme: "Design", change_theme: "Design wechseln", theme_light: "Hell", - theme_dark: "Dunkel" + theme_dark: "Dunkel", + llm: "Sprachmodell" }, questioninput: { tokensused: "Token vabrocht", diff --git a/app/frontend/src/pages/layout/Layout.tsx b/app/frontend/src/pages/layout/Layout.tsx index 8465e2e7..b1baaf4a 100644 --- a/app/frontend/src/pages/layout/Layout.tsx +++ b/app/frontend/src/pages/layout/Layout.tsx @@ -12,6 +12,7 @@ import { ApplicationConfig, configApi } from "../../api"; import { SettingsDrawer } from "../../components/SettingsDrawer"; import { FluentProvider, Theme } from '@fluentui/react-components'; import { useStyles, STORAGE_KEYS, adjustTheme } from "./LayoutHelper"; +import { DEFAULTLLM, LLMContext } from "../../components/LLMSelector/LLMContextProvider"; const formatDate = (date: Date) => { let formatted_date = @@ -26,9 +27,11 @@ export const Layout = () => { const navigate = useNavigate() const termsofuseread = localStorage.getItem(STORAGE_KEYS.TERMS_OF_USE_READ) === formatDate(new Date()); const language_pref = (localStorage.getItem(STORAGE_KEYS.SETTINGS_LANGUAGE)) || DEFAULTLANG; + const llm_pref = (localStorage.getItem(STORAGE_KEYS.SETTINGS_LLM)) || DEFAULTLLM; const font_scaling_pref = Number(localStorage.getItem(STORAGE_KEYS.SETTINGS_FONT_SCALING)) || 1; const ligth_theme_pref = localStorage.getItem(STORAGE_KEYS.SETTINGS_IS_LIGHT_THEME) === null ? true : localStorage.getItem(STORAGE_KEYS.SETTINGS_IS_LIGHT_THEME) == 'true'; const { language, setLanguage } = useContext(LanguageContext); + const { LLM, setLLM } = useContext(LLMContext); const { t, i18n } = useTranslation(); const [config, setConfig] = useState({ backend: { @@ -82,6 +85,12 @@ export const Layout = () => { setLanguage(lang); localStorage.setItem(STORAGE_KEYS.SETTINGS_LANGUAGE, lang); }; + const onLLMSelectionChanged = (e: SelectionEvents, selection: OptionOnSelectData) => { + let llm = selection.optionValue || DEFAULTLLM; + setLLM(llm); + localStorage.setItem(STORAGE_KEYS.SETTINGS_LLM, llm); + }; + return ( @@ -122,7 +131,10 @@ export const Layout = () => { fontscale={fontscaling} setFontscale={onFontscaleChange} isLight={isLight} - setTheme={onThemeChange}> + setTheme={onThemeChange} + defaultLLM={llm_pref} + onLLMSelectionChanged={onLLMSelectionChanged} + >
diff --git a/app/frontend/src/pages/layout/LayoutHelper.tsx b/app/frontend/src/pages/layout/LayoutHelper.tsx index 5733cc87..b9ca17e6 100644 --- a/app/frontend/src/pages/layout/LayoutHelper.tsx +++ b/app/frontend/src/pages/layout/LayoutHelper.tsx @@ -25,6 +25,7 @@ export const useStyles = makeStyles({ export const enum STORAGE_KEYS { TERMS_OF_USE_READ = 'TERMS_OF_USE_READ', SETTINGS_LANGUAGE = 'SETTINGS_LANGUAGE', + SETTINGS_LLM = 'SETTINGS_LLM', SETTINGS_FONT_SCALING = 'SETTINGS_FONT_SCALING', SETTINGS_IS_LIGHT_THEME = 'SETTINGS_IS_LIGHT_THEME', VERSION_UPDATE_SEEN = 'VERSION_UPDATE_SEEN' From ba1d2402ab19db936e458a6716a04908a6453edd Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Wed, 21 Aug 2024 10:54:28 +0200 Subject: [PATCH 05/34] remove azure dependencies --- app/backend/app.py | 8 +------- app/backend/requirements.txt | 1 - 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/app/backend/app.py b/app/backend/app.py index 7a2b5449..dd570c5d 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -2,9 +2,6 @@ import logging import os from typing import cast - -from azure.monitor.opentelemetry import configure_azure_monitor -from opentelemetry.instrumentation.aiohttp_client import AioHttpClientInstrumentor from opentelemetry.instrumentation.asgi import OpenTelemetryMiddleware from quart import ( Blueprint, @@ -210,12 +207,9 @@ async def setup_clients(): current_app.config[APPCONFIG_KEY] = await initApp() def create_app(): - if os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING"): - configure_azure_monitor() - AioHttpClientInstrumentor().instrument() app = Quart(__name__) app.register_blueprint(bp) - app.asgi_app = OpenTelemetryMiddleware(app.asgi_app) + app.asgi_app = OpenTelemetryMiddleware(app = app.asgi_app) # Level should be one of https://docs.python.org/3/library/logging.html#logging-levels logging.basicConfig(level=os.getenv("APP_LOG_LEVEL", "ERROR")) return app diff --git a/app/backend/requirements.txt b/app/backend/requirements.txt index 76ed2fb4..ba31fafa 100644 --- a/app/backend/requirements.txt +++ b/app/backend/requirements.txt @@ -5,7 +5,6 @@ langchain_community tiktoken uvicorn[standard]==0.30.3 aiohttp==3.10.2 -azure-monitor-opentelemetry==1.6.0 opentelemetry-instrumentation-asgi==0.46b0 opentelemetry-instrumentation-requests==0.46b0 opentelemetry-instrumentation-aiohttp-client==0.46b0 From 5350d9b9f9bb5788cb9f28570d07156d1bb3cdb1 Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Wed, 21 Aug 2024 10:55:01 +0200 Subject: [PATCH 06/34] remove ressources --- app/backend/ressources/base.json | 3 --- app/backend/ressources/demo.json | 21 --------------------- app/backend/ressources/dev.json | 21 --------------------- app/backend/ressources/local.json | 21 --------------------- app/backend/ressources/nosec.json | 21 --------------------- app/backend/ressources/prod.json | 21 --------------------- 6 files changed, 108 deletions(-) delete mode 100644 app/backend/ressources/base.json delete mode 100644 app/backend/ressources/demo.json delete mode 100644 app/backend/ressources/dev.json delete mode 100644 app/backend/ressources/local.json delete mode 100644 app/backend/ressources/nosec.json delete mode 100644 app/backend/ressources/prod.json diff --git a/app/backend/ressources/base.json b/app/backend/ressources/base.json deleted file mode 100644 index d1733968..00000000 --- a/app/backend/ressources/base.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "version": "1.1.2" -} \ No newline at end of file diff --git a/app/backend/ressources/demo.json 
b/app/backend/ressources/demo.json deleted file mode 100644 index 78cc1081..00000000 --- a/app/backend/ressources/demo.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "frontend": { - "labels": { - "env_name": "PILOT" - }, - "alternative_logo": false - }, - "backend": { - "enable_auth": true, - "enable_database": true, - "chat":{ - "log_tokens": true - }, - "brainstorm": { - "log_tokens": true - }, - "sum": { - "log_tokens": true - } - } -} \ No newline at end of file diff --git a/app/backend/ressources/dev.json b/app/backend/ressources/dev.json deleted file mode 100644 index d9b75028..00000000 --- a/app/backend/ressources/dev.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "frontend": { - "labels": { - "env_name": "MUC tschibidi-C" - }, - "alternative_logo": true - }, - "backend": { - "enable_auth": true, - "enable_database": true, - "chat":{ - "log_tokens": true - }, - "brainstorm": { - "log_tokens": true - }, - "sum": { - "log_tokens": true - } - } -} \ No newline at end of file diff --git a/app/backend/ressources/local.json b/app/backend/ressources/local.json deleted file mode 100644 index 44df80e0..00000000 --- a/app/backend/ressources/local.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "frontend": { - "labels": { - "env_name": "MUC tschibidi-C" - }, - "alternative_logo": true - }, - "backend": { - "enable_auth": false, - "enable_database": true, - "chat":{ - "log_tokens": true - }, - "brainstorm": { - "log_tokens": true - }, - "sum": { - "log_tokens": true - } - } -} \ No newline at end of file diff --git a/app/backend/ressources/nosec.json b/app/backend/ressources/nosec.json deleted file mode 100644 index b6152c26..00000000 --- a/app/backend/ressources/nosec.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "frontend": { - "labels": { - "env_name": "NOSEC" - }, - "alternative_logo": false - }, - "backend": { - "enable_auth": false, - "enable_database": false, - "chat":{ - "log_tokens": false - }, - "brainstorm": { - "log_tokens": false - }, - "sum": { - "log_tokens": false - } - } -} \ No newline at end of file diff --git a/app/backend/ressources/prod.json b/app/backend/ressources/prod.json deleted file mode 100644 index 5861cf2f..00000000 --- a/app/backend/ressources/prod.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "frontend": { - "labels": { - "env_name": "PROD" - }, - "alternative_logo": false - }, - "backend": { - "enable_auth": true, - "enable_database": true, - "chat":{ - "log_tokens": true - }, - "brainstorm": { - "log_tokens": true - }, - "sum": { - "log_tokens": true - } - } -} \ No newline at end of file From e69e96ca23b5bce313ed160dbd4fc2435212c16a Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Wed, 21 Aug 2024 10:55:48 +0200 Subject: [PATCH 07/34] =?UTF-8?q?=F0=9F=9A=AE=20remove=20bicep?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- infra/abbreviations.json | 135 -------------- infra/core/ai/cognitiveservices.bicep | 41 ---- infra/core/db/db.bicep | 48 ----- infra/core/host/appservice.bicep | 105 ----------- infra/core/host/appserviceplan.bicep | 21 --- infra/core/host/authsettingsV2.bicep | 73 -------- infra/core/monitor/applicationinsights.bicep | 17 -- infra/core/monitor/monitoring.bicep | 17 -- infra/main.bicep | 185 ------------------- infra/main.parameters.json | 66 ------- 10 files changed, 708 deletions(-) delete mode 100644 infra/abbreviations.json delete mode 100644 infra/core/ai/cognitiveservices.bicep delete mode 100644 infra/core/db/db.bicep delete mode 100644 infra/core/host/appservice.bicep delete mode 100644 
infra/core/host/appserviceplan.bicep delete mode 100644 infra/core/host/authsettingsV2.bicep delete mode 100644 infra/core/monitor/applicationinsights.bicep delete mode 100644 infra/core/monitor/monitoring.bicep delete mode 100644 infra/main.bicep delete mode 100644 infra/main.parameters.json diff --git a/infra/abbreviations.json b/infra/abbreviations.json deleted file mode 100644 index 703e5038..00000000 --- a/infra/abbreviations.json +++ /dev/null @@ -1,135 +0,0 @@ -{ - "analysisServicesServers": "as", - "apiManagementService": "apim-", - "appConfigurationConfigurationStores": "appcs-", - "appManagedEnvironments": "cae-", - "appContainerApps": "ca-", - "authorizationPolicyDefinitions": "policy-", - "automationAutomationAccounts": "aa-", - "blueprintBlueprints": "bp-", - "blueprintBlueprintsArtifacts": "bpa-", - "cacheRedis": "redis-", - "cdnProfiles": "cdnp-", - "cdnProfilesEndpoints": "cdne-", - "cognitiveServicesAccounts": "cog-", - "cognitiveServicesFormRecognizer": "cog-fr-", - "cognitiveServicesTextAnalytics": "cog-ta-", - "computeAvailabilitySets": "avail-", - "computeCloudServices": "cld-", - "computeDiskEncryptionSets": "des", - "computeDisks": "disk", - "computeDisksOs": "osdisk", - "computeGalleries": "gal", - "computeSnapshots": "snap-", - "computeVirtualMachines": "vm", - "computeVirtualMachineScaleSets": "vmss-", - "containerInstanceContainerGroups": "ci", - "containerRegistryRegistries": "cr", - "containerServiceManagedClusters": "aks-", - "databricksWorkspaces": "dbw-", - "dataFactoryFactories": "adf-", - "dataLakeAnalyticsAccounts": "dla", - "dataLakeStoreAccounts": "dls", - "dataMigrationServices": "dms-", - "dBforMySQLServers": "mysql-", - "dBforPostgreSQLServers": "psql-", - "devicesIotHubs": "iot-", - "devicesProvisioningServices": "provs-", - "devicesProvisioningServicesCertificates": "pcert-", - "documentDBDatabaseAccounts": "cosmos-", - "eventGridDomains": "evgd-", - "eventGridDomainsTopics": "evgt-", - "eventGridEventSubscriptions": "evgs-", - "eventHubNamespaces": "evhns-", - "eventHubNamespacesEventHubs": "evh-", - "hdInsightClustersHadoop": "hadoop-", - "hdInsightClustersHbase": "hbase-", - "hdInsightClustersKafka": "kafka-", - "hdInsightClustersMl": "mls-", - "hdInsightClustersSpark": "spark-", - "hdInsightClustersStorm": "storm-", - "hybridComputeMachines": "arcs-", - "insightsActionGroups": "ag-", - "insightsComponents": "appi-", - "keyVaultVaults": "kv-", - "kubernetesConnectedClusters": "arck", - "kustoClusters": "dec", - "kustoClustersDatabases": "dedb", - "logicIntegrationAccounts": "ia-", - "logicWorkflows": "logic-", - "machineLearningServicesWorkspaces": "mlw-", - "managedIdentityUserAssignedIdentities": "id-", - "managementManagementGroups": "mg-", - "migrateAssessmentProjects": "migr-", - "networkApplicationGateways": "agw-", - "networkApplicationSecurityGroups": "asg-", - "networkAzureFirewalls": "afw-", - "networkBastionHosts": "bas-", - "networkConnections": "con-", - "networkDnsZones": "dnsz-", - "networkExpressRouteCircuits": "erc-", - "networkFirewallPolicies": "afwp-", - "networkFirewallPoliciesWebApplication": "waf", - "networkFirewallPoliciesRuleGroups": "wafrg", - "networkFrontDoors": "fd-", - "networkFrontdoorWebApplicationFirewallPolicies": "fdfp-", - "networkLoadBalancersExternal": "lbe-", - "networkLoadBalancersInternal": "lbi-", - "networkLoadBalancersInboundNatRules": "rule-", - "networkLocalNetworkGateways": "lgw-", - "networkNatGateways": "ng-", - "networkNetworkInterfaces": "nic-", - "networkNetworkSecurityGroups": "nsg-", - 
"networkNetworkSecurityGroupsSecurityRules": "nsgsr-", - "networkNetworkWatchers": "nw-", - "networkPrivateDnsZones": "pdnsz-", - "networkPrivateLinkServices": "pl-", - "networkPublicIPAddresses": "pip-", - "networkPublicIPPrefixes": "ippre-", - "networkRouteFilters": "rf-", - "networkRouteTables": "rt-", - "networkRouteTablesRoutes": "udr-", - "networkTrafficManagerProfiles": "traf-", - "networkVirtualNetworkGateways": "vgw-", - "networkVirtualNetworks": "vnet-", - "networkVirtualNetworksSubnets": "snet-", - "networkVirtualNetworksVirtualNetworkPeerings": "peer-", - "networkVirtualWans": "vwan-", - "networkVpnGateways": "vpng-", - "networkVpnGatewaysVpnConnections": "vcn-", - "networkVpnGatewaysVpnSites": "vst-", - "notificationHubsNamespaces": "ntfns-", - "notificationHubsNamespacesNotificationHubs": "ntf-", - "operationalInsightsWorkspaces": "log-", - "portalDashboards": "dash-", - "powerBIDedicatedCapacities": "pbi-", - "purviewAccounts": "pview-", - "recoveryServicesVaults": "rsv-", - "resourcesResourceGroups": "rg-", - "searchSearchServices": "srch-", - "serviceBusNamespaces": "sb-", - "serviceBusNamespacesQueues": "sbq-", - "serviceBusNamespacesTopics": "sbt-", - "serviceEndPointPolicies": "se-", - "serviceFabricClusters": "sf-", - "signalRServiceSignalR": "sigr", - "sqlManagedInstances": "sqlmi-", - "sqlServers": "sql-", - "sqlServersDataWarehouse": "sqldw-", - "sqlServersDatabases": "sqldb-", - "sqlServersDatabasesStretch": "sqlstrdb-", - "storageStorageAccounts": "st", - "storageStorageAccountsVm": "stvm", - "storSimpleManagers": "ssimp", - "streamAnalyticsCluster": "asa-", - "synapseWorkspaces": "syn", - "synapseWorkspacesAnalyticsWorkspaces": "synw", - "synapseWorkspacesSqlPoolsDedicated": "syndp", - "synapseWorkspacesSqlPoolsSpark": "synsp", - "timeSeriesInsightsEnvironments": "tsi-", - "webServerFarms": "plan-", - "webSitesAppService": "app-", - "webSitesAppServiceEnvironment": "ase-", - "webSitesFunctions": "func-", - "webStaticSites": "stapp-" -} diff --git a/infra/core/ai/cognitiveservices.bicep b/infra/core/ai/cognitiveservices.bicep deleted file mode 100644 index 8efb10ac..00000000 --- a/infra/core/ai/cognitiveservices.bicep +++ /dev/null @@ -1,41 +0,0 @@ -param name string -param location string = resourceGroup().location -param tags object = {} - -param customSubDomainName string = name -param deployments array = [] -param kind string = 'OpenAI' -param publicNetworkAccess string = 'Enabled' -param sku object = { - name: 'S0' -} - -resource account 'Microsoft.CognitiveServices/accounts@2023-05-01' = { - name: name - location: location - tags: tags - kind: kind - properties: { - customSubDomainName: customSubDomainName - publicNetworkAccess: publicNetworkAccess - } - sku: sku -} - -@batchSize(1) -resource deployment 'Microsoft.CognitiveServices/accounts/deployments@2023-05-01' = [for deployment in deployments: { - parent: account - name: deployment.name - properties: { - model: deployment.model - raiPolicyName: contains(deployment, 'raiPolicyName') ? deployment.raiPolicyName : null - } - sku: contains(deployment, 'sku') ? 
deployment.sku : { - name: 'Standard' - capacity: 20 - } -}] - -output endpoint string = account.properties.endpoint -output id string = account.id -output name string = account.name diff --git a/infra/core/db/db.bicep b/infra/core/db/db.bicep deleted file mode 100644 index 31f3feed..00000000 --- a/infra/core/db/db.bicep +++ /dev/null @@ -1,48 +0,0 @@ -param location string = resourceGroup().location -param tags object = {} -param administratorLogin string -param administratorLoginPassword string -param name string - - -resource symbolicname 'Microsoft.DBforPostgreSQL/flexibleServers@2022-12-01' = { - name: name - location: location - tags: tags - sku: { - name: 'Standard_B1ms' - tier: 'Burstable' - } - properties: { - administratorLogin: administratorLogin - administratorLoginPassword: administratorLoginPassword - authConfig: { - activeDirectoryAuth: 'Disabled' - passwordAuth: 'Enabled' - } - availabilityZone: '1' - backup: { - backupRetentionDays: 7 - geoRedundantBackup: 'Disabled' - } - createMode: 'Default' - dataEncryption: { - type: 'SystemManaged' - } - highAvailability: { - mode: 'Disabled' - } - maintenanceWindow: { - customWindow: 'Disabled' - dayOfWeek: 0 - startHour: 0 - startMinute: 0 - } - pointInTimeUTC: 'string' - replicationRole: 'Primary' - storage: { - storageSizeGB: 32 - } - version: '15' - } -} diff --git a/infra/core/host/appservice.bicep b/infra/core/host/appservice.bicep deleted file mode 100644 index 9ba5ec91..00000000 --- a/infra/core/host/appservice.bicep +++ /dev/null @@ -1,105 +0,0 @@ -param name string -param location string = resourceGroup().location -param tags object = {} -param ssoSecret string - -// Reference Properties -param applicationInsightsName string = '' -param appServicePlanId string -param keyVaultName string = '' -param managedIdentity bool = !empty(keyVaultName) - -// Runtime Properties -@allowed([ - 'dotnet', 'dotnetcore', 'dotnet-isolated', 'node', 'python', 'java', 'powershell', 'custom' -]) -param runtimeName string -param runtimeNameAndVersion string = '${runtimeName}|${runtimeVersion}' -param runtimeVersion string - -// Microsoft.Web/sites Properties -param kind string = 'app,linux' - -// Microsoft.Web/sites/config -param allowedOrigins array = [] -param alwaysOn bool = true -param appCommandLine string = '' -param appSettings object = {} -param clientAffinityEnabled bool = false -param enableOryxBuild bool = contains(kind, 'linux') -param functionAppScaleLimit int = -1 -param linuxFxVersion string = runtimeNameAndVersion -param minimumElasticInstanceCount int = -1 -param numberOfWorkers int = -1 -param scmDoBuildDuringDeployment bool = false -param use32BitWorkerProcess bool = false -param ftpsState string = 'FtpsOnly' -param healthCheckPath string - -resource appService 'Microsoft.Web/sites@2022-03-01' = { - name: name - location: location - tags: tags - kind: kind - properties: { - serverFarmId: appServicePlanId - siteConfig: { - linuxFxVersion: linuxFxVersion - alwaysOn: alwaysOn - ftpsState: ftpsState - appCommandLine: appCommandLine - numberOfWorkers: numberOfWorkers != -1 ? numberOfWorkers : null - minimumElasticInstanceCount: minimumElasticInstanceCount != -1 ? minimumElasticInstanceCount : null - minTlsVersion: '1.2' - use32BitWorkerProcess: use32BitWorkerProcess - functionAppScaleLimit: functionAppScaleLimit != -1 ? 
functionAppScaleLimit : null - healthCheckPath: healthCheckPath - cors: { - allowedOrigins: union([ 'https://portal.azure.com', 'https://ms.portal.azure.com' ], allowedOrigins) - } - } - clientAffinityEnabled: clientAffinityEnabled - httpsOnly: true - } - - identity: { type: managedIdentity ? 'SystemAssigned' : 'None' } - - resource configAppSettings 'config' = { - name: 'appsettings' - properties: union(appSettings, - { - SCM_DO_BUILD_DURING_DEPLOYMENT: string(scmDoBuildDuringDeployment) - ENABLE_ORYX_BUILD: string(enableOryxBuild) - SSO_AUTHENTICATION_SECRET: ssoSecret - ORYX_DISABLE_TELEMETRY: 'true' - }, - runtimeName == 'python' ? { PYTHON_ENABLE_GUNICORN_MULTIWORKERS: 'true'} : {}, - !empty(applicationInsightsName) ? { APPLICATIONINSIGHTS_CONNECTION_STRING: applicationInsights.properties.ConnectionString } : {}, - !empty(keyVaultName) ? { AZURE_KEY_VAULT_ENDPOINT: keyVault.properties.vaultUri } : {}) - } - - resource configLogs 'config' = { - name: 'logs' - properties: { - applicationLogs: { fileSystem: { level: 'Verbose' } } - detailedErrorMessages: { enabled: true } - failedRequestsTracing: { enabled: true } - httpLogs: { fileSystem: { enabled: true, retentionInDays: 1, retentionInMb: 35 } } - } - dependsOn: [ - configAppSettings - ] - } -} - -resource keyVault 'Microsoft.KeyVault/vaults@2022-07-01' existing = if (!(empty(keyVaultName))) { - name: keyVaultName -} - -resource applicationInsights 'Microsoft.Insights/components@2020-02-02' existing = if (!empty(applicationInsightsName)) { - name: applicationInsightsName -} - -output identityPrincipalId string = managedIdentity ? appService.identity.principalId : '' -output name string = appService.name -output uri string = 'https://${appService.properties.defaultHostName}' diff --git a/infra/core/host/appserviceplan.bicep b/infra/core/host/appserviceplan.bicep deleted file mode 100644 index c444f406..00000000 --- a/infra/core/host/appserviceplan.bicep +++ /dev/null @@ -1,21 +0,0 @@ -param name string -param location string = resourceGroup().location -param tags object = {} - -param kind string = '' -param reserved bool = true -param sku object - -resource appServicePlan 'Microsoft.Web/serverfarms@2022-03-01' = { - name: name - location: location - tags: tags - sku: sku - kind: kind - properties: { - reserved: reserved - } -} - -output id string = appServicePlan.id -output name string = appServicePlan.name diff --git a/infra/core/host/authsettingsV2.bicep b/infra/core/host/authsettingsV2.bicep deleted file mode 100644 index d2d7c2f0..00000000 --- a/infra/core/host/authsettingsV2.bicep +++ /dev/null @@ -1,73 +0,0 @@ -param siteName string -param location string = resourceGroup().location -param ssoConfiguration string - -resource authsettingsV 'Microsoft.Web/sites/config@2022-09-01' = { - name: '${siteName}/authsettingsV2' - location: location - properties: { - platform: { - enabled: true - runtimeVersion: '~1' - } - globalValidation: { - requireAuthentication: true - unauthenticatedClientAction: 'RedirectToLoginPage' - redirectToProvider: 'LHMSSO' - excludedPaths: ['/health' ] - } - identityProviders: { - azureActiveDirectory: { - enabled: true - login: { - disableWWWAuthenticate: false - } - } - customOpenIdConnectProviders: { - LHMSSO: { - registration: { - clientId: 'mucgpt' - clientCredential: { - clientSecretSettingName: 'SSO_AUTHENTICATION_SECRET' - } - openIdConnectConfiguration: { - wellKnownOpenIdConfiguration: ssoConfiguration - } - } - login: { - scopes: [ - 'openid' - ] - } - } - } - } - login: { - tokenStore: { - enabled: 
true - tokenRefreshExtensionHours: 72 - } - preserveUrlFragmentsForLogins: false - allowedExternalRedirectUrls: [] - cookieExpiration: { - convention: 'FixedTime' - timeToExpiration: '08:00:00' - } - nonce: { - validateNonce: true - nonceExpirationInterval: '00:05:00' - } - } - httpSettings: { - requireHttps: true - routes: { - apiPrefix: '/.auth' - } - forwardProxy: { - convention: 'NoProxy' - } - } - } -} - - diff --git a/infra/core/monitor/applicationinsights.bicep b/infra/core/monitor/applicationinsights.bicep deleted file mode 100644 index 0d9bc474..00000000 --- a/infra/core/monitor/applicationinsights.bicep +++ /dev/null @@ -1,17 +0,0 @@ -param name string -param location string = resourceGroup().location -param tags object = {} - -resource applicationInsights 'Microsoft.Insights/components@2020-02-02' = { - name: name - location: location - tags: tags - kind: 'web' - properties: { - Application_Type: 'web' - } -} - -output connectionString string = applicationInsights.properties.ConnectionString -output instrumentationKey string = applicationInsights.properties.InstrumentationKey -output name string = applicationInsights.name diff --git a/infra/core/monitor/monitoring.bicep b/infra/core/monitor/monitoring.bicep deleted file mode 100644 index 0143363f..00000000 --- a/infra/core/monitor/monitoring.bicep +++ /dev/null @@ -1,17 +0,0 @@ -param applicationInsightsName string -param location string = resourceGroup().location -param tags object = {} - -module applicationInsights 'applicationinsights.bicep' = { - name: 'applicationinsights' - params: { - name: applicationInsightsName - location: location - tags: tags - } -} - -output applicationInsightsConnectionString string = applicationInsights.outputs.connectionString -output applicationInsightsInstrumentationKey string = applicationInsights.outputs.instrumentationKey -output applicationInsightsName string = applicationInsights.outputs.name - diff --git a/infra/main.bicep b/infra/main.bicep deleted file mode 100644 index a1be9fd0..00000000 --- a/infra/main.bicep +++ /dev/null @@ -1,185 +0,0 @@ -targetScope = 'subscription' - -@minLength(1) -@maxLength(64) -@description('Name of the the environment which is used to generate a short unique hash used in all resources.') -param environmentName string - -@minLength(1) -@description('Primary location for all resources') -param location string - -param appServicePlanName string = '' -param backendServiceName string = '' -param resourceGroupName string = '' - -param applicationInsightsName string = '' - -@secure() -param ssoSecret string -param ssoIssuer string -param configName string -param tagStage string -param dbHost string = '' -param dbName string = '' -param dbUser string = '' -param backendSkuName string -param backendCapacaty int -@secure() -param dbPassword string = '' - -param openAiServiceName string = '' -param openAiResourceGroupName string = '' -@description('Location for the OpenAI resource group') -@allowed(['canadaeast', 'eastus', 'francecentral', 'japaneast', 'northcentralus', 'westeurope']) -@metadata({ - azd: { - type: 'location' - } -}) -param openAiResourceGroupLocation string - -param openAiSkuName string = 'S0' - -param chatGptDeploymentName string // Set in main.parameters.json -param chatGptDeploymentCapacity int = 70 -param chatGptModelName string = 'gpt-35-turbo' -param chatGptModelVersion string = '0301' - -@description('Use Application Insights for monitoring and performance tracing') -param useApplicationInsights bool = false - -var ssoConfiguration = 
concat(ssoIssuer, '/.well-known/openid-configuration') -var abbrs = loadJsonContent('abbreviations.json') -var resourceToken = toLower(uniqueString(subscription().id, environmentName, location)) -var tags = {'azd-env-name': environmentName,'BusinessCriticality': 'low','BusinessUnit': 'ITM-KM-DI-KI','CostCenter': '313-2-014400','ExpiryDate': '31.12.2999','RequestNumber': 'Nicht vorhanden','ServiceName': 'MUCGPT','ServiceOwner': 'Michael Jaumann - ITM-KM-DI-KI','Stage': tagStage} - -// Organize resources in a resource group -resource resourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' = { - name: !empty(resourceGroupName) ? resourceGroupName : '${abbrs.resourcesResourceGroups}${environmentName}' - location: location - tags: tags -} - -resource openAiResourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' existing = if (!empty(openAiResourceGroupName)) { - name: !empty(openAiResourceGroupName) ? openAiResourceGroupName : resourceGroup.name -} - - -// Monitor application with Azure Monitor -module monitoring 'core/monitor/monitoring.bicep' = if (useApplicationInsights) { - name: 'monitoring' - scope: resourceGroup - params: { - location: location - tags: tags - applicationInsightsName: !empty(applicationInsightsName) ? applicationInsightsName : '${abbrs.insightsComponents}${resourceToken}' - } -} - -// Create an App Service Plan to group applications under the same payment plan and SKU -module appServicePlan 'core/host/appserviceplan.bicep' = { - name: 'appserviceplan' - scope: resourceGroup - params: { - name: !empty(appServicePlanName) ? appServicePlanName : '${abbrs.webServerFarms}${resourceToken}' - location: location - tags: tags - sku: { - name: backendSkuName - capacity: backendCapacaty - } - kind: 'linux' - } -} - -// Create an App Service Plan to group applications under the same payment plan and SKU -module db 'core/db/db.bicep' = { - name: 'db' - scope: resourceGroup - params: { - name: dbHost - location: location - tags: tags - administratorLogin: dbUser - administratorLoginPassword: dbPassword - } -} - -// The application frontend -module backend 'core/host/appservice.bicep' = { - name: 'web' - scope: resourceGroup - params: { - name: !empty(backendServiceName) ? backendServiceName : '${abbrs.webSitesAppService}backend-${resourceToken}' - location: location - tags: union(tags, { 'azd-service-name': 'backend' }) - appServicePlanId: appServicePlan.outputs.id - runtimeName: 'python' - runtimeVersion: '3.10' - appCommandLine: 'python3 -m gunicorn main:app' - scmDoBuildDuringDeployment: true - managedIdentity: true - ssoSecret: ssoSecret - healthCheckPath: '/health' - appSettings: { - AZURE_OPENAI_SERVICE: openAi.outputs.name - AZURE_OPENAI_CHATGPT_DEPLOYMENT: chatGptDeploymentName - AZURE_OPENAI_CHATGPT_MODEL: chatGptModelName - APPLICATIONINSIGHTS_CONNECTION_STRING: useApplicationInsights ? monitoring.outputs.applicationInsightsConnectionString : '' - SSO_ISSUER: ssoIssuer - CONFIG_NAME: configName - DB_HOST: concat(dbHost, '.postgres.database.azure.com') - DB_NAME: dbName - DB_USER: dbUser - DB_PASSWORD: dbPassword - } - } -} - -module openAi 'core/ai/cognitiveservices.bicep' = { - name: 'openai' - scope: openAiResourceGroup - params: { - name: !empty(openAiServiceName) ? 
openAiServiceName : '${abbrs.cognitiveServicesAccounts}${resourceToken}' - location: openAiResourceGroupLocation - tags: tags - sku: { - name: openAiSkuName - } - deployments: [ - { - name: chatGptDeploymentName - model: { - format: 'OpenAI' - name: chatGptModelName - version: chatGptModelVersion - } - sku: { - name: 'Standard' - capacity: chatGptDeploymentCapacity - } - } - ] - } -} - -module authsettingsV2 'core/host/authsettingsV2.bicep' = { - name: 'authsettingsV2' - scope: resourceGroup - params: { - location: location - siteName: backend.outputs.name - ssoConfiguration: ssoConfiguration - } -} - -output AZURE_LOCATION string = location -output AZURE_TENANT_ID string = tenant().tenantId -output AZURE_RESOURCE_GROUP string = resourceGroup.name - -output AZURE_OPENAI_SERVICE string = openAi.outputs.name -output AZURE_OPENAI_RESOURCE_GROUP string = openAiResourceGroup.name -output AZURE_OPENAI_CHATGPT_DEPLOYMENT string = chatGptDeploymentName -output AZURE_OPENAI_CHATGPT_MODEL string = chatGptModelName -output BACKEND_URI string = backend.outputs.uri diff --git a/infra/main.parameters.json b/infra/main.parameters.json deleted file mode 100644 index 9b2d3d26..00000000 --- a/infra/main.parameters.json +++ /dev/null @@ -1,66 +0,0 @@ -{ - "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", - "contentVersion": "1.0.0.0", - "parameters": { - "environmentName": { - "value": "${AZURE_ENV_NAME}" - }, - "resourceGroupName": { - "value": "${AZURE_RESOURCE_GROUP}" - }, - "location": { - "value": "${AZURE_LOCATION}" - }, - "principalId": { - "value": "${AZURE_PRINCIPAL_ID}" - }, - "openAiServiceName": { - "value": "${AZURE_OPENAI_SERVICE}" - }, - "openAiResourceGroupName": { - "value": "${AZURE_OPENAI_RESOURCE_GROUP}" - }, - "openAiSkuName": { - "value": "S0" - }, - "chatGptDeploymentName": { - "value": "${AZURE_OPENAI_CHATGPT_DEPLOYMENT=chat}" - }, - "chatGptDeploymentCapacity": { - "value":"${AZURE_OPENAI_CHATGPT_DEPLOYMENT_CAPACITY}" - }, - "useApplicationInsights": { - "value": "${AZURE_USE_APPLICATION_INSIGHTS=false}" - }, - "ssoSecret": { - "value": "${SSO_AUTHENTICATION_SECRET}" - }, - "ssoIssuer": { - "value": "${SSO_ISSUER}" - }, - "backendSkuName": { - "value": "${BACKEND_SKU}" - }, - "backendCapacaty": { - "value": "${BACKEND_CAPACITY}" - }, - "configName": { - "value": "${CONFIG_NAME}" - }, - "tagStage": { - "value": "${TAG_STAGE}" - }, - "dbHost": { - "value": "${DB_HOST}" - }, - "dbName": { - "value": "${DB_NAME}" - }, - "dbUser": { - "value": "${DB_USER}" - }, - "dbPassword": { - "value": "${DB_PASSWORD}" - } - } -} From 5915e7b39e0e170d17d32a8dadbec112cfdffcf0 Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Wed, 21 Aug 2024 19:48:10 +0200 Subject: [PATCH 08/34] remove azure stuff and terraform files --- app/start.ps1 | 75 ------------------------------------------ app/start.sh | 66 ------------------------------------- azure.yaml | 22 ------------- infra/main.tf | 78 -------------------------------------------- infra/provider.tf | 30 ----------------- infra/variables.tf | 81 ---------------------------------------------- 6 files changed, 352 deletions(-) delete mode 100644 app/start.ps1 delete mode 100755 app/start.sh delete mode 100644 azure.yaml delete mode 100644 infra/main.tf delete mode 100644 infra/provider.tf delete mode 100644 infra/variables.tf diff --git a/app/start.ps1 b/app/start.ps1 deleted file mode 100644 index 8a582eb2..00000000 --- a/app/start.ps1 +++ /dev/null @@ -1,75 +0,0 @@ -Write-Host "" -Write-Host "Loading azd 
.env file from current environment" -Write-Host "" - -foreach ($line in (& azd env get-values)) { - if ($line -match "([^=]+)=(.*)") { - $key = $matches[1] - $value = $matches[2] -replace '^"|"$' - Set-Item -Path "env:\$key" -Value $value - } -} - -if ($LASTEXITCODE -ne 0) { - Write-Host "Failed to load environment variables from azd environment" - exit $LASTEXITCODE -} - - -Write-Host 'Creating python virtual environment "backend/backend_env"' -$pythonCmd = Get-Command python -ErrorAction SilentlyContinue -if (-not $pythonCmd) { - # fallback to python3 if python not found - $pythonCmd = Get-Command python3 -ErrorAction SilentlyContinue -} -Start-Process -FilePath ($pythonCmd).Source -ArgumentList "-m venv ./backend/backend_env" -Wait -NoNewWindow - -Write-Host "" -Write-Host "Restoring backend python packages" -Write-Host "" - -Set-Location backend -$venvPythonPath = "./backend_env/scripts/python.exe" -if (Test-Path -Path "/usr") { - # fallback to Linux venv path - $venvPythonPath = "./backend_env/bin/python" -} - -Start-Process -FilePath $venvPythonPath -ArgumentList "-m pip install -r requirements.txt" -Wait -NoNewWindow -if ($LASTEXITCODE -ne 0) { - Write-Host "Failed to restore backend python packages" - exit $LASTEXITCODE -} - -Write-Host "" -Write-Host "Restoring frontend npm packages" -Write-Host "" -Set-Location ../frontend -npm install -if ($LASTEXITCODE -ne 0) { - Write-Host "Failed to restore frontend npm packages" - exit $LASTEXITCODE -} - -Write-Host "" -Write-Host "Building frontend" -Write-Host "" -npm run build -if ($LASTEXITCODE -ne 0) { - Write-Host "Failed to build frontend" - exit $LASTEXITCODE -} - -Write-Host "" -Write-Host "Starting backend" -Write-Host "" -Set-Location ../backend - -$port = 50505 -$hostname = "localhost" -Start-Process -FilePath $venvPythonPath -ArgumentList "-m quart --app main:app run --port $port --host $hostname --reload" -Wait -NoNewWindow - -if ($LASTEXITCODE -ne 0) { - Write-Host "Failed to start backend" - exit $LASTEXITCODE -} diff --git a/app/start.sh b/app/start.sh deleted file mode 100755 index a62bb0f3..00000000 --- a/app/start.sh +++ /dev/null @@ -1,66 +0,0 @@ -#!/bin/sh - -echo "" -echo "Loading azd .env file from current environment" -echo "" - -while IFS='=' read -r key value; do - value=$(echo "$value" | sed 's/^"//' | sed 's/"$//') - export "$key=$value" -done < Date: Wed, 21 Aug 2024 19:48:38 +0200 Subject: [PATCH 09/34] run without docker --- README.md | 69 ++++++++++---------------------- app/backend/core/confighelper.py | 11 +++-- app/backend/init_app.py | 4 +- app/backend/requirements.txt | 6 +-- 4 files changed, 32 insertions(+), 58 deletions(-) diff --git a/README.md b/README.md index 6ddedc16..0a9a91e5 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,6 @@ The documentation project is built with technologies we use in our projects (see ### Backend: * [Python 3.9, 3.10 or 3.11](https://www.python.org/downloads/) * [Quart](https://pgjones.gitlab.io/quart/) -* [Azure OpenAI](https://azure.microsoft.com/de-de/products/ai-services/openai-service) * [LangChain](https://www.langchain.com/) ### Frontend: @@ -43,10 +42,10 @@ The documentation project is built with technologies we use in our projects (see * [Javascript](https://wiki.selfhtml.org/wiki/JavaScript) ### Deployment: - * [Azure Developer CLI](https://learn.microsoft.com/en-us/azure/developer/azure-developer-cli/install-azd?tabs=winget-windows%2Cbrew-mac%2Cscript-linux&pivots=os-windows) * [Node.js 14+](https://nodejs.org/en/download/package-manager) * 
[Git](https://git-scm.com/downloads) - * [Powershell 7+ (pwsh)](https://github.com/powershell/powershell) + * Python 12 + * Docker ## Table of contents * [Built With](#built-with) @@ -64,52 +63,26 @@ The documentation project is built with technologies we use in our projects (see See the [open issues](https://github.com/it-at-m/mucgpt/issues) for a full list of proposed features (and known issues). +## Run + Configure your environment in [config/default.json](config/default.json). Insert Model Endpoint and API Key for your connection to an OpenAI completion endpoint or an Azure OpenAI completions endpoint. +### Run locally +``` +cd app\backend +pip install --no-cache-dir --upgrade -r requirements.txt +cd ..\frontend +npm run build +cd ..\backend +$env:MUCGPT_CONFIG="path to default.json" +$env:MUCGPT_BASE_CONFIG="path to base.json" +python -m quart --app main:app run +``` + + +### Run with docker +1. Build an Image + ``` docker build --tag mucgpt-local . --build-arg fromconfig="./config/default.json"``` +2. Run the image ```docker run --detach --publish 8080:8000 mucgpt-local``` -## Set up on Azure -As this project bases on a template of Microsoft Azure see also [here](https://github.com/Azure-Samples/azure-search-openai-demo?tab=readme-ov-file#azure-deployment) for the deployment documentation. -### You need the following requirements to set up MUCGPT on Azure: -* Azure account -* Azure subscription with access enabled for the Azure OpenAI service -* Account Permissions: - * `Microsoft.Authorization/roleAssignments/write` - * Role Based Access Control Administrator, User Access Administrator, or Owner - * subscription-level permissions - * `Microsoft.Resources/deployments/write` on the subscription level - - -### Cost estimation: -Pricing varies per region and usage, so it isn't possible to predict exact costs for your usage. However, you can try the [Azure pricing calculator](https://azure.microsoft.com/en-us/pricing/calculator/) for the resources below. -* Azure App Service -* Azure OpenAI -* Flexibler Azure Database for PostgreSQL-Server -* App Service-Plan - -### Deploying -1. Install the [required tools](#built-with) -2. Clone the repository with the command `git clone https://github.com/it-at-m/mucgpt` and switch in your terminal to the folder -3. Login to your Azure account: `azd auth login` -4. Create a new azd environemnt with `azd env new`. Enter a name that will be used for the resource group. This will create a new folder in the `.azure` folder, and set it as the active environment for any calls to `azd` going forward. -5. (Optional) This is the point where you can customize the deployment by setting environment variables, in order to use existing resources, enable optional features (such as auth or vision), or deploy to free tiers. -6. Run `azd up` - This will provision Azure resources and deploy this sample to those resources. -7. After the application has been successfully deployed you will see a URL printed to the console. Click that URL to interact with the application in your browser. It will look like the following: -![](/docs/endpoint.png) - > **_NOTE:_** It may take 5-10 minutes after you see 'SUCCESS' for the application to be fully deployed. If you see a "Python Developer" welcome screen or an error page, then wait a bit and refresh the page. - -### Deploying again -If you've only changed the backend/frontend code in the `app` folder, then you don't need to re-provision the Azure resources. 
You can just run: - -`azd deploy` - -If you've changed the infrastructure files (`infra` folder or `azure.yaml`), then you'll need to re-provision the Azure resources. You can do that by running: - -`azd up` - -### Running locally -You can only run locally after having successfully run the `azd up` command. If you haven't yet, follow the steps in [Deploying](#deploying) above. - -1. Run `azd auth login` -2. Change dir to app -3. Run `./start.ps1` or `./start.sh` to start the app ## Documentation ![Architecture](docs/appcomponents_en.png) diff --git a/app/backend/core/confighelper.py b/app/backend/core/confighelper.py index 80213cc5..b8f3b6f9 100644 --- a/app/backend/core/confighelper.py +++ b/app/backend/core/confighelper.py @@ -6,14 +6,13 @@ class ConfigHelper: """Loads an available configuration. """ - def __init__(self, base_path: str, env: str, base_config_name: str = "base"): - self.base_path = base_path - self.base_config_name = base_config_name - self.env = env + def __init__(self, env_config: str, base_config: str = "base.json"): + self.base_config = base_config + self.env_config = env_config def loadData(self) -> Config: - with open(self.base_path + self.env + ".json") as f: + with open(self.env_config) as f: env_config = json.load(f) - with open(self.base_path + self.base_config_name + ".json") as f: + with open(self.base_config) as f: base_config = json.load(f) result_dict = dict(env_config,**base_config) return to_typed_config(result_dict) \ No newline at end of file diff --git a/app/backend/init_app.py b/app/backend/init_app.py index dee97eb2..d99f30e1 100644 --- a/app/backend/init_app.py +++ b/app/backend/init_app.py @@ -54,7 +54,9 @@ async def initApp() -> AppConfig: """ # read enviornment config - config_helper = ConfigHelper(base_path=os.path.dirname(os.path.realpath(__file__))+"/", env="config", base_config_name="base") + env_config = os.environ['MUCGPT_CONFIG'] if "MUCGPT_CONFIG" in os.environ else os.path.dirname(os.path.realpath(__file__))+"/config.json" + base_config = os.environ['MUCGPT_BASE_CONFIG'] if "MUCGPT_BASE_CONFIG" in os.environ is not None else os.path.dirname(os.path.realpath(__file__))+"/base.json" + config_helper = ConfigHelper(env_config=env_config, base_config=base_config) cfg = config_helper.loadData() # Set up authentication helper auth_helper = AuthentificationHelper( diff --git a/app/backend/requirements.txt b/app/backend/requirements.txt index ba31fafa..f3459d70 100644 --- a/app/backend/requirements.txt +++ b/app/backend/requirements.txt @@ -5,9 +5,9 @@ langchain_community tiktoken uvicorn[standard]==0.30.3 aiohttp==3.10.2 -opentelemetry-instrumentation-asgi==0.46b0 -opentelemetry-instrumentation-requests==0.46b0 -opentelemetry-instrumentation-aiohttp-client==0.46b0 +opentelemetry-instrumentation-asgi==0.47b0 +opentelemetry-instrumentation-requests +opentelemetry-instrumentation-aiohttp-client werkzeug==3.0.3 joserfc requests From 7d3a2177f874ea423cfa5832a4ff4e1cb8ba156a Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Wed, 21 Aug 2024 21:57:07 +0200 Subject: [PATCH 10/34] =?UTF-8?q?=F0=9F=94=AE=20select=20model=20from=20co?= =?UTF-8?q?nfig=20in=20backend?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/backend/app.py | 14 ++++++- app/backend/core/types/AppConfig.py | 4 +- app/backend/core/types/Config.py | 4 ++ app/frontend/src/api/models.ts | 11 +++--- .../ChatsettingsDrawer/ChatsettingsDrawer.tsx | 13 +++---- .../LLMSelector/LLMContextProvider.tsx | 9 +++-- 
.../components/LLMSelector/LLMSelector.tsx | 22 +++++------ .../QuestionInput/QuestionInput.tsx | 5 ++- .../SettingsDrawer/SettingsDrawer.tsx | 18 ++++----- .../src/components/SumInput/SumInput.tsx | 8 ++-- app/frontend/src/index.tsx | 5 ++- app/frontend/src/pages/chat/Chat.tsx | 1 + app/frontend/src/pages/layout/Layout.tsx | 38 +++++++++++++------ 13 files changed, 94 insertions(+), 58 deletions(-) diff --git a/app/backend/app.py b/app/backend/app.py index dd570c5d..7ef9b1cf 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -1,7 +1,7 @@ import json import logging import os -from typing import cast +from typing import List, cast from opentelemetry.instrumentation.asgi import OpenTelemetryMiddleware from quart import ( Blueprint, @@ -15,6 +15,7 @@ send_from_directory, ) +from core.types.Config import ModelsConfig, ModelsDTO from core.authentification import AuthentificationHelper, AuthError from core.helper import format_as_ndjson from core.modelhelper import num_tokens_from_message @@ -139,7 +140,16 @@ async def chat(): @bp.route("/config", methods=["GET"]) async def getConfig(): cfg = get_config_and_authentificate() - return jsonify(cfg["configuration_features"]) + frontend_features = cfg["configuration_features"]["frontend"] + models= cast(List[ModelsConfig], cfg["configuration_features"]["backend"]["models"]) + models_dto_list = [] + for model in models: + dto = ModelsDTO(model_name=model["model_name"], max_tokens=model["max_tokens"]) + models_dto_list.append(dto) + return jsonify({ + "frontend": frontend_features, + "models": models_dto_list + }) @bp.route("/statistics", methods=["GET"]) async def getStatistics(): diff --git a/app/backend/core/types/AppConfig.py b/app/backend/core/types/AppConfig.py index 88520801..44deae5f 100644 --- a/app/backend/core/types/AppConfig.py +++ b/app/backend/core/types/AppConfig.py @@ -1,4 +1,4 @@ -from typing import TypedDict +from typing import List, TypedDict from brainstorm.brainstorm import Brainstorm from chat.chat import Chat from core.authentification import AuthentificationHelper @@ -14,6 +14,6 @@ class AppConfig(TypedDict): sum_approaches: Summarize brainstorm_approaches: Brainstorm authentification_client: AuthentificationHelper - configuration_features: Config + configuration_features: List[Config] repository: Repository backend_config: BackendConfig \ No newline at end of file diff --git a/app/backend/core/types/Config.py b/app/backend/core/types/Config.py index 828a9e2f..83aeddf0 100644 --- a/app/backend/core/types/Config.py +++ b/app/backend/core/types/Config.py @@ -13,6 +13,10 @@ class ModelsConfig(TypedDict): api_version: str max_tokens: int +class ModelsDTO(TypedDict): + model_name: str + max_tokens: int + class SSOConfig(TypedDict): sso_issuer: str role: str diff --git a/app/frontend/src/api/models.ts b/app/frontend/src/api/models.ts index d439bf90..fb266720 100644 --- a/app/frontend/src/api/models.ts +++ b/app/frontend/src/api/models.ts @@ -36,20 +36,21 @@ export type BrainstormRequest = { }; export interface ApplicationConfig { - backend: Backend; + models: Model[]; frontend: Frontend; version: string; } -export interface Backend { - enable_auth: boolean; -} - export interface Frontend { alternative_logo: boolean; labels: Labels; } +export interface Model { + max_tokens: number; + model_name: string; +} + export interface Labels { env_name: string; } diff --git a/app/frontend/src/components/ChatsettingsDrawer/ChatsettingsDrawer.tsx b/app/frontend/src/components/ChatsettingsDrawer/ChatsettingsDrawer.tsx index 
a7559640..c7041a85 100644 --- a/app/frontend/src/components/ChatsettingsDrawer/ChatsettingsDrawer.tsx +++ b/app/frontend/src/components/ChatsettingsDrawer/ChatsettingsDrawer.tsx @@ -1,7 +1,5 @@ -import { ChatSettings24Regular, ChatWarning24Regular, CheckboxWarning24Regular, Delete24Regular, Dismiss24Regular } from "@fluentui/react-icons"; +import { ChatSettings24Regular, ChatWarning24Regular, Dismiss24Regular } from "@fluentui/react-icons"; import { - DrawerHeader, - DrawerHeaderTitle, OverlayDrawer, Button, Slider, @@ -12,13 +10,13 @@ import { InfoLabel, Tooltip, Textarea, - TextareaOnChangeData, - Badge + TextareaOnChangeData } from "@fluentui/react-components"; import styles from "./ChatsettingsDrawer.module.css"; -import { useCallback, useState } from "react"; +import { useCallback, useContext, useState } from "react"; import { useTranslation } from 'react-i18next'; +import { LLMContext } from "../LLMSelector/LLMContextProvider"; interface Props { temperature: number; setTemperature: (temp: number, id: number) => void; @@ -32,6 +30,7 @@ interface Props { export const ChatsettingsDrawer = ({ temperature, setTemperature, max_tokens, setMaxTokens, systemPrompt, setSystemPrompt, current_id }: Props) => { const [isOpen, setIsOpen] = useState(false); const { t, i18n } = useTranslation(); + const { LLM } = useContext(LLMContext) const onClickRightButton = useCallback(() => { setIsOpen(true); }, []) @@ -42,7 +41,7 @@ export const ChatsettingsDrawer = ({ temperature, setTemperature, max_tokens, se const max_tokensID = useId("input-max_tokens"); const min_max_tokens = 10; - const max_max_tokens = 4000; + const max_max_tokens = LLM.max_tokens; const min_temp = 0; const max_temp = 1; diff --git a/app/frontend/src/components/LLMSelector/LLMContextProvider.tsx b/app/frontend/src/components/LLMSelector/LLMContextProvider.tsx index 72d59a71..c1abb484 100644 --- a/app/frontend/src/components/LLMSelector/LLMContextProvider.tsx +++ b/app/frontend/src/components/LLMSelector/LLMContextProvider.tsx @@ -1,16 +1,17 @@ // Context.js import React, { Dispatch, SetStateAction, useState } from "react"; +import { Model } from "../../api"; interface ILLMProvider { - LLM: string; - setLLM: Dispatch>; + LLM: Model; + setLLM: Dispatch>; } export const DEFAULTLLM = "GPT-4o-mini"; -export const LLMContext = React.createContext({ LLM: DEFAULTLLM, setLLM: () => { } }); +export const LLMContext = React.createContext({ LLM: { model_name: DEFAULTLLM, max_tokens: 0 }, setLLM: () => { } }); export const LLMContextProvider = (props: React.PropsWithChildren<{}>) => { - const [LLM, setLLM] = useState(DEFAULTLLM); + const [LLM, setLLM] = useState({ model_name: DEFAULTLLM, max_tokens: 0 }); return ( diff --git a/app/frontend/src/components/LLMSelector/LLMSelector.tsx b/app/frontend/src/components/LLMSelector/LLMSelector.tsx index e78fa87f..31ba027a 100644 --- a/app/frontend/src/components/LLMSelector/LLMSelector.tsx +++ b/app/frontend/src/components/LLMSelector/LLMSelector.tsx @@ -4,6 +4,7 @@ import { Option, } from "@fluentui/react-components"; import { SelectionEvents, OptionOnSelectData } from "@fluentui/react-combobox"; +import { Model } from "../../api"; const useStyles = makeStyles({ root: { @@ -17,10 +18,11 @@ const useStyles = makeStyles({ interface Props { onSelectionChange: (e: SelectionEvents, selection: OptionOnSelectData) => void; defaultLLM: string; + options: Model[]; } -export const LLMSelector = ({ onSelectionChange, defaultLLM }: Props) => { +export const LLMSelector = ({ onSelectionChange, defaultLLM, options }: 
Props) => { const styles = useStyles(); return (
@@ -30,18 +32,12 @@ export const LLMSelector = ({ onSelectionChange, defaultLLM }: Props) => { onOptionSelect={onSelectionChange} appearance="underline" size="small" positioning="below-start"> - - - - + {options.map((item, index) => ( + + + ))}
); diff --git a/app/frontend/src/components/QuestionInput/QuestionInput.tsx b/app/frontend/src/components/QuestionInput/QuestionInput.tsx index c2733150..7787220e 100644 --- a/app/frontend/src/components/QuestionInput/QuestionInput.tsx +++ b/app/frontend/src/components/QuestionInput/QuestionInput.tsx @@ -4,6 +4,8 @@ import { Send28Filled } from "@fluentui/react-icons"; import styles from "./QuestionInput.module.css"; import { useTranslation } from 'react-i18next'; +import { useContext } from "react"; +import { LLMContext } from "../LLMSelector/LLMContextProvider"; interface Props { onSend: (question: string) => void; @@ -18,7 +20,8 @@ interface Props { export const QuestionInput = ({ onSend, disabled, placeholder, clearOnSend, tokens_used, token_limit_tracking = true, question, setQuestion }: Props) => { const { t, i18n } = useTranslation(); - const wordCount = 4000; + const { LLM } = useContext(LLMContext) + const wordCount = LLM.max_tokens; const getDescription = () => { let actual = countWords(question) + tokens_used; let text; diff --git a/app/frontend/src/components/SettingsDrawer/SettingsDrawer.tsx b/app/frontend/src/components/SettingsDrawer/SettingsDrawer.tsx index b9b013ca..84f3c565 100644 --- a/app/frontend/src/components/SettingsDrawer/SettingsDrawer.tsx +++ b/app/frontend/src/components/SettingsDrawer/SettingsDrawer.tsx @@ -1,10 +1,7 @@ import { ChevronDown24Regular, DarkTheme20Regular, Dismiss24Regular, FontIncrease20Regular, Mail24Regular } from "@fluentui/react-icons"; import { - DrawerHeader, - DrawerHeaderTitle, OverlayDrawer, Button, - CheckboxOnChangeData, Slider, SliderProps, Label, @@ -14,12 +11,13 @@ import { } from "@fluentui/react-components"; import styles from "./SettingsDrawer.module.css"; -import { ChangeEvent, useCallback, useState } from "react"; +import { useCallback, useState } from "react"; import { SelectionEvents, OptionOnSelectData } from "@fluentui/react-combobox"; import { LanguageSelector } from "../../components/LanguageSelector"; import { LLMSelector } from "../LLMSelector/LLMSelector"; import { useTranslation } from 'react-i18next'; import cheetsheet from "../../assets/mucgpt_cheatsheet.pdf"; +import { Model } from "../../api"; interface Props { onLanguageSelectionChanged: (e: SelectionEvents, selection: OptionOnSelectData) => void; defaultlang: string; @@ -30,9 +28,10 @@ interface Props { setTheme: (isLight: boolean) => void; onLLMSelectionChanged: (e: SelectionEvents, selection: OptionOnSelectData) => void; defaultLLM: string; + llmOptions: Model[]; } -export const SettingsDrawer = ({ onLanguageSelectionChanged, defaultlang, version, fontscale, setFontscale, isLight, setTheme, onLLMSelectionChanged, defaultLLM }: Props) => { +export const SettingsDrawer = ({ onLanguageSelectionChanged, defaultlang, version, fontscale, setFontscale, isLight, setTheme, onLLMSelectionChanged, defaultLLM, llmOptions }: Props) => { const [isOpen, setIsOpen] = useState(false); const { t, i18n } = useTranslation(); @@ -74,14 +73,15 @@ export const SettingsDrawer = ({ onLanguageSelectionChanged, defaultlang, versio
-
- {t('components.settingsdrawer.fontsize')} -
+
{t('components.settingsdrawer.llm')}
- + +
+
+ {t('components.settingsdrawer.fontsize')}
diff --git a/app/frontend/src/components/SumInput/SumInput.tsx b/app/frontend/src/components/SumInput/SumInput.tsx index ea2ac963..ad8348a8 100644 --- a/app/frontend/src/components/SumInput/SumInput.tsx +++ b/app/frontend/src/components/SumInput/SumInput.tsx @@ -1,10 +1,11 @@ -import { DragEventHandler, useState } from "react"; -import { Stack, classNamesFunction, } from "@fluentui/react"; +import { useContext, useState } from "react"; +import { Stack, } from "@fluentui/react"; import { Button, Tooltip, Textarea, TextareaOnChangeData, } from "@fluentui/react-components"; import { Delete24Regular, Send28Filled } from "@fluentui/react-icons"; import styles from "./SumInput.module.css"; import { useTranslation } from 'react-i18next'; +import { LLMContext } from "../LLMSelector/LLMContextProvider"; interface Props { onSend: (question: string, file?: File) => void; @@ -22,7 +23,8 @@ export const SumInput = ({ onSend, disabled, placeholder, clearOnSend, tokens_us const { t, i18n } = useTranslation(); const [dragging, setDragging] = useState(false); const [file, setFile] = useState(undefined); - const wordCount = 4000; + const { LLM } = useContext(LLMContext) + const wordCount = LLM.max_tokens; const getDescription = () => { let actual = countWords(question) + tokens_used; let text; diff --git a/app/frontend/src/index.tsx b/app/frontend/src/index.tsx index f84f0fa6..4a08ae65 100644 --- a/app/frontend/src/index.tsx +++ b/app/frontend/src/index.tsx @@ -14,6 +14,7 @@ import { LanguageContextProvider } from "./components/LanguageSelector/LanguageC import Brainstorm from "./pages/brainstorm/Brainstorm"; import Faq from "./pages/faq/Faq"; import Version from "./pages/version/Version"; +import { LLMContextProvider } from "./components/LLMSelector/LLMContextProvider"; initializeIcons(); const router = createHashRouter([ @@ -63,7 +64,9 @@ const router = createHashRouter([ ReactDOM.createRoot(document.getElementById("root") as HTMLElement).render( - + + + ); diff --git a/app/frontend/src/pages/chat/Chat.tsx b/app/frontend/src/pages/chat/Chat.tsx index 087e4042..f5efdda6 100644 --- a/app/frontend/src/pages/chat/Chat.tsx +++ b/app/frontend/src/pages/chat/Chat.tsx @@ -16,6 +16,7 @@ import { indexedDBStorage, saveToDB, getStartDataFromDB, popLastMessageInDB, get import { History } from "../../components/History/History"; import useDebounce from "../../hooks/debouncehook"; import { MessageError } from "./MessageError"; +import { LLMContext } from "../../components/LLMSelector/LLMContextProvider"; const enum STORAGE_KEYS { CHAT_TEMPERATURE = 'CHAT_TEMPERATURE', diff --git a/app/frontend/src/pages/layout/Layout.tsx b/app/frontend/src/pages/layout/Layout.tsx index b1baaf4a..5be1df4c 100644 --- a/app/frontend/src/pages/layout/Layout.tsx +++ b/app/frontend/src/pages/layout/Layout.tsx @@ -27,16 +27,15 @@ export const Layout = () => { const navigate = useNavigate() const termsofuseread = localStorage.getItem(STORAGE_KEYS.TERMS_OF_USE_READ) === formatDate(new Date()); const language_pref = (localStorage.getItem(STORAGE_KEYS.SETTINGS_LANGUAGE)) || DEFAULTLANG; - const llm_pref = (localStorage.getItem(STORAGE_KEYS.SETTINGS_LLM)) || DEFAULTLLM; - const font_scaling_pref = Number(localStorage.getItem(STORAGE_KEYS.SETTINGS_FONT_SCALING)) || 1; - const ligth_theme_pref = localStorage.getItem(STORAGE_KEYS.SETTINGS_IS_LIGHT_THEME) === null ? 
true : localStorage.getItem(STORAGE_KEYS.SETTINGS_IS_LIGHT_THEME) == 'true'; - const { language, setLanguage } = useContext(LanguageContext); - const { LLM, setLLM } = useContext(LLMContext); - const { t, i18n } = useTranslation(); const [config, setConfig] = useState({ - backend: { - enable_auth: true + models: [{ + "model_name": "KICC GPT", + "max_tokens": 128000 }, + { + "model_name": "Unknown GPT", + "max_tokens": 100 + }], frontend: { labels: { "env_name": "MUC tschibidi-C" @@ -45,6 +44,12 @@ export const Layout = () => { }, version: "DEV 1.0.0" }); + const llm_pref = (localStorage.getItem(STORAGE_KEYS.SETTINGS_LLM)) || config.models[0].model_name; + const font_scaling_pref = Number(localStorage.getItem(STORAGE_KEYS.SETTINGS_FONT_SCALING)) || 1; + const ligth_theme_pref = localStorage.getItem(STORAGE_KEYS.SETTINGS_IS_LIGHT_THEME) === null ? true : localStorage.getItem(STORAGE_KEYS.SETTINGS_IS_LIGHT_THEME) == 'true'; + const { language, setLanguage } = useContext(LanguageContext); + const { LLM, setLLM } = useContext(LLMContext); + const { t, i18n } = useTranslation(); const [isLight, setLight] = useState(ligth_theme_pref); const [fontscaling, setFontscaling] = useState(font_scaling_pref); @@ -67,7 +72,11 @@ export const Layout = () => { useEffect(() => { configApi().then(result => { setConfig(result); - }, () => { console.log("Config nicht geladen"); }); + if (result.models.length === 0) + console.error("Keine Modelle vorhanden"); + if (result.models.filter((model) => LLM.model_name === model.model_name).length === 0) + setLLM(result.models[0]) + }, () => { console.error("Config nicht geladen"); }); i18n.changeLanguage(language_pref); }, []); @@ -87,10 +96,16 @@ export const Layout = () => { }; const onLLMSelectionChanged = (e: SelectionEvents, selection: OptionOnSelectData) => { let llm = selection.optionValue || DEFAULTLLM; - setLLM(llm); - localStorage.setItem(STORAGE_KEYS.SETTINGS_LLM, llm); + let found_llm = models.find((model) => model.model_name == llm); + if (found_llm) { + setLLM(found_llm); + localStorage.setItem(STORAGE_KEYS.SETTINGS_LLM, llm); + } + }; + const models = config.models; + return ( @@ -134,6 +149,7 @@ export const Layout = () => { setTheme={onThemeChange} defaultLLM={llm_pref} onLLMSelectionChanged={onLLMSelectionChanged} + llmOptions={models} >
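
For reference, the contract this commit establishes between the backend `/config` route and the frontend can be sketched as follows. This is an illustrative example, not part of the patch: the model names, token limits and the `alternative_logo` value are placeholders mirroring the fallback defaults in `Layout.tsx`, and the local type aliases simply restate `ApplicationConfig`, `Model` and `Frontend` from `app/frontend/src/api/models.ts`.

```typescript
// Sketch of the GET /config payload consumed by configApi() in Layout.tsx.
type Model = { model_name: string; max_tokens: number };
type Frontend = { alternative_logo: boolean; labels: { env_name: string } };
type ApplicationConfig = { models: Model[]; frontend: Frontend; version: string };

const exampleConfig: ApplicationConfig = {
    models: [
        { model_name: "KICC GPT", max_tokens: 128000 }, // placeholder values, as in the Layout.tsx defaults
        { model_name: "Unknown GPT", max_tokens: 100 }
    ],
    frontend: {
        alternative_logo: false, // assumed value; the real one comes from the backend config
        labels: { env_name: "MUC tschibidi-C" }
    },
    version: "DEV 1.0.0" // note: the getConfig handler shown above returns only "frontend" and "models"
};
```

With a payload of this shape, `Layout.tsx` can fill the LLM selector from `config.models` and fall back to the first entry whenever the currently selected model is not in the list.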
From 781f9cf6a46ad012f2270080abab424901a197eb Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Thu, 22 Aug 2024 14:52:36 +0200 Subject: [PATCH 11/34] =?UTF-8?q?=F0=9F=8E=86=20GPT4o-mini=20funktioniert?= =?UTF-8?q?=20auch?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/backend/app.py | 8 +- app/backend/chat/chat.py | 129 ++++++------------------ app/backend/core/llmhelper.py | 29 +++++- app/backend/core/modelhelper.py | 141 ++++++++++++--------------- app/frontend/src/api/api.ts | 3 +- app/frontend/src/api/models.ts | 1 + app/frontend/src/pages/chat/Chat.tsx | 4 +- 7 files changed, 130 insertions(+), 185 deletions(-) diff --git a/app/backend/app.py b/app/backend/app.py index 7ef9b1cf..051790f8 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -14,11 +14,11 @@ send_file, send_from_directory, ) - +from langchain_core.messages.human import HumanMessage +from core.modelhelper import num_tokens_from_messages from core.types.Config import ModelsConfig, ModelsDTO from core.authentification import AuthentificationHelper, AuthError from core.helper import format_as_ndjson -from core.modelhelper import num_tokens_from_message from core.types.AppConfig import AppConfig from core.types.countresult import CountResult from init_app import initApp @@ -101,10 +101,12 @@ async def chat_stream(): temperature=request_json['temperature'] or 0.7 max_tokens=request_json['max_tokens'] or 4096 system_message = request_json['system_message'] or None + model = request_json['model'] response_generator = impl.run_with_streaming(history= request_json["history"], temperature=temperature, max_tokens=max_tokens, system_message=system_message, + model=model, department= department) response = await make_response(format_as_ndjson(response_generator)) response.timeout = None # type: ignore @@ -170,7 +172,7 @@ async def counttokens(): request_json = await request.get_json() message=request_json['text'] or "" - counted_tokens = num_tokens_from_message(message,"gpt-35-turbo") #TODO use correct model + counted_tokens = num_tokens_from_messages([HumanMessage(message)],"gpt-35-turbo") #TODO use correct model return jsonify(CountResult(count=counted_tokens)) @bp.route("/statistics/export", methods=["GET"]) diff --git a/app/backend/chat/chat.py b/app/backend/chat/chat.py index dba2f2dc..1ba1fd21 100644 --- a/app/backend/chat/chat.py +++ b/app/backend/chat/chat.py @@ -1,22 +1,13 @@ -import asyncio -from typing import Any, AsyncGenerator, Optional, Sequence, Tuple +from typing import AsyncGenerator, Optional, Sequence -from langchain.callbacks.streaming_aiter import AsyncIteratorCallbackHandler -from langchain.chains import LLMChain -from langchain.memory import ConversationBufferMemory -from langchain.prompts import ( - ChatPromptTemplate, - HumanMessagePromptTemplate, - MessagesPlaceholder, - SystemMessagePromptTemplate, -) from langchain_community.callbacks import get_openai_callback from langchain_core.messages import AIMessage from langchain_core.runnables.base import RunnableSerializable +from langchain_core.messages import HumanMessage, SystemMessage from chat.chatresult import ChatResult from core.datahelper import Repository, Requestinfo -from core.modelhelper import num_tokens_from_message, num_tokens_from_messages +from core.modelhelper import num_tokens_from_messages from core.types.Chunk import Chunk, ChunkInfo from core.types.Config import ApproachConfig from core.types.LlmConfigs import LlmConfigs @@ -30,23 +21,8 @@ def __init__(self, llm: 
RunnableSerializable, config: ApproachConfig, repo: Repo self.llm = llm self.config = config self.repo = repo - - async def create_coroutine(self, history: "Sequence[dict[str, str]]", llm: RunnableSerializable, system_message: Optional[str]) -> Any: - """Calls the llm in streaming mode - - Args: - history (Sequence[dict[str, str]]): given set of messages - llm (RunnableSerializable): the llm - system_message (Optional[str]): the system message - - Returns: - Any: A Coroutine streaming the chat results - """ - user_q, conversation = self.init_conversation(history, llm, system_message) - chat_coroutine = conversation.acall({"question": user_q}) - return (chat_coroutine) - async def run_with_streaming(self, history: 'list[dict[str, str]]',max_tokens: int, temperature: float, system_message: Optional[str], department: Optional[str]) -> AsyncGenerator[Chunk, None]: + async def run_with_streaming(self, history: 'list[dict[str, str]]',max_tokens: int, temperature: float, system_message: Optional[str], model: str, department: Optional[str]) -> AsyncGenerator[Chunk, None]: """call the llm in streaming mode Args: @@ -55,6 +31,7 @@ async def run_with_streaming(self, history: 'list[dict[str, str]]',max_tokens: i temperature (float): temperature of the llm system_message (Optional[str]): the system message department (Optional[str]): from which department comes the call + model (str): the choosen model Returns: AsyncGenerator[Chunks, None]: a generator returning chunks of messages @@ -63,50 +40,38 @@ async def run_with_streaming(self, history: 'list[dict[str, str]]',max_tokens: i Iterator[AsyncGenerator[Chunks, None]]: Chunks of chat messages. n messages with content. One final message with infos about the consumed tokens. """ # configure - handler = AsyncIteratorCallbackHandler() config: LlmConfigs = { "llm_max_tokens": max_tokens, "llm_temperature": temperature, "llm_streaming": True, - "llm_callbacks": [handler], + "llm": model } llm = self.llm.with_config(configurable=config) - - # create coroutine - chat_coroutine = await self.create_coroutine(history, llm=llm, system_message=system_message) - task = asyncio.create_task(chat_coroutine) + msgs = self.init_messages(history = history, system_message=system_message) result = "" position = 0 - # go over events - async for event in handler.aiter(): - result += str(event) - yield Chunk(type="C", message= event, order=position) - position += 1 - - # await till we have collected all events - await task - + try: + async for event in llm.astream(msgs): + result += str(event.content) + yield Chunk(type="C", message= event.content, order=position) + position += 1 + except Exception as ex: + yield Chunk(type="E",message= ex.exception(), order=position) # handle exceptions - if task.exception(): - if "Rate limit" in str(task.exception()): - yield Chunk(type="E",message= "Momentan liegt eine starke Auslastung vor. 
Bitte in einigen Sekunden erneut versuchen.", order=position) - else: - yield Chunk(type="E",message= task.exception(), order=position) + # TODO find ratelimits + # TODO use callbacks https://clemenssiebler.com/posts/azure_openai_load_balancing_langchain_with_fallbacks/ else: history[-1]["bot"] = result - system_message_tokens = 0 - if(system_message and system_message.strip() !=""): - system_message_tokens = num_tokens_from_message(system_message,"gpt-35-turbo") #TODO if self.config["log_tokens"]: self.repo.addInfo(Requestinfo( - tokencount = num_tokens_from_messages(history,"gpt-35-turbo") + system_message_tokens, #TODO richtiges Modell und tokenizer auswählen + tokencount = num_tokens_from_messages(messages=msgs,model=model), #TODO richtiges Modell und tokenizer auswählen department = department, messagecount= len(history), method = "Chat")) - info = ChunkInfo(requesttokens=num_tokens_from_message(history[-1]["user"],"gpt-35-turbo"), streamedtokens=num_tokens_from_message(result,"gpt-35-turbo")) #TODO + info = ChunkInfo(requesttokens=num_tokens_from_messages([msgs[-1]],model), streamedtokens=num_tokens_from_messages([AIMessage(result)], model)) yield Chunk(type="I", message=info, order=position) def run_without_streaming(self, history: "Sequence[dict[str, str]]", max_tokens: int, temperature: float, system_message: Optional[str], department: Optional[str]) -> ChatResult: @@ -128,10 +93,10 @@ def run_without_streaming(self, history: "Sequence[dict[str, str]]", max_tokens: "llm_streaming": False, } llm = self.llm.with_config(configurable=config) - user_q, conversation = self.init_conversation(history, llm, system_message) + msgs = self.init_messages(history = history, system_message=system_message) with get_openai_callback() as cb: - ai_message: AIMessage = conversation.invoke({"question": user_q}) + ai_message: AIMessage = llm.invoke(msgs) total_tokens = cb.total_tokens if self.config["log_tokens"]: @@ -140,58 +105,24 @@ def run_without_streaming(self, history: "Sequence[dict[str, str]]", max_tokens: department = department, messagecount= 1, method = "Brainstorm")) - return ChatResult(content=ai_message["chat_history"][-1].content) - - def init_conversation(self, history: "Sequence[dict[str, str]]", llm: RunnableSerializable, system_message:str) -> Tuple[str, Any]: - """transform the history into langchain format, initates the llm with the messages - - Args: - history (Sequence[dict[str, str]]): the previous chat messages - llm (RunnableSerializable): the llm - system_message (str): the system message - - Returns: - Tuple[str, Any]: (user query, the configured llm with memory) - """ - user_q = history[-1]["user"] - messages = [ - # The `variable_name` here is what must align with memory - MessagesPlaceholder(variable_name="chat_history"), - HumanMessagePromptTemplate.from_template("{question}") - ] - if(system_message and system_message.strip() !=""): - messages.insert(0, - SystemMessagePromptTemplate.from_template( - system_message - )) - prompt = ChatPromptTemplate( - messages=messages - ) - # Notice that we `return_messages=True` to fit into the MessagesPlaceholder - # Notice that `"chat_history"` aligns with the MessagesPlaceholder name. - memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True) - ## initialize memory with our own chat model. 
- self.init_mem(history[:-1],memory=memory) - conversation = LLMChain( - llm=llm, - prompt=prompt, - memory=memory - ) - - return user_q,conversation + return ChatResult(content=ai_message.content) - def init_mem(self, messages:"Sequence[dict[str, str]]", memory: ConversationBufferMemory) : + def init_messages(self, history:"Sequence[dict[str, str]]", system_message: Optional[str] ) : """initialises memory with chat messages Args: messages (Sequence[dict[str, str]]): history of messages, are converted into langchain format - memory (ConversationBufferMemory): a memory for the messages + system_message ( Optional[str]): the system message """ - for conversation in messages: + langchain_messages = [] + if(system_message and system_message.strip() !=""): + langchain_messages.append(SystemMessage(system_message)) + for conversation in history: if("user" in conversation and conversation["user"]): userMsg = conversation["user"] - memory.chat_memory.add_user_message(userMsg) + langchain_messages.append(HumanMessage(userMsg)) if("bot" in conversation and conversation["bot"]): aiMsg = conversation["bot"] - memory.chat_memory.add_ai_message(aiMsg) \ No newline at end of file + langchain_messages.append(AIMessage(aiMsg)) + return langchain_messages \ No newline at end of file diff --git a/app/backend/core/llmhelper.py b/app/backend/core/llmhelper.py index 16e1b4d8..e935f994 100644 --- a/app/backend/core/llmhelper.py +++ b/app/backend/core/llmhelper.py @@ -24,7 +24,6 @@ def getModel(models: List[ModelsConfig], default_model = models[0] if default_model["type"] == "AZURE": llm = AzureChatOpenAI( - model=default_model["model_name"], deployment_name= default_model["deployment"], openai_api_key=default_model["api_key"], azure_endpoint=default_model["endpoint"], @@ -48,6 +47,31 @@ def getModel(models: List[ModelsConfig], else: raise ModelsConfigurationException(f"Unknown model type: {default_model['type']}. 
Currently only `AZURE` and `OPENAI` are supported.") + alternatives = {"fake" : FakeListLLM(responses=["Hi diggi"])} + for model in models[1:]: + if model["type"] == "AZURE": + alternative = AzureChatOpenAI( + deployment_name= model["deployment"], + openai_api_key=model["api_key"], + azure_endpoint=model["endpoint"], + openai_api_version=model["api_version"], + openai_api_type="azure", + max_tokens=max_tokens, + n=n, + streaming=streaming, + temperature=temperature, + ) + elif model["type"] == "OPENAI": + alternative = ChatOpenAI( + model=model["model_name"], + api_key=model["api_key"], + base_url=model["endpoint"], + max_tokens=max_tokens, + n=n, + streaming=streaming, + temperature=temperature, + ) + alternatives[model["model_name"]] = alternative llm = llm.configurable_fields( temperature=ConfigurableField( id="llm_temperature", @@ -75,5 +99,6 @@ def getModel(models: List[ModelsConfig], ).configurable_alternatives( ConfigurableField(id="llm"), default_key=models[0]["model_name"], - fake= FakeListLLM(responses=["Hi diggi"])) + **alternatives + ) return llm diff --git a/app/backend/core/modelhelper.py b/app/backend/core/modelhelper.py index db268ee1..8965f5aa 100644 --- a/app/backend/core/modelhelper.py +++ b/app/backend/core/modelhelper.py @@ -1,84 +1,67 @@ from __future__ import annotations +from typing import List import tiktoken +from langchain_core.messages.base import BaseMessage -MODELS_2_TOKEN_LIMITS = { - "gpt-35-turbo": 4000, - "gpt-3.5-turbo": 4000, - "gpt-35-turbo-16k": 16000, - "gpt-3.5-turbo-16k": 16000, - "gpt-4": 8100, - "gpt-4-32k": 32000 -} - -AOAI_2_OAI = { - "gpt-35-turbo": "gpt-3.5-turbo", - "gpt-35-turbo-16k": "gpt-3.5-turbo-16k" -} - - -def get_token_limit(model_id: str) -> int: - """returns the token limit for a given model - - Args: - model_id (str): id of the model - - Raises: - ValueError: if the model is not available - - Returns: - int: the token limit of the model - """ - if model_id not in MODELS_2_TOKEN_LIMITS: - raise ValueError("Expected model gpt-35-turbo and above") - return MODELS_2_TOKEN_LIMITS[model_id] - - -def num_tokens_from_messages(messages: list[dict[str, str]], model: str) -> int: - """ Calculate the number of tokens required to encode a list of messages - - Args: - messages (list[dict[str, str]]): list of messages - model (str): for which model - - Returns: - int: The total number of tokens required to encode the message. - """ +def num_tokens_from_messages(messages: List[BaseMessage], model: str): + print(messages) + """Return the number of tokens used by a list of messages.""" + try: + encoding = tiktoken.encoding_for_model(model) + except KeyError: + print("Warning: model not found. Using cl100k_base encoding.") + encoding = tiktoken.get_encoding("cl100k_base") + if model in { + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-16k-0613", + "gpt-4-0314", + "gpt-4-32k-0314", + "gpt-4-0613", + "gpt-4-32k-0613", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4o", + "gpt-4o-mini", + "gpt-4o-2024-05-13", + "Mistral-large-2407" #TODO use https://docs.mistral.ai/guides/tokenization/ for estimation + }: + tokens_per_message = 3 + tokens_per_name = 1 + elif model == "gpt-3.5-turbo-0301": + tokens_per_message = 4 # every message follows <|start|>{role/name}\n{content}<|end|>\n + tokens_per_name = -1 # if there's a name, the role is omitted + elif "gpt-3.5-turbo" in model: + print("Warning: gpt-3.5-turbo may update over time. 
Returning num tokens assuming gpt-3.5-turbo-0613.") + return num_tokens_from_messages(messages, model="gpt-3.5-turbo-0613") + elif "gpt-4o" in model: + print( + "Warning: gpt-4o may update over time. Returning num tokens assuming gpt-4o-2024-05-13.") + return num_tokens_from_messages(messages, model="gpt-4o-2024-05-13") + elif "gpt-4" in model: + print("Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.") + return num_tokens_from_messages(messages, model="gpt-4-0613") + else: + raise NotImplementedError( + f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""" + ) num_tokens = 0 - for conversation in messages: - if("user" in conversation and conversation["user"]): - userMsg = conversation["user"] - num_tokens += num_tokens_from_message(message= userMsg, model=model) - if("bot" in conversation and conversation["bot"]): - aiMsg = conversation["bot"] - num_tokens += num_tokens_from_message(message= aiMsg, model=model) - return num_tokens - -def num_tokens_from_message(message: str, model: str, token_per_message: int = 3) -> int: - """Calculate the number of tokens required to encode a message. - - Args: - message (str): The message to encode - model (str): The name of the model to use for encoding. - token_per_message (number): offset per message - Returns: - int: The total number of tokens required to encode the message. - Example: - message = {'role': 'user', 'content': 'Hello, how are you?'} - model = 'gpt-3.5-turbo' - num_tokens_from_messages(message, model) - output: 11 - """ - encoding = tiktoken.encoding_for_model(get_oai_chatmodel_tiktok(model)) - num_tokens = token_per_message # For "role" and "content" keys - num_tokens += len(encoding.encode(message)) - return num_tokens - - -def get_oai_chatmodel_tiktok(aoaimodel: str) -> str: - message = "Expected Azure OpenAI ChatGPT model name" - if aoaimodel == "" or aoaimodel is None: - raise ValueError(message) - if aoaimodel not in AOAI_2_OAI and aoaimodel not in MODELS_2_TOKEN_LIMITS: - raise ValueError(message) - return AOAI_2_OAI.get(aoaimodel) or aoaimodel \ No newline at end of file + for message in messages: + num_tokens += tokens_per_message + if(message.type): + role = "" + if(message.type =="ai"): + role = "assistant" + elif(message.type == "system"): + role = "system" + elif(message.type == "human"): + role = "user" + else: + raise NotImplementedError( + f"""Not implemented for the message type {message.type}""" + ) + num_tokens += len(encoding.encode(role)) + if(message.content): + num_tokens += len(encoding.encode(message.content)) + num_tokens += 3 # every reply is primed with <|start|>assistant<|message|> + return num_tokens \ No newline at end of file diff --git a/app/frontend/src/api/api.ts b/app/frontend/src/api/api.ts index 99ae23b3..5ec4f0e2 100644 --- a/app/frontend/src/api/api.ts +++ b/app/frontend/src/api/api.ts @@ -14,7 +14,8 @@ export async function chatApi(options: ChatRequest): Promise { temperature: options.temperature, language: options.language, system_message: options.system_message, - max_tokens: options.max_tokens + max_tokens: options.max_tokens, + model: options.model }) }); } diff --git a/app/frontend/src/api/models.ts b/app/frontend/src/api/models.ts index fb266720..7f77f221 100644 --- a/app/frontend/src/api/models.ts +++ b/app/frontend/src/api/models.ts @@ -21,6 +21,7 @@ export type ChatRequest = { max_tokens?: number; system_message?: 
string; shouldStream?: boolean; + model?: string; }; export type SumRequest = { diff --git a/app/frontend/src/pages/chat/Chat.tsx b/app/frontend/src/pages/chat/Chat.tsx index f5efdda6..32749687 100644 --- a/app/frontend/src/pages/chat/Chat.tsx +++ b/app/frontend/src/pages/chat/Chat.tsx @@ -26,6 +26,7 @@ const enum STORAGE_KEYS { const Chat = () => { const { language } = useContext(LanguageContext) + const { LLM } = useContext(LLMContext); const { t } = useTranslation(); const [shouldStream, setShouldStream] = useState(true); @@ -127,7 +128,8 @@ const Chat = () => { language: language, temperature: temperature, system_message: system ? system : "", - max_tokens: max_tokens + max_tokens: max_tokens, + model: LLM.model_name }; const response = await chatApi(request); From ea4a092c4f41b5fe94c7cf94e299845014a32388 Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Thu, 22 Aug 2024 22:27:06 +0200 Subject: [PATCH 12/34] =?UTF-8?q?=E2=98=81=20mistral=20tokenizer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/backend/chat/chat.py | 2 +- app/backend/core/modelhelper.py | 42 +++++++++++++++++++++++++++++++-- app/backend/init_app.py | 1 - app/backend/requirements.txt | 1 + 4 files changed, 42 insertions(+), 4 deletions(-) diff --git a/app/backend/chat/chat.py b/app/backend/chat/chat.py index 1ba1fd21..a2feba65 100644 --- a/app/backend/chat/chat.py +++ b/app/backend/chat/chat.py @@ -71,7 +71,7 @@ async def run_with_streaming(self, history: 'list[dict[str, str]]',max_tokens: i messagecount= len(history), method = "Chat")) - info = ChunkInfo(requesttokens=num_tokens_from_messages([msgs[-1]],model), streamedtokens=num_tokens_from_messages([AIMessage(result)], model)) + info = ChunkInfo(requesttokens=num_tokens_from_messages([msgs[-1]],model), streamedtokens=num_tokens_from_messages([HumanMessage(result)], model)) yield Chunk(type="I", message=info, order=position) def run_without_streaming(self, history: "Sequence[dict[str, str]]", max_tokens: int, temperature: float, system_message: Optional[str], department: Optional[str]) -> ChatResult: diff --git a/app/backend/core/modelhelper.py b/app/backend/core/modelhelper.py index 8965f5aa..4331f3b5 100644 --- a/app/backend/core/modelhelper.py +++ b/app/backend/core/modelhelper.py @@ -3,10 +3,49 @@ import tiktoken from langchain_core.messages.base import BaseMessage +from mistral_common.protocol.instruct.messages import ( + UserMessage, SystemMessage, AssistantMessage +) +from mistral_common.protocol.instruct.request import ChatCompletionRequest +from mistral_common.tokens.tokenizers.mistral import MistralTokenizer def num_tokens_from_messages(messages: List[BaseMessage], model: str): - print(messages) """Return the number of tokens used by a list of messages.""" + if("gpt-" in model): + return num_tokens_from_openai_model(messages=messages, model=model) + elif("mistral" in model): + return num_tokens_from_mistral_model(messages=messages, model=model) + else: + raise NotImplementedError( + f"""No tokenizer for model found. 
currently only openai and mistral are supported.""" + ) +def num_tokens_from_mistral_model(messages: List[BaseMessage], model: str): + """Return the number of tokens used by a list of messages for a given mistral model.""" + num_tokens = 0 + # see which tokenizer for which model is needed, https://github.com/mistralai/mistral-common/blob/main/README.md + if(model == "mistral-large-2407" ): + tokenizer = MistralTokenizer.v3() + else: + tokenizer = MistralTokenizer.from_model(model) + # convert langchain msgs to mistral format + mistral_messages = [] + for message in messages: + if(message.type =="ai"): + mistral_messages.append(AssistantMessage(content=message.content)) + elif(message.type == "system"): + mistral_messages.append(SystemMessage(content=message.content)) + elif(message.type == "human"): + mistral_messages.append(UserMessage(content=message.content)) + else: + raise NotImplementedError( + f"""Not implemented for the message type {message.type}""" + ) + tokenized = tokenizer.encode_chat_completion( + ChatCompletionRequest(messages=mistral_messages)) + return len(tokenized.tokens) + +def num_tokens_from_openai_model(messages: List[BaseMessage], model: str): + """Return the number of tokens used by a list of messages for a given openai model.""" try: encoding = tiktoken.encoding_for_model(model) except KeyError: @@ -24,7 +63,6 @@ def num_tokens_from_messages(messages: List[BaseMessage], model: str): "gpt-4o", "gpt-4o-mini", "gpt-4o-2024-05-13", - "Mistral-large-2407" #TODO use https://docs.mistral.ai/guides/tokenization/ for estimation }: tokens_per_message = 3 tokens_per_name = 1 diff --git a/app/backend/init_app.py b/app/backend/init_app.py index d99f30e1..7b2f1805 100644 --- a/app/backend/init_app.py +++ b/app/backend/init_app.py @@ -52,7 +52,6 @@ async def initApp() -> AppConfig: Returns: AppConfig: contains the configuration for the webservice """ - # read enviornment config env_config = os.environ['MUCGPT_CONFIG'] if "MUCGPT_CONFIG" in os.environ else os.path.dirname(os.path.realpath(__file__))+"/config.json" base_config = os.environ['MUCGPT_BASE_CONFIG'] if "MUCGPT_BASE_CONFIG" in os.environ is not None else os.path.dirname(os.path.realpath(__file__))+"/base.json" diff --git a/app/backend/requirements.txt b/app/backend/requirements.txt index f3459d70..94e726e5 100644 --- a/app/backend/requirements.txt +++ b/app/backend/requirements.txt @@ -16,3 +16,4 @@ psycopg2==2.9.9 pypdf2==3.0.1 tenacity==8.5.0 gunicorn +mistral-common==1.3.4 From 68d67e266524889ab23af23f54db381e74a5ab4b Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Fri, 23 Aug 2024 10:26:56 +0200 Subject: [PATCH 13/34] :bug: fixed llm frontend bug --- app/frontend/src/pages/layout/Layout.tsx | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/app/frontend/src/pages/layout/Layout.tsx b/app/frontend/src/pages/layout/Layout.tsx index 5be1df4c..3a947d42 100644 --- a/app/frontend/src/pages/layout/Layout.tsx +++ b/app/frontend/src/pages/layout/Layout.tsx @@ -53,7 +53,7 @@ export const Layout = () => { const [isLight, setLight] = useState(ligth_theme_pref); const [fontscaling, setFontscaling] = useState(font_scaling_pref); - + const [models, setModels] = useState(config.models); const [theme, setTheme] = useState(adjustTheme(isLight, fontscaling)); @@ -72,10 +72,11 @@ export const Layout = () => { useEffect(() => { configApi().then(result => { setConfig(result); - if (result.models.length === 0) + setModels(result.models); + if (result.models.length === 0) { 
console.error("Keine Modelle vorhanden"); - if (result.models.filter((model) => LLM.model_name === model.model_name).length === 0) - setLLM(result.models[0]) + } + setLLM(result.models.find((model) => model.model_name == llm_pref) || result.models[0]) }, () => { console.error("Config nicht geladen"); }); i18n.changeLanguage(language_pref); }, []); @@ -104,7 +105,7 @@ export const Layout = () => { }; - const models = config.models; + return ( From 008878a95196729551b996e9eeb6ec78e010ab16 Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Fri, 23 Aug 2024 12:26:30 +0200 Subject: [PATCH 14/34] :sparkles: /counttokens with right LLM --- app/backend/app.py | 3 ++- app/frontend/src/api/api.ts | 3 ++- app/frontend/src/api/models.ts | 1 + .../src/components/LLMSelector/LLMContextProvider.tsx | 2 +- app/frontend/src/pages/chat/Chat.tsx | 6 +++--- 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/app/backend/app.py b/app/backend/app.py index 051790f8..09bd0cc4 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -172,7 +172,8 @@ async def counttokens(): request_json = await request.get_json() message=request_json['text'] or "" - counted_tokens = num_tokens_from_messages([HumanMessage(message)],"gpt-35-turbo") #TODO use correct model + model = request_json['model']['model_name'] or "gpt-35-turbo" + counted_tokens = num_tokens_from_messages([HumanMessage(message)], model) return jsonify(CountResult(count=counted_tokens)) @bp.route("/statistics/export", methods=["GET"]) diff --git a/app/frontend/src/api/api.ts b/app/frontend/src/api/api.ts index 5ec4f0e2..f7efd234 100644 --- a/app/frontend/src/api/api.ts +++ b/app/frontend/src/api/api.ts @@ -107,7 +107,8 @@ export async function countTokensAPI(options: CountTokenRequest): Promise>; } -export const DEFAULTLLM = "GPT-4o-mini"; +export const DEFAULTLLM = "gpt-4o-mini"; export const LLMContext = React.createContext({ LLM: { model_name: DEFAULTLLM, max_tokens: 0 }, setLLM: () => { } }); export const LLMContextProvider = (props: React.PropsWithChildren<{}>) => { diff --git a/app/frontend/src/pages/chat/Chat.tsx b/app/frontend/src/pages/chat/Chat.tsx index 32749687..52818f91 100644 --- a/app/frontend/src/pages/chat/Chat.tsx +++ b/app/frontend/src/pages/chat/Chat.tsx @@ -60,16 +60,16 @@ const Chat = () => { const makeTokenCountRequest = useCallback(async () => { if (debouncedSystemPrompt && debouncedSystemPrompt !== "") { - const response = await countTokensAPI({ "text": debouncedSystemPrompt }); + const response = await countTokensAPI({ "text": debouncedSystemPrompt, "model": LLM }); setSystemPromptTokens(response.count); } else setSystemPromptTokens(0); - }, [debouncedSystemPrompt]); + }, [debouncedSystemPrompt, LLM]); useEffect(() => { makeTokenCountRequest(); - }, [debouncedSystemPrompt, makeTokenCountRequest]); + }, [debouncedSystemPrompt, LLM, makeTokenCountRequest]); useEffect(() => { checkStructurOfDB(storage); From 9b1b7d0cbb30253b1a3a3d54e614ccfbd5f3f237 Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Fri, 23 Aug 2024 15:49:08 +0200 Subject: [PATCH 15/34] :sparkles: Brainstorming now uses the LLM from the settings --- app/backend/app.py | 2 +- app/backend/brainstorm/brainstorm.py | 4 ++-- app/frontend/src/api/api.ts | 3 ++- app/frontend/src/api/models.ts | 1 + app/frontend/src/pages/brainstorm/Brainstorm.tsx | 3 +++ 5 files changed, 9 insertions(+), 4 deletions(-) diff --git a/app/backend/app.py b/app/backend/app.py index 09bd0cc4..0e817791 100644 --- 
a/app/backend/app.py +++ b/app/backend/app.py @@ -81,7 +81,7 @@ async def brainstorm(): try: impl = cfg["brainstorm_approaches"] - r = await impl.brainstorm(topic=request_json["topic"],language= request_json["language"] or "Deutsch", department=department) + r = await impl.brainstorm(topic=request_json["topic"],language= request_json["language"] or "Deutsch", department=department, model_name=request_json["model"]["model_name"]) return jsonify(r) except Exception as e: logging.exception("Exception in /brainstorm") diff --git a/app/backend/brainstorm/brainstorm.py b/app/backend/brainstorm/brainstorm.py index 79acc0ee..cf99543a 100644 --- a/app/backend/brainstorm/brainstorm.py +++ b/app/backend/brainstorm/brainstorm.py @@ -76,7 +76,7 @@ def getTranslationPrompt(self) -> PromptTemplate: return PromptTemplate(input_variables=["language", "brainstorm"], template=self.user_translate_prompt) - async def brainstorm(self, topic: str, language: str, department: Optional[str]) -> BrainstormResult: + async def brainstorm(self, topic: str, language: str, department: Optional[str], model_name:str) -> BrainstormResult: """Generates ideas for a given topic structured in markdown, translates the result into the target language Args: @@ -89,9 +89,9 @@ async def brainstorm(self, topic: str, language: str, department: Optional[str]) """ # configure config: LlmConfigs = { + "llm": model_name } llm = self.llm.with_config(configurable=config) - # construct chains brainstormChain = LLMChain(llm=llm, prompt=self.getBrainstormPrompt(), output_key="brainstorm") translationChain = LLMChain(llm=llm, prompt=self.getTranslationPrompt(), output_key="translation") diff --git a/app/frontend/src/api/api.ts b/app/frontend/src/api/api.ts index f7efd234..a0590774 100644 --- a/app/frontend/src/api/api.ts +++ b/app/frontend/src/api/api.ts @@ -89,7 +89,8 @@ export async function brainstormApi(options: BrainstormRequest): Promise { const { language } = useContext(LanguageContext) + const { LLM } = useContext(LLMContext); const { t } = useTranslation(); const lastQuestionRef = useRef(""); @@ -63,6 +65,7 @@ const Summarize = () => { const request: BrainstormRequest = { topic: question, language: language, + model: LLM }; const result = await brainstormApi(request); setAnswers([...answers, [question, result]]); From 9bbadb68bdf2c8cc3b3519f37acbc0dd076d6052 Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Fri, 23 Aug 2024 18:43:41 +0200 Subject: [PATCH 16/34] =?UTF-8?q?=F0=9F=94=8D=20Summarize=20working=20agai?= =?UTF-8?q?n?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/backend/init_app.py | 4 +- app/backend/summarize/summarize.py | 129 +++++++++-------------------- 2 files changed, 41 insertions(+), 92 deletions(-) diff --git a/app/backend/init_app.py b/app/backend/init_app.py index 7b2f1805..30d10e8b 100644 --- a/app/backend/init_app.py +++ b/app/backend/init_app.py @@ -31,10 +31,10 @@ def initApproaches(cfg: BackendConfig, repoHelper: Repository) -> Tuple[Chat, Br temperature=0.9) sumllm = getModel( models=cfg["models"], - max_tokens = 1000, + max_tokens = 2000, n = 1, streaming=False, - temperature=0.2) + temperature=0) chatlllm = getModel( models=cfg["models"], max_tokens=4000, diff --git a/app/backend/summarize/summarize.py b/app/backend/summarize/summarize.py index 8f210b89..4eea1d34 100644 --- a/app/backend/summarize/summarize.py +++ b/app/backend/summarize/summarize.py @@ -7,6 +7,8 @@ from langchain.prompts import PromptTemplate from langchain_community.callbacks 
import get_openai_callback from langchain_core.runnables.base import RunnableSerializable +from langchain_core.pydantic_v1 import BaseModel, Field +from langchain_core.prompts import PromptTemplate from core.datahelper import Repository, Requestinfo from core.textsplit import splitPDF, splitText @@ -14,6 +16,12 @@ from core.types.LlmConfigs import LlmConfigs from summarize.summarizeresult import SummarizeResult +class DenserSummary(BaseModel): + missing_entities: List[str] = Field(description="An list of missing entitys") + denser_summary: str = Field(description="denser summary, covers every entity in detail") + +class Summarys(BaseModel): + data: List[DenserSummary] = Field(description="An list of increasingly concise dense summaries") class Summarize: """Summarizes text. Chunks long texts. Individual chunks where summarized with Chain of Density prompting: https://arxiv.org/abs/2309.04269. Afterwards the text is translated into the target language.""" @@ -55,21 +63,6 @@ class Summarize: The response in JSON format: """ - user_translate_prompt = """ - Übersetze das folgende JSON in {language}. Beinhalte die Formatierung als RFC8259 JSON bei. - Das JSON sollte ein Array der Länge 5 sein, welcher folgendem Format folgt: - {{ - "data": [ - {{ - "missing_entities": "An array of missing entitys" - "denser_summary": "denser summary, covers every entity in detail" - }} - ] - }} - - JSON: {sum} - """ - user_translate_and_cleanup_prompt = """ Übersetze den folgenden Text in {language}. @@ -97,9 +90,6 @@ def __init__(self, llm: RunnableSerializable, config: ApproachConfig, repo: Repo def getSummarizationPrompt(self) -> PromptTemplate: return PromptTemplate(input_variables=["text"], template=self.user_sum_prompt) - - def getTranslationPrompt(self) -> PromptTemplate: - return PromptTemplate(input_variables=["language", "sum"], template=self.user_translate_prompt) def getTranslationCleanupPrompt(self) -> PromptTemplate: return PromptTemplate(input_variables=["language", "sum"], template=self.user_translate_and_cleanup_prompt) @@ -109,46 +99,15 @@ def setup(self) -> SequentialChain: config: LlmConfigs = { } llm = self.llm.with_config(configurable=config) - # setup model - summarizationChain = LLMChain(llm=llm, prompt=self.getSummarizationPrompt(), output_key="sum") - translationChain = LLMChain(llm=llm, prompt=self.getTranslationCleanupPrompt(), output_key="translation") + + summarizationChain = self.getSummarizationPrompt() | llm.with_structured_output(schema=Summarys) + translationChain = self.getTranslationCleanupPrompt() | llm.with_structured_output(schema=Summarys) return (summarizationChain, translationChain) - def removeQuotations(self,st: str) -> str: - """finds all denser summarys, replaces quotation inside with " - - Args: - st (str): input str - - Returns: - str: str without quotations - """ - m = re.finditer(r'(?<=\"denser_summary\":)(.*?)(?=\})', st) - - new_string = "" - idx = 0 - - for i in list(m): - ss, se = i.span(1) # first and last index - groups = i.group() # complete string ins - quotations = [m.start() for m in re.finditer('"', groups)] - # Quotation inside dense summary? 
- if(len(quotations)>2): - new_string += st[idx:ss+quotations[1]] - idx = ss+quotations[1]+1 - for quotindex in quotations[1:-1]: - new_string += st[idx:ss+quotindex] + "“ " - idx = ss+quotindex+1 - new_string += st[idx:se] - idx = quotations[-1]+1 - else: - new_string += st[idx:ss] + groups - idx = se - new_string += st[idx:] - return new_string - def run_io_tasks_in_parallel(self, tasks) -> List[Any]: + + def run_io_tasks_in_parallel(self, tasks) -> List[Tuple[Summarys, int]]: """execute tasks in parallel Args: @@ -165,7 +124,7 @@ def run_io_tasks_in_parallel(self, tasks) -> List[Any]: return results - def call_and_cleanup(self, text: str, summarizeChain: LLMChain) -> Tuple[List[str], int]: + def call_and_cleanup(self, text: str, summarizeChain: LLMChain) -> Tuple[Summarys, int]: """calls summarization chain and cleans the data Args: @@ -175,34 +134,24 @@ def call_and_cleanup(self, text: str, summarizeChain: LLMChain) -> Tuple[List[st Returns: Tuple[List[str], int]: the last n summaries, the number of consumed tokens """ - with get_openai_callback() as cb: - result = summarizeChain.invoke({"text": text}) - total_tokens = cb.total_tokens - # post procession - chat_translate_result= result["sum"][result["sum"].index("{"):] - chat_translate_result = chat_translate_result.replace("\n", "").rstrip() - chat_translate_result = self.removeQuotations(chat_translate_result) - if not chat_translate_result.endswith("}"): - chat_translate_result = chat_translate_result + "\"}]}" try: - jsoned = json.loads(chat_translate_result) - except Exception: - # try again - try: - (chat_translate_result, total_tokens) = self.call_and_cleanup(text=text, summarizeChain=summarizeChain) - return (chat_translate_result, total_tokens) - except Exception: - total_tokens = 0 - jsoned = { } - jsoned['data'] = [{'missing_entities': 'Fehler','denser_summary': 'Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.'}] - - cleaned = [] - for (i, element) in enumerate(jsoned['data']): - missing = element['missing_entities'] - if(isinstance(missing, str)): - element['missing_entities'] = [missing] - cleaned.append(element) - return (cleaned,total_tokens) + with get_openai_callback() as cb: + result: Summarys = summarizeChain.invoke({"text": text}) + + total_tokens = cb.total_tokens + + except Exception as ex: + print(ex) + # error message + total_tokens = 0 + result = Summarys(data= [DenserSummary(missing_entities="Fehler", denser_summary='Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.' ), + DenserSummary(missing_entities="Fehler", denser_summary='Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.' ), + DenserSummary(missing_entities="Fehler", denser_summary='Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.' ), + DenserSummary(missing_entities="Fehler", denser_summary='Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.' ), + DenserSummary(missing_entities="Fehler", denser_summary='Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.' 
)]) + + + return (result,total_tokens) @@ -222,27 +171,27 @@ async def summarize(self, splits: List[str], language: str, department: Optiona (summarizeChain, cleanupChain) = self.setup() # call chain total_tokens = 0 - summarys = [] + summarys: List[DenserSummary] = [] # call summarization in parallel - results = self.run_io_tasks_in_parallel( + chunk_summaries = self.run_io_tasks_in_parallel( list(map(lambda chunk: lambda: self.call_and_cleanup(text=chunk, summarizeChain=summarizeChain), splits))) # concatenate all summarys for i in range(0,5): - next_summary = {"denser_summary": "", "missing_entities": []} - for (result, tokens) in results: + next_summary = DenserSummary(missing_entities=[], denser_summary="") + for (chunk_summary, tokens) in chunk_summaries: total_tokens += tokens - next_summary["denser_summary"] += " "+ result[i]["denser_summary"] - next_summary["missing_entities"] += result[i]["missing_entities"] + next_summary.denser_summary += " "+ chunk_summary.data[i].denser_summary + next_summary.missing_entities += chunk_summary.data[i].missing_entities summarys.append(next_summary) final_summarys = [] for summary in summarys[self.use_last_n_summaries:]: # translate and beautify the concatenated summaries with get_openai_callback() as cb: - result = cleanupChain.invoke({"language": language, "sum": summary['denser_summary']}) + chunk_summary = cleanupChain.invoke({"language": language, "sum": summary.denser_summary}) total_tokens = cb.total_tokens - final_summarys.append(result['translation']) + final_summarys.append(chunk_summary.content) # save total tokens if self.config["log_tokens"]: self.repo.addInfo(Requestinfo( From d27814d2a9942784d8ebe50976e460304a13e184 Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Fri, 23 Aug 2024 19:07:46 +0200 Subject: [PATCH 17/34] =?UTF-8?q?=F0=9F=8E=88=20Summarization=20with=20str?= =?UTF-8?q?uctured=20output?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/backend/app.py | 4 ++-- app/backend/brainstorm/brainstorm.py | 1 + app/backend/summarize/summarize.py | 23 +++++++++++-------- app/frontend/src/api/api.ts | 3 ++- app/frontend/src/api/models.ts | 3 ++- .../src/pages/brainstorm/Brainstorm.tsx | 6 ++--- .../src/pages/summarize/Summarize.tsx | 5 +++- 7 files changed, 27 insertions(+), 18 deletions(-) diff --git a/app/backend/app.py b/app/backend/app.py index 0e817791..85d6514b 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -64,7 +64,7 @@ async def sum(): text = request_json["text"] if file is None else None splits = impl.split(detaillevel=detaillevel, file=file, text=text) - r = await impl.summarize(splits = splits, department=department, language=request_json["language"] or "Deutsch") + r = await impl.summarize(splits = splits, department=department, language=request_json["language"] or "Deutsch", model_name=request_json["model"]) return jsonify(r) except Exception as e: logging.exception("Exception in /sum") @@ -81,7 +81,7 @@ async def brainstorm(): try: impl = cfg["brainstorm_approaches"] - r = await impl.brainstorm(topic=request_json["topic"],language= request_json["language"] or "Deutsch", department=department, model_name=request_json["model"]["model_name"]) + r = await impl.brainstorm(topic=request_json["topic"],language= request_json["language"] or "Deutsch", department=department, model_name=request_json["model"]) return jsonify(r) except Exception as e: logging.exception("Exception in /brainstorm") diff --git a/app/backend/brainstorm/brainstorm.py 
b/app/backend/brainstorm/brainstorm.py index cf99543a..a71879ab 100644 --- a/app/backend/brainstorm/brainstorm.py +++ b/app/backend/brainstorm/brainstorm.py @@ -83,6 +83,7 @@ async def brainstorm(self, topic: str, language: str, department: Optional[str], topic (str): topic of the brainstorming language (str): target language department (Optional[str]): department, who is responsible for the call + model_name (str): the choosen llm Returns: BrainstormResult: the structured markdown with ideas about the topic diff --git a/app/backend/summarize/summarize.py b/app/backend/summarize/summarize.py index 4eea1d34..8c78e56f 100644 --- a/app/backend/summarize/summarize.py +++ b/app/backend/summarize/summarize.py @@ -95,13 +95,15 @@ def getTranslationCleanupPrompt(self) -> PromptTemplate: return PromptTemplate(input_variables=["language", "sum"], template=self.user_translate_and_cleanup_prompt) - def setup(self) -> SequentialChain: + def setup(self, model_name: str) -> SequentialChain: config: LlmConfigs = { + "llm": model_name } llm = self.llm.with_config(configurable=config) + #extraction with structured output: https://python.langchain.com/v0.1/docs/use_cases/extraction/quickstart/ summarizationChain = self.getSummarizationPrompt() | llm.with_structured_output(schema=Summarys) - translationChain = self.getTranslationCleanupPrompt() | llm.with_structured_output(schema=Summarys) + translationChain = self.getTranslationCleanupPrompt() | llm return (summarizationChain, translationChain) @@ -144,31 +146,32 @@ def call_and_cleanup(self, text: str, summarizeChain: LLMChain) -> Tuple[Summary print(ex) # error message total_tokens = 0 - result = Summarys(data= [DenserSummary(missing_entities="Fehler", denser_summary='Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.' ), - DenserSummary(missing_entities="Fehler", denser_summary='Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.' ), - DenserSummary(missing_entities="Fehler", denser_summary='Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.' ), - DenserSummary(missing_entities="Fehler", denser_summary='Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.' ), - DenserSummary(missing_entities="Fehler", denser_summary='Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.' )]) + result = Summarys(data= [DenserSummary(missing_entities=["Fehler"], denser_summary='Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.' ), + DenserSummary(missing_entities=["Fehler"], denser_summary='Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.' ), + DenserSummary(missing_entities=["Fehler"], denser_summary='Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.' ), + DenserSummary(missing_entities=["Fehler"], denser_summary='Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.' ), + DenserSummary(missing_entities=["Fehler"], denser_summary='Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.' )]) return (result,total_tokens) - async def summarize(self, splits: List[str], language: str, department: Optional[str]) -> SummarizeResult: + async def summarize(self, splits: List[str], language: str, department: Optional[str], model_name:str) -> SummarizeResult: """summarizes text with chain of density prompting. Generates 5 increasingly better summaries per split. Concatenates the results and translates it into the target language. 
Args: splits (List[str]): splits, to be summarized language (str): the target language - department (Optional[str]): _description_ + department (Optional[str]): department, who is responsible for the call + model_name (str): the choosen llm Returns: SummarizeResult: the best n summarizations """ # setup - (summarizeChain, cleanupChain) = self.setup() + (summarizeChain, cleanupChain) = self.setup(model_name) # call chain total_tokens = 0 summarys: List[DenserSummary] = [] diff --git a/app/frontend/src/api/api.ts b/app/frontend/src/api/api.ts index a0590774..5c379c92 100644 --- a/app/frontend/src/api/api.ts +++ b/app/frontend/src/api/api.ts @@ -28,7 +28,8 @@ export async function sumApi(options: SumRequest, file?: File): Promise { +const Brainstorm = () => { const { language } = useContext(LanguageContext) const { LLM } = useContext(LLMContext); const { t } = useTranslation(); @@ -65,7 +65,7 @@ const Summarize = () => { const request: BrainstormRequest = { topic: question, language: language, - model: LLM + model: LLM.model_name }; const result = await brainstormApi(request); setAnswers([...answers, [question, result]]); @@ -173,4 +173,4 @@ const Summarize = () => { ); }; -export default Summarize; +export default Brainstorm; diff --git a/app/frontend/src/pages/summarize/Summarize.tsx b/app/frontend/src/pages/summarize/Summarize.tsx index e585cb3b..b4827f8d 100644 --- a/app/frontend/src/pages/summarize/Summarize.tsx +++ b/app/frontend/src/pages/summarize/Summarize.tsx @@ -13,11 +13,13 @@ import { SumAnswer } from "../../components/SumAnswer"; import { SumInput } from "../../components/SumInput"; import { Field, Radio, RadioGroup, RadioGroupOnChangeData } from "@fluentui/react-components"; import { checkStructurOfDB, deleteChatFromDB, getHighestKeyInDB, getStartDataFromDB, indexedDBStorage, saveToDB } from "../../service/storage"; +import { LLMContext } from "../../components/LLMSelector/LLMContextProvider"; const STORAGE_KEY_LEVEL_OF_DETAIL = "SUM_LEVEL_OF_DETAIL" const Summarize = () => { const { language } = useContext(LanguageContext) + const { LLM } = useContext(LLMContext); const { t } = useTranslation(); const lastQuestionRef = useRef(""); @@ -69,7 +71,8 @@ const Summarize = () => { const request: SumRequest = { text: questionText, detaillevel: detaillevel, - language: language + language: language, + model: LLM.model_name }; const result = await sumApi(request, file); setAnswers([...answers, [questionText, result]]); From 70f1b748f5331b552f1d49bd7747e92057925751 Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Fri, 23 Aug 2024 19:17:01 +0200 Subject: [PATCH 18/34] =?UTF-8?q?=E2=98=81=20Using=20json=20mode=20for=20o?= =?UTF-8?q?ur=20french=20friends?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/backend/summarize/summarize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/backend/summarize/summarize.py b/app/backend/summarize/summarize.py index 8c78e56f..d054271a 100644 --- a/app/backend/summarize/summarize.py +++ b/app/backend/summarize/summarize.py @@ -102,7 +102,7 @@ def setup(self, model_name: str) -> SequentialChain: llm = self.llm.with_config(configurable=config) #extraction with structured output: https://python.langchain.com/v0.1/docs/use_cases/extraction/quickstart/ - summarizationChain = self.getSummarizationPrompt() | llm.with_structured_output(schema=Summarys) + summarizationChain = self.getSummarizationPrompt() | llm.with_structured_output(schema=Summarys, method="json_mode") translationChain 
= self.getTranslationCleanupPrompt() | llm return (summarizationChain, translationChain) From aa8780f7767976bfe52591e2551d29164e2efa93 Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Tue, 27 Aug 2024 11:19:15 +0200 Subject: [PATCH 19/34] :books: added descriptions of LLMs --- app/backend/app.py | 2 +- app/backend/core/types/Config.py | 1 + app/frontend/src/api/models.ts | 1 + .../src/components/LLMSelector/LLMContextProvider.tsx | 4 ++-- .../components/SettingsDrawer/SettingsDrawer.module.css | 7 +++++++ .../src/components/SettingsDrawer/SettingsDrawer.tsx | 4 +++- app/frontend/src/pages/layout/Layout.tsx | 7 +++++-- 7 files changed, 20 insertions(+), 6 deletions(-) diff --git a/app/backend/app.py b/app/backend/app.py index 85d6514b..06b3b595 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -146,7 +146,7 @@ async def getConfig(): models= cast(List[ModelsConfig], cfg["configuration_features"]["backend"]["models"]) models_dto_list = [] for model in models: - dto = ModelsDTO(model_name=model["model_name"], max_tokens=model["max_tokens"]) + dto = ModelsDTO(model_name=model["model_name"], max_tokens=model["max_tokens"], description=model["description"]) models_dto_list.append(dto) return jsonify({ "frontend": frontend_features, diff --git a/app/backend/core/types/Config.py b/app/backend/core/types/Config.py index 83aeddf0..552faba4 100644 --- a/app/backend/core/types/Config.py +++ b/app/backend/core/types/Config.py @@ -16,6 +16,7 @@ class ModelsConfig(TypedDict): class ModelsDTO(TypedDict): model_name: str max_tokens: int + description: str class SSOConfig(TypedDict): sso_issuer: str diff --git a/app/frontend/src/api/models.ts b/app/frontend/src/api/models.ts index 0195de95..430ad10a 100644 --- a/app/frontend/src/api/models.ts +++ b/app/frontend/src/api/models.ts @@ -52,6 +52,7 @@ export interface Frontend { export interface Model { max_tokens: number; model_name: string; + description: string; } export interface Labels { diff --git a/app/frontend/src/components/LLMSelector/LLMContextProvider.tsx b/app/frontend/src/components/LLMSelector/LLMContextProvider.tsx index d9e3e60d..d291e69d 100644 --- a/app/frontend/src/components/LLMSelector/LLMContextProvider.tsx +++ b/app/frontend/src/components/LLMSelector/LLMContextProvider.tsx @@ -8,10 +8,10 @@ interface ILLMProvider { } export const DEFAULTLLM = "gpt-4o-mini"; -export const LLMContext = React.createContext({ LLM: { model_name: DEFAULTLLM, max_tokens: 0 }, setLLM: () => { } }); +export const LLMContext = React.createContext({ LLM: { model_name: DEFAULTLLM, max_tokens: 0, description: "" }, setLLM: () => { } }); export const LLMContextProvider = (props: React.PropsWithChildren<{}>) => { - const [LLM, setLLM] = useState({ model_name: DEFAULTLLM, max_tokens: 0 }); + const [LLM, setLLM] = useState({ model_name: DEFAULTLLM, max_tokens: 0, description: "" }); return ( diff --git a/app/frontend/src/components/SettingsDrawer/SettingsDrawer.module.css b/app/frontend/src/components/SettingsDrawer/SettingsDrawer.module.css index bc23d509..78b29b34 100644 --- a/app/frontend/src/components/SettingsDrawer/SettingsDrawer.module.css +++ b/app/frontend/src/components/SettingsDrawer/SettingsDrawer.module.css @@ -47,3 +47,10 @@ align-items: center; justify-content: start; } + +.info { + margin-top: 8px; + border-style: solid; + border-color: black; + border-width: 1px; +} diff --git a/app/frontend/src/components/SettingsDrawer/SettingsDrawer.tsx b/app/frontend/src/components/SettingsDrawer/SettingsDrawer.tsx 
index 84f3c565..56d4a546 100644 --- a/app/frontend/src/components/SettingsDrawer/SettingsDrawer.tsx +++ b/app/frontend/src/components/SettingsDrawer/SettingsDrawer.tsx @@ -29,9 +29,10 @@ interface Props { onLLMSelectionChanged: (e: SelectionEvents, selection: OptionOnSelectData) => void; defaultLLM: string; llmOptions: Model[]; + currentLLM: Model; } -export const SettingsDrawer = ({ onLanguageSelectionChanged, defaultlang, version, fontscale, setFontscale, isLight, setTheme, onLLMSelectionChanged, defaultLLM, llmOptions }: Props) => { +export const SettingsDrawer = ({ onLanguageSelectionChanged, defaultlang, version, fontscale, setFontscale, isLight, setTheme, onLLMSelectionChanged, defaultLLM, llmOptions, currentLLM }: Props) => { const [isOpen, setIsOpen] = useState(false); const { t, i18n } = useTranslation(); @@ -79,6 +80,7 @@ export const SettingsDrawer = ({ onLanguageSelectionChanged, defaultlang, versio
+
{currentLLM["description"]}
{t('components.settingsdrawer.fontsize')} diff --git a/app/frontend/src/pages/layout/Layout.tsx b/app/frontend/src/pages/layout/Layout.tsx index 3a947d42..22ba0b10 100644 --- a/app/frontend/src/pages/layout/Layout.tsx +++ b/app/frontend/src/pages/layout/Layout.tsx @@ -30,11 +30,13 @@ export const Layout = () => { const [config, setConfig] = useState({ models: [{ "model_name": "KICC GPT", - "max_tokens": 128000 + "max_tokens": 128000, + "description": "" }, { "model_name": "Unknown GPT", - "max_tokens": 100 + "max_tokens": 100, + "description": "" }], frontend: { labels: { @@ -151,6 +153,7 @@ export const Layout = () => { defaultLLM={llm_pref} onLLMSelectionChanged={onLLMSelectionChanged} llmOptions={models} + currentLLM={LLM} >
From a07f47202f5652ba8ce76088bfc33acf61e7d2a6 Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Tue, 27 Aug 2024 13:53:34 +0200 Subject: [PATCH 20/34] :bookmark: Version 1.1.3 --- README.md | 2 +- app/frontend/package.json | 2 +- app/frontend/src/pages/version/Version.tsx | 34 +++++++++++++++++++++- config/base.json | 2 +- 4 files changed, 36 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 0a9a91e5..428f4725 100644 --- a/README.md +++ b/README.md @@ -90,7 +90,7 @@ python -m quart --app main:app run The frontend is based on a template from [Microsoft Azure](https://github.com/Azure-Samples/azure-search-openai-demo) and is implemented using React, Typescript and Javascript. -The framework used to implement the backend of MUCGPT is called [Quart](https://pgjones.gitlab.io/quart/). It is a fast Python web microframework for building JSON APIs, rendering and serving HTML, serving web sockets and much more. The backend uses LangChain to connect to LLMs like Chat-GPT-3.5, which is currently in use. +The framework used to implement the backend of MUCGPT is called [Quart](https://pgjones.gitlab.io/quart/). It is a fast Python web microframework for building JSON APIs, rendering and serving HTML, serving web sockets and much more. The backend uses LangChain to connect to LLMs. In the [config](config/default.json) file, you can provide the user with various LLM options to select from in the frontend. For more information about all the features of MUCGPT click [here](/docs/FEATURES.md). diff --git a/app/frontend/package.json b/app/frontend/package.json index 0543b724..fb1e576a 100644 --- a/app/frontend/package.json +++ b/app/frontend/package.json @@ -1,7 +1,7 @@ { "name": "mucgpt", "private": true, - "version": "1.1.2", + "version": "1.1.3", "type": "module", "engines": { "node": ">=16.0.0" diff --git a/app/frontend/src/pages/version/Version.tsx b/app/frontend/src/pages/version/Version.tsx index bd5d4aac..56b304cb 100644 --- a/app/frontend/src/pages/version/Version.tsx +++ b/app/frontend/src/pages/version/Version.tsx @@ -32,7 +32,39 @@ const Version = () => {

{t('version.header')}

- + + + [1.1.3] 28.08.2024 + +
+

{t('version.added')}

+
    +
  • + Benutzer haben nun die Möglichkeit, zwischen 3 verschiedenen Sprachmodellen das zu wählen, welches für ihren Anwendungsfall am besten passt. +
      +
    • GPT-4o-mini
    • +
    • GPT-4o
    • +
    • Mistral-Large-2407
    • +
    +
  • +
+

{t('version.fixed')}

+

{t('version.changed')}

+
    +
  • + Das standardmäßig benutzte Sprachmodell wurde von GPT-3.5 auf die neuere Version GPT-4o-mini geändert. +
  • +
  • + Verbesserung der "Zusammenfassen"-Funktion +
      +
    • weniger Fehler
    • +
    • zuverlässigere Zusammenfassungen in der gewünschten Struktur
    • +
    +
  • +
+
+
+
[1.1.2] 31.07.2024 diff --git a/config/base.json b/config/base.json index cb7ad94b..15b52047 100644 --- a/config/base.json +++ b/config/base.json @@ -1,3 +1,3 @@ { - "version": "1.1.1" + "version": "1.1.3" } \ No newline at end of file From b7c3229ff0336b0413d9a26a9df8b26224f30060 Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Tue, 27 Aug 2024 14:38:25 +0200 Subject: [PATCH 21/34] :rotating_light: worked on tests --- tests/unit/test_llmhelper.py | 49 ++++++++++++--------------- tests/unit/test_modelhelper.py | 61 +++++++++++++++++++++++----------- 2 files changed, 63 insertions(+), 47 deletions(-) diff --git a/tests/unit/test_llmhelper.py b/tests/unit/test_llmhelper.py index 937256af..af9ae6d8 100644 --- a/tests/unit/test_llmhelper.py +++ b/tests/unit/test_llmhelper.py @@ -9,21 +9,29 @@ class Test_LLMhelper(unittest.TestCase): def setUp(self): - self.api_key = "test_api_key" - self.api_base = "test_api_base" - self.api_version = "test_api_version" - self.api_type = "test_api_type" + self.model1 = { + "type": "OPENAI", + "model_name": "model1", + "endpoint": "TODO", + "api_key": "TODO", + "max_tokens": 128000 + } + self.model2 ={ + "type": "OPENAI", + "model_name": "model2", + "endpoint": "TODO", + "api_key": "TODO", + "max_tokens": 128000 + } @pytest.mark.asyncio @pytest.mark.unit def test_getModel_returns_llm(self): - model = getModel(chatgpt_model="test_model", + + + model = getModel(models=[self.model1, self.model2], max_tokens=10, n=1, - api_key=self.api_key, - api_base=self.api_base, - api_version=self.api_version, - api_type=self.api_type, temperature=0.5, streaming=True) self.assertIsInstance(model, RunnableSerializable) @@ -31,31 +39,21 @@ def test_getModel_returns_llm(self): @pytest.mark.asyncio @pytest.mark.unit def test_getModel_configurable_fields(self): - model = getModel(chatgpt_model="test_model", + model = getModel(models=[self.model1, self.model2], max_tokens=10, n=1, - api_key=self.api_key, - api_base=self.api_base, - api_version=self.api_version, - api_type=self.api_type, - temperature=0.5, + temperature=0.5, streaming=True) self.assertIn("temperature", model.fields) self.assertIn("max_tokens", model.fields) - self.assertIn("openai_api_key", model.fields) self.assertIn("streaming", model.fields) - self.assertIn("callbacks", model.fields) @pytest.mark.asyncio @pytest.mark.unit def test_getModel_configurable_alternatives(self): - model = getModel(chatgpt_model="test_model", + model = getModel(models=[self.model1, self.model2], max_tokens=10, n=1, - api_key=self.api_key, - api_base=self.api_base, - api_version=self.api_version, - api_type=self.api_type, temperature=0.5, streaming=True) self.assertIn("fake", model.alternatives) @@ -63,14 +61,9 @@ def test_getModel_configurable_alternatives(self): @pytest.mark.asyncio @pytest.mark.unit def test_getModel_fake_llm(self): - model = getModel(chatgpt_model="test_model", + model = getModel(models=[self.model1, self.model2], max_tokens=10, n=1, - api_key=self.api_key, - api_base=self.api_base, - api_version=self.api_version, - api_type=self.api_type, temperature=0.5, streaming=True) - print(model.alternatives["fake"]) self.assertEqual(model.alternatives["fake"].responses, ["Hi diggi"]) \ No newline at end of file diff --git a/tests/unit/test_modelhelper.py b/tests/unit/test_modelhelper.py index 33ea16da..4bee4cb6 100644 --- a/tests/unit/test_modelhelper.py +++ b/tests/unit/test_modelhelper.py @@ -2,29 +2,52 @@ import pytest -from core.modelhelper import get_token_limit, 
num_tokens_from_messages - +from core.modelhelper import num_tokens_from_messages, num_tokens_from_openai_model, num_tokens_from_mistral_model +from langchain_core.messages.base import BaseMessage class Test_Modelhelper(unittest.TestCase): + + def setUp(self): + # Set up common test variables + self.messages = [ + BaseMessage(type="system", content="System message."), + BaseMessage(type="ai", content="I am fine, thank you."), + BaseMessage(type="human", content="Hello, how are you?"), + + + ] + self.model_openai = "gpt-3.5-turbo-0613" + self.model_mistral = "mistral-large-2407" + + + @pytest.mark.asyncio + @pytest.mark.unit + def test_num_tokens_from_messages_openai(self): + assert num_tokens_from_messages(self.messages, self.model_openai) == 31 + + @pytest.mark.asyncio + @pytest.mark.unit + def test_num_tokens_from_messages_mistral(self): + assert num_tokens_from_messages(self.messages, self.model_mistral) == 24 + + @pytest.mark.asyncio + @pytest.mark.unit + def test_num_tokens_from_messages_invalid_model(self): + with self.assertRaises(NotImplementedError): + num_tokens_from_messages(self.messages, "invalid-model") + @pytest.mark.asyncio - @pytest.mark.unit - def test_get_token_limit(self): - self.assertEqual(get_token_limit("gpt-35-turbo"), 4000) - self.assertEqual(get_token_limit("gpt-3.5-turbo"), 4000) - self.assertEqual(get_token_limit("gpt-35-turbo-16k"), 16000) - self.assertEqual(get_token_limit("gpt-3.5-turbo-16k"), 16000) - self.assertEqual(get_token_limit("gpt-4"), 8100) - self.assertEqual(get_token_limit("gpt-4-32k"), 32000) - self.assertRaises(ValueError, get_token_limit, "gpt-2") + @pytest.mark.unit + def test_num_tokens_from_mistral_model_invalid_message_type(self): + invalid_messages = [BaseMessage(type="unknown", content="Test")] + with self.assertRaises(NotImplementedError): + num_tokens_from_mistral_model(invalid_messages, self.model_mistral) @pytest.mark.asyncio - @pytest.mark.unit - def test_num_tokens_from_messages(self): - messages = [ - {"user": "Hello, I have a problem with my computer.", "bot": "Hi there! What seems to be the issue?"}, - {"user": "My computer won't turn on.", "bot": "Okay, let's try a few troubleshooting steps. 
Have you checked to make sure it's plugged in and the power outlet?"}] - self.assertEqual(num_tokens_from_messages(messages,"gpt-35-turbo" ), 64) - self.assertRaises(ValueError,num_tokens_from_messages,messages,"" ) - self.assertRaises(ValueError,num_tokens_from_messages,messages,"gpt-2" ) + @pytest.mark.unit + def test_num_tokens_from_openai_model_invalid_message_type(self): + invalid_messages = [BaseMessage(type="unknown", content="Test")] + with self.assertRaises(NotImplementedError): + num_tokens_from_openai_model(invalid_messages, self.model_openai) \ No newline at end of file From 6edc77ee964ea9d3e2a6990ce10b93ffe8e07bad Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Wed, 28 Aug 2024 09:51:55 +0200 Subject: [PATCH 22/34] :white_check_mark: fixed tests and added new ones --- app/backend/core/modelhelper.py | 1 - tests/unit/test_confighelper.py | 51 ++++++--------------------------- tests/unit/test_datahelper.py | 14 +++++++-- tests/unit/test_llmhelper.py | 43 +++++++++++++++++++++++++-- tests/unit/test_modelhelper.py | 34 +++++++++++++++++++++- 5 files changed, 94 insertions(+), 49 deletions(-) diff --git a/app/backend/core/modelhelper.py b/app/backend/core/modelhelper.py index 4331f3b5..728dbea3 100644 --- a/app/backend/core/modelhelper.py +++ b/app/backend/core/modelhelper.py @@ -60,7 +60,6 @@ def num_tokens_from_openai_model(messages: List[BaseMessage], model: str): "gpt-4-32k-0613", "gpt-4-turbo", "gpt-4-turbo-2024-04-09", - "gpt-4o", "gpt-4o-mini", "gpt-4o-2024-05-13", }: diff --git a/tests/unit/test_confighelper.py b/tests/unit/test_confighelper.py index 4283e3df..50a72bfc 100644 --- a/tests/unit/test_confighelper.py +++ b/tests/unit/test_confighelper.py @@ -7,52 +7,17 @@ class Test_Confighelper(unittest.TestCase): - @pytest.mark.asyncio - @pytest.mark.unit - def test_confighelper_create(self): - path = os.path.join('app', 'backend', 'ressources', '') - path = os.path.abspath(path) - assert os.path.exists(path), "File does not exist" - path = path + "/" - env="dev" - helper = ConfigHelper(path, env) - self.assertEqual(helper.base_config_name, "base") - self.assertEqual(helper.env, env) - self.assertEqual(helper.base_path, path) - helper = ConfigHelper(path, env, "basis") - self.assertEqual(helper.base_config_name, "basis") - self.assertEqual(helper.env, env) - self.assertEqual(helper.base_path, path) - - @pytest.mark.asyncio @pytest.mark.unit def test_confighelper_loadData(self): - path = os.path.join('app', 'backend', 'ressources', '') - path = os.path.abspath(path) - assert os.path.exists(path), "File does not exist" - path = path + "/" - env="dev" - helper = ConfigHelper(path, env) + env_path = os.path.join('config', 'default.json') + env_path = os.path.abspath(env_path) + assert os.path.exists(env_path), "File does not exist" + base_path = os.path.join('config', 'base.json') + base_path = os.path.abspath(base_path) + assert os.path.exists(base_path), "File does not exist" + helper = ConfigHelper(env_config=env_path, base_config=base_path) data = helper.loadData() - self.assertIn("version", data) self.assertIn("frontend", data) self.assertIn("backend", data) - - @pytest.mark.asyncio - @pytest.mark.unit - def test_confighelper_loadData_fail(self): - path = os.path.join('app', 'backend', 'ressources', '') - path = os.path.abspath(path) - assert os.path.exists(path), "File does not exist" - path = path + "/" - env="super" - filename = path + env + ".json" - with open(filename, "w") as file: - file.write('{"frontend": {"labels": 
{"env_name": "MUC tschibidi-C"},"alternative_logo": true}}') - helper = ConfigHelper(path, env) - self.assertEqual(helper.base_config_name, "base") - self.assertEqual(helper.env, env) - self.assertEqual(helper.base_path, path) - self.assertRaises(ValueError, helper.loadData) - os.remove(filename) + self.assertIn("version", data) diff --git a/tests/unit/test_datahelper.py b/tests/unit/test_datahelper.py index 243c3d8e..55cacf49 100644 --- a/tests/unit/test_datahelper.py +++ b/tests/unit/test_datahelper.py @@ -1,8 +1,9 @@ import unittest import pytest +from sqlalchemy import Engine -from core.datahelper import Requestinfo +from core.datahelper import Requestinfo, Repository class Test_Datahelper(unittest.TestCase): @@ -15,4 +16,13 @@ def test_requestinfo_creation(self): self.assertEqual(request.department, 'IT') self.assertEqual(request.messagecount, 50) self.assertEqual(request.method, 'GET') - self.assertEqual(str(request), '') \ No newline at end of file + self.assertEqual(str(request), '') + + @pytest.mark.asyncio + @pytest.mark.unit + def test_repository_creation(self): + repo = Repository("user", "host", "database", "password") + self.assertIsInstance(repo, Repository) + self.assertIsInstance(repo.engine, Engine) + + \ No newline at end of file diff --git a/tests/unit/test_llmhelper.py b/tests/unit/test_llmhelper.py index af9ae6d8..cd357413 100644 --- a/tests/unit/test_llmhelper.py +++ b/tests/unit/test_llmhelper.py @@ -3,7 +3,7 @@ import pytest from langchain_core.runnables.base import RunnableSerializable -from core.llmhelper import getModel +from core.llmhelper import getModel, ModelsConfigurationException class Test_LLMhelper(unittest.TestCase): @@ -17,7 +17,16 @@ def setUp(self): "max_tokens": 128000 } self.model2 ={ - "type": "OPENAI", + "type": "AZURE", + "deployment": "model2", + "model_name": "model2", + "api_version": "preview", + "endpoint": "TODO", + "api_key": "TODO", + "max_tokens": 128000 + } + self.model3 ={ + "type": "TODO", "model_name": "model2", "endpoint": "TODO", "api_key": "TODO", @@ -35,6 +44,36 @@ def test_getModel_returns_llm(self): temperature=0.5, streaming=True) self.assertIsInstance(model, RunnableSerializable) + + @pytest.mark.asyncio + @pytest.mark.unit + def test_getModel_wrong_type(self): + with self.assertRaises(ModelsConfigurationException): + getModel(models=[self.model3], + max_tokens=10, + n=1, + temperature=0.5, + streaming=True) + + @pytest.mark.asyncio + @pytest.mark.unit + def test_getModel_azure_first(self): + model = getModel(models=[self.model2, self.model1], + max_tokens=10, + n=1, + temperature=0.5, + streaming=True) + self.assertIsInstance(model, RunnableSerializable) + + @pytest.mark.asyncio + @pytest.mark.unit + def test_getModel_no_model(self): + with self.assertRaises(ModelsConfigurationException): + getModel(models=[], + max_tokens=10, + n=1, + temperature=0.5, + streaming=True) @pytest.mark.asyncio @pytest.mark.unit diff --git a/tests/unit/test_modelhelper.py b/tests/unit/test_modelhelper.py index 4bee4cb6..6444436b 100644 --- a/tests/unit/test_modelhelper.py +++ b/tests/unit/test_modelhelper.py @@ -17,7 +17,8 @@ def setUp(self): ] self.model_openai = "gpt-3.5-turbo-0613" - self.model_mistral = "mistral-large-2407" + self.model_mistral2407 = "mistral-large-2407" + self.model_mistral = "mistral-large" @pytest.mark.asyncio @@ -25,6 +26,11 @@ def setUp(self): def test_num_tokens_from_messages_openai(self): assert num_tokens_from_messages(self.messages, self.model_openai) == 31 + @pytest.mark.asyncio + @pytest.mark.unit + def 
test_num_tokens_from_messages_mistral2407(self): + assert num_tokens_from_messages(self.messages, self.model_mistral2407) == 24 + @pytest.mark.asyncio @pytest.mark.unit def test_num_tokens_from_messages_mistral(self): @@ -35,6 +41,32 @@ def test_num_tokens_from_messages_mistral(self): def test_num_tokens_from_messages_invalid_model(self): with self.assertRaises(NotImplementedError): num_tokens_from_messages(self.messages, "invalid-model") + + @pytest.mark.asyncio + @pytest.mark.unit + def test_num_tokens_from_messages_invalid_openai_model(self): + with self.assertRaises(NotImplementedError): + num_tokens_from_openai_model(self.messages, "") + + @pytest.mark.asyncio + @pytest.mark.unit + def test_num_tokens_from_messages_openai_gpt0301(self): + assert num_tokens_from_messages(self.messages, "gpt-3.5-turbo-0301") == 34 + + @pytest.mark.asyncio + @pytest.mark.unit + def test_num_tokens_from_messages_openai_gptturbo(self): + assert num_tokens_from_messages(self.messages, "gpt-3.5-turbo") == 31 + + @pytest.mark.asyncio + @pytest.mark.unit + def test_num_tokens_from_messages_openai_gpt4(self): + assert num_tokens_from_messages(self.messages, "gpt-4") == 31 + + @pytest.mark.asyncio + @pytest.mark.unit + def test_num_tokens_from_messages_openai_gpt4o(self): + assert num_tokens_from_messages(self.messages, "gpt-4o") == 31 @pytest.mark.asyncio @pytest.mark.unit From 97a2b320f7116b48dc6bfe797e0974bc59fb2d2c Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Wed, 28 Aug 2024 11:27:57 +0200 Subject: [PATCH 23/34] =?UTF-8?q?=F0=9F=A9=BA=20fixed=20integration=20test?= =?UTF-8?q?s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/integration/base.json | 3 ++ tests/integration/conftest.py | 35 +++++------------------- tests/integration/test_app.py | 11 +++++--- tests/integration/test_config.json | 44 ++++++++++++++++++++++++++++++ 4 files changed, 61 insertions(+), 32 deletions(-) create mode 100644 tests/integration/base.json create mode 100644 tests/integration/test_config.json diff --git a/tests/integration/base.json b/tests/integration/base.json new file mode 100644 index 00000000..15b52047 --- /dev/null +++ b/tests/integration/base.json @@ -0,0 +1,3 @@ +{ + "version": "1.1.3" +} \ No newline at end of file diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 57e86228..cad4b403 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,19 +1,10 @@ -from collections import namedtuple -from unittest import mock - +import os import openai import pytest import pytest_asyncio import app -MockToken = namedtuple("MockToken", ["token", "expires_on"]) - - -class MockAzureCredential: - async def get_token(self, uri): - return MockToken("mock_token", 9999999999) - @pytest.fixture @@ -50,24 +41,12 @@ async def mock_acreate(*args, **kwargs): @pytest_asyncio.fixture async def client(monkeypatch, mock_openai_chatcompletion): - monkeypatch.setenv("AZURE_OPENAI_SERVICE", "test-openai-service") - monkeypatch.setenv("AZURE_OPENAI_CHATGPT_DEPLOYMENT", "test-chatgpt") - monkeypatch.setenv("AZURE_OPENAI_CHATGPT_MODEL", "gpt-35-turbo") - monkeypatch.setenv("AZURE_OPENAI_EMB_DEPLOYMENT", "test-ada") - monkeypatch.setenv("SSO_ISSUER", "testissuer.de") - monkeypatch.setenv("CONFIG_NAME", "test") - monkeypatch.setenv("DB_HOST", "not used") - monkeypatch.setenv("DB_NAME", "not used") - monkeypatch.setenv("DB_PASSWORD", "not used") - monkeypatch.setenv("DB_USER", "not used") - - - with 
mock.patch("init_app.DefaultAzureCredential") as mock_default_azure_credential: - mock_default_azure_credential.return_value = MockAzureCredential() - quart_app = app.create_app() + monkeypatch.setenv("MUCGPT_CONFIG", os.path.dirname(os.path.realpath(__file__))+"/test_config.json") + monkeypatch.setenv("MUCGPT_BASE_CONFIG", os.path.dirname(os.path.realpath(__file__))+"/base.json") - async with quart_app.test_app() as test_app: - quart_app.config.update({"TESTING": True}) + quart_app = app.create_app() + async with quart_app.test_app() as test_app: + quart_app.config.update({"TESTING": True}) - yield test_app.test_client() + yield test_app.test_client() diff --git a/tests/integration/test_app.py b/tests/integration/test_app.py index e4098cf8..f921ff39 100644 --- a/tests/integration/test_app.py +++ b/tests/integration/test_app.py @@ -89,7 +89,7 @@ async def test_brainstorm_exception(client, monkeypatch,caplog): data = { "topic": "München", "language": "Deutsch", - + "model": "TEST_MODEL", } response = await client.post('/brainstorm', json=data) assert response.status_code == 500 @@ -112,7 +112,7 @@ async def test_brainstorm(client, mocker): data = { "topic": "München", "language": "Deutsch", - + "model": "TEST_MODEL", } response = await client.post('/brainstorm', json=data) assert response.status_code == 200 @@ -128,7 +128,8 @@ async def test_sum_text(client, mocker): data = { "detaillevel": "short", "text": "To be summarized", - "language": "Deutsch" + "language": "Deutsch", + "model": "TEST_MODEL", } response = await client.post('/sum', form={"body": json.dumps(data)}) assert response.status_code == 200 @@ -143,7 +144,8 @@ async def test_sum_pdf(client, mocker): data = { "detaillevel": "short", - "language": "Deutsch" + "language": "Deutsch", + "model": "TEST_MODEL" } tmp = BytesIO() @@ -195,6 +197,7 @@ async def test_chatstream(client, mocker): "temperature": 0.1, "max_tokens": 2400, "system_message": "", + "model": "TEST_MODEL", "history": [{"user": "hi"}] } diff --git a/tests/integration/test_config.json b/tests/integration/test_config.json new file mode 100644 index 00000000..f33398e5 --- /dev/null +++ b/tests/integration/test_config.json @@ -0,0 +1,44 @@ +{ + "frontend": { + "labels": { + "env_name": "MUC tschibidi-test" + }, + "alternative_logo": true + }, + "backend": { + "enable_auth": false, + "enable_database": false, + "sso_config": { + "sso_issuer": "str", + "role": "lhm-ab-mucgpt-user" + }, + "db_config": { + "db_host": "NOT USED", + "db_name": "NOT USED", + "db_user": "NOT USED", + "db_passwort": "NOT USED" + }, + "chat": { + "log_tokens": false + }, + "brainstorm": { + "log_tokens": false + }, + "sum": { + "log_tokens": false + }, + + "models": [ + { + "type": "AZURE", + "model_name": "TEST_MODEL", + "deployment": "NOT USED", + "endpoint": "NOT USED", + "api_key":"NOT USED", + "max_tokens": 128000, + "api_version": "NOT USED", + "description": "NOT USED" + } + ] + } +} \ No newline at end of file From cd4485db0c61bfb3f9ba94b6fa9a32e6da2c1a9a Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Wed, 28 Aug 2024 11:28:41 +0200 Subject: [PATCH 24/34] =?UTF-8?q?=F0=9F=92=84=20change=20ariell=20prompt?= =?UTF-8?q?=20to=20work=20with=20mistral?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/components/Example/ExampleList.tsx | 152 +++++++++--------- 1 file changed, 74 insertions(+), 78 deletions(-) diff --git a/app/frontend/src/components/Example/ExampleList.tsx b/app/frontend/src/components/Example/ExampleList.tsx index 
2b0e5795..747ab39c 100644 --- a/app/frontend/src/components/Example/ExampleList.tsx +++ b/app/frontend/src/components/Example/ExampleList.tsx @@ -25,86 +25,82 @@ const EXAMPLES: ExampleModel[] = [ }, { text: "🧜‍♀️ Arielle, die Diagramm-Assistentin [Setzt den System-Prompt innerhalb der Chateinstellungen. In diesem ist das Verhalten von Arielle definiert. Für andere Aufgaben abseits der Diagrammerstellung muss der Systemprompt gelöscht werden.]", - value: "Hallo", + value: "Hallo, wie kannst du mir helfen?", system: `Du bist Arielle🧜‍♀️, ein Assistent für das Erstellen von Mermaid Diagrammen. Du hilfst dem Nutzer dabei syntaktisch korrekte Mermaid Diagramme zu erstellen. - Du unterstützt Flussdiagramme, Sequenzdiagramme, Klassendiagramme, User Journeys, Kuchendiagramme, Mindmaps und Gantt-Diagramme. Lehne andere Diagrammtypen ab. - - Gehe in folgenden Schritten vor, jeder Schritt ist eine eigene Nachricht. - 1. Stelle dich kurz freundlich vor und frag den Nutzer nach dem Thema des Diagramms und der Art des Diagramms? - 2. Frage den Nutzer nach den Daten, die dargestellt werden sollen? - 3. Gib den Mermaid-Code für das entsprechende Mermaid Diagramm zurück: - - Halte unbedingt folgende Regeln bei Schritt 3 ein: - - Antworte dabei ausschließlich in Markdown-Codeblöcken in der Programmiersprache mermaid - - Beschrifte die Knoten der Diagramme passend - - Verwende ausschließlich die Daten aus Schritt 1 und 2 - - Eine Beispielausgabe aus Schritt 3 für ein Kuchendiagramm sieht so aus : - \`\`\`mermaid - pie title Pets adopted by volunteers - "Dogs" : 386 - "Cats" : 85 - "Rats" : 15 - \`\`\` - - Eine Beispielausgabe aus Schritt 3 für eine Mindmap sieht so aus: - \`\`\`mermaid - mindmap - root((mindmap)) - Origins - Long history - ::icon(fa fa-book) - Popularisation - British popular psychology author Tony Buzan - Research - On effectivness
and features - On Automatic creation - Uses - Creative techniques - Strategic planning - Argument mapping - Tools - Pen and paper - Mermaid - \`\`\` - Eine Beispielausgabe aus Schritt 3 für ein Sequenzdiagramm sieht so aus: - \`\`\`mermaid - sequenceDiagram - Alice->>+John: Hello John, how are you? - Alice->>+John: John, can you hear me? - John-->>-Alice: Hi Alice, I can hear you! - John-->>-Alice: I feel great! - \`\`\` - - Eine Beispielausgabe aus Schritt 3 für eine Userjourney sieht so aus: - \`\`\`mermaid - journey - title My working day - section Go to work - Make tea: 5: Me - Go upstairs: 3: Me - Do work: 1: Me, Cat - section Go home - Go downstairs: 5: Me - Sit down: 3: Me - \`\`\` - - Eine Beispielausgabe aus Schritt 3 für ein Gantt-diagramm sieht so aus: - - \`\`\`mermaid - gantt - title A Gantt Diagram - dateFormat YYYY-MM-DD - section Section - A task :a1, 2014-01-01, 30d - Another task :after a1, 20d - section Another - Task in Another :2014-01-12, 12d - another task :24d - \`\`\` - - Starte mit Schritt 1. + Du unterstützt Flussdiagramme, Sequenzdiagramme, Klassendiagramme, User Journeys, Kuchendiagramme, Mindmaps und Gantt-Diagramme. Lehne andere Diagrammtypen ab. + + Halte dich an folgende Regeln: + - Bringe die Daten und den Diagrammtyp in Erfahrung + - Gib den Mermaid-Code für das entsprechende Mermaid Diagramm zurück: + - Antworte dabei ausschließlich in Markdown-Codeblöcken in der Programmiersprache mermaid + - Beschrifte die Knoten der Diagramme passend + - Verwende ausschließlich die Daten aus Schritt 1 und 2 + + Eine Beispielausgabe aus Schritt 3 für ein Kuchendiagramm sieht so aus : + \`\`\`mermaid + pie title Pets adopted by volunteers + "Dogs" : 386 + "Cats" : 85 + "Rats" : 15 + \`\`\` + + Eine Beispielausgabe aus Schritt 3 für eine Mindmap sieht so aus: + \`\`\`mermaid + mindmap + root((mindmap)) + Origins + Long history + ::icon(fa fa-book) + Popularisation + British popular psychology author Tony Buzan + Research + On effectivness
and features + On Automatic creation + Uses + Creative techniques + Strategic planning + Argument mapping + Tools + Pen and paper + Mermaid + \`\`\` + + Eine Beispielausgabe aus Schritt 3 für ein Sequenzdiagramm sieht so aus: + \`\`\`mermaid + sequenceDiagram + Alice->>+John: Hello John, how are you? + Alice->>+John: John, can you hear me? + John-->>-Alice: Hi Alice, I can hear you! + John-->>-Alice: I feel great! + \`\`\` + + Eine Beispielausgabe aus Schritt 3 für eine Userjourney sieht so aus: + \`\`\`mermaid + journey + title My working day + section Go to work + Make tea: 5: Me + Go upstairs: 3: Me + Do work: 1: Me, Cat + section Go home + Go downstairs: 5: Me + Sit down: 3: Me + \`\`\` + + Eine Beispielausgabe aus Schritt 3 für ein Gantt-diagramm sieht so aus: + + \`\`\`mermaid + gantt + title A Gantt Diagram + dateFormat YYYY-MM-DD + section Section + A task :a1, 2014-01-01, 30d + Another task :after a1, 20d + section Another + Task in Another :2014-01-12, 12d + another task :24d + \`\`\` ` } ]; From dc5bab80deddd85d3a0016f9b7a9bc259c44d6e6 Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Wed, 28 Aug 2024 12:02:22 +0200 Subject: [PATCH 25/34] :hammer: added used model to db for statistics --- README.md | 2 +- app/backend/app.py | 12 +++++++----- app/backend/brainstorm/brainstorm.py | 3 ++- app/backend/chat/chat.py | 8 +++++--- app/backend/core/datahelper.py | 16 ++++++++++++++-- app/backend/core/modelhelper.py | 13 +++---------- app/backend/summarize/summarize.py | 3 ++- tests/unit/test_modelhelper.py | 10 ---------- 8 files changed, 34 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index 428f4725..d5ea14d4 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ The documentation project is built with technologies we use in our projects (see ## Table of contents * [Built With](#built-with) * [Roadmap](#roadmap) -* [Set up](#set-up-on-azure) +* [Run](#Run) * [Documentation](#documentation) * [Contributing](#contributing) * [License](#license) diff --git a/app/backend/app.py b/app/backend/app.py index 06b3b595..33351d0c 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -64,7 +64,7 @@ async def sum(): text = request_json["text"] if file is None else None splits = impl.split(detaillevel=detaillevel, file=file, text=text) - r = await impl.summarize(splits = splits, department=department, language=request_json["language"] or "Deutsch", model_name=request_json["model"]) + r = await impl.summarize(splits = splits, department=department, language=request_json["language"] or "Deutsch", model_name=request_json["model"] or "gpt-4o-mini") return jsonify(r) except Exception as e: logging.exception("Exception in /sum") @@ -81,7 +81,7 @@ async def brainstorm(): try: impl = cfg["brainstorm_approaches"] - r = await impl.brainstorm(topic=request_json["topic"],language= request_json["language"] or "Deutsch", department=department, model_name=request_json["model"]) + r = await impl.brainstorm(topic=request_json["topic"],language= request_json["language"] or "Deutsch", department=department, model_name=request_json["model"] or "gpt-4o-mini") return jsonify(r) except Exception as e: logging.exception("Exception in /brainstorm") @@ -101,7 +101,7 @@ async def chat_stream(): temperature=request_json['temperature'] or 0.7 max_tokens=request_json['max_tokens'] or 4096 system_message = request_json['system_message'] or None - model = request_json['model'] + model = request_json['model'] or "gpt-4o-mini" response_generator = 
impl.run_with_streaming(history= request_json["history"], temperature=temperature, max_tokens=max_tokens, @@ -127,13 +127,15 @@ async def chat(): impl = cfg["chat_approaches"] temperature=request_json['temperature'] or 0.7 max_tokens=request_json['max_tokens'] or 4096 + model_name=request_json['model'] or "gpt-4o-mini" system_message = request_json['system_message'] or None history = request_json["history"] chatResult = impl.run_without_streaming(history= history, temperature=temperature, max_tokens=max_tokens, system_message=system_message, - department= department) + department= department, + model_name= model_name) return jsonify(chatResult) except Exception as e: logging.exception("Exception in /chat") @@ -172,7 +174,7 @@ async def counttokens(): request_json = await request.get_json() message=request_json['text'] or "" - model = request_json['model']['model_name'] or "gpt-35-turbo" + model = request_json['model']['model_name'] or "gpt-4o-mini" counted_tokens = num_tokens_from_messages([HumanMessage(message)], model) return jsonify(CountResult(count=counted_tokens)) diff --git a/app/backend/brainstorm/brainstorm.py b/app/backend/brainstorm/brainstorm.py index a71879ab..9f65d6b0 100644 --- a/app/backend/brainstorm/brainstorm.py +++ b/app/backend/brainstorm/brainstorm.py @@ -114,7 +114,8 @@ async def brainstorm(self, topic: str, language: str, department: Optional[str], tokencount = total_tokens, department = department, messagecount= 1, - method = "Brainstorm")) + method = "Brainstorm"), + model = model_name) return BrainstormResult(answer=translation) def cleanup(self, chat_translate_result: str) -> str: diff --git a/app/backend/chat/chat.py b/app/backend/chat/chat.py index a2feba65..140595a3 100644 --- a/app/backend/chat/chat.py +++ b/app/backend/chat/chat.py @@ -69,12 +69,13 @@ async def run_with_streaming(self, history: 'list[dict[str, str]]',max_tokens: i tokencount = num_tokens_from_messages(messages=msgs,model=model), #TODO richtiges Modell und tokenizer auswählen department = department, messagecount= len(history), - method = "Chat")) + method = "Chat"), + model = model) info = ChunkInfo(requesttokens=num_tokens_from_messages([msgs[-1]],model), streamedtokens=num_tokens_from_messages([HumanMessage(result)], model)) yield Chunk(type="I", message=info, order=position) - def run_without_streaming(self, history: "Sequence[dict[str, str]]", max_tokens: int, temperature: float, system_message: Optional[str], department: Optional[str]) -> ChatResult: + def run_without_streaming(self, history: "Sequence[dict[str, str]]", max_tokens: int, temperature: float, system_message: Optional[str], department: Optional[str], model_name:str) -> ChatResult: """calls the llm in blocking mode, returns the full result Args: @@ -104,7 +105,8 @@ def run_without_streaming(self, history: "Sequence[dict[str, str]]", max_tokens: tokencount = total_tokens, department = department, messagecount= 1, - method = "Brainstorm")) + method = "Brainstorm"), + model = model_name) return ChatResult(content=ai_message.content) diff --git a/app/backend/core/datahelper.py b/app/backend/core/datahelper.py index b5538b46..4f456de9 100644 --- a/app/backend/core/datahelper.py +++ b/app/backend/core/datahelper.py @@ -8,6 +8,9 @@ from sqlalchemy.orm import Session, declarative_base Base = declarative_base() +DEPARTMENT_STRING_LENGTH=30 +MODEL_STRING_LENGTH=20 +METHOD_STRING_LENGTH=10 class Requestinfo(Base): """Information about an Request to MUCGPT that is stored in the database. 
@@ -16,9 +19,10 @@ class Requestinfo(Base): id = Column(Integer(), primary_key=True) tokencount = Column(Integer()) - department = Column(String(20), nullable=False) + department = Column(String(DEPARTMENT_STRING_LENGTH), nullable=False) + model = Column(String(MODEL_STRING_LENGTH), nullable=False) messagecount = Column(Integer()) - method = Column(String(10)) + method = Column(String(METHOD_STRING_LENGTH)) created_on = Column(DateTime(), default=datetime.now) updated_on = Column(DateTime(), default=datetime.now, onupdate=datetime.now) @@ -47,6 +51,9 @@ def setup_schema(self, base): base.metadata.create_all(self.engine) def addInfo(self, info: Requestinfo): + info.department = self.truncate_string(info.department, DEPARTMENT_STRING_LENGTH) + info.model = self.truncate_string(info.model, MODEL_STRING_LENGTH) + info.method = self.truncate_string(info.method, METHOD_STRING_LENGTH) with Session(self.engine) as session: session.add(info) session.commit() @@ -95,5 +102,10 @@ def export(self): # Das StringIO-Objekt in ein BytesIO-Objekt umwandeln memfile_bytesio = io.BytesIO(memfile.getvalue().encode()) return memfile_bytesio + + def truncate_string(self, s, length): + if len(s) > length: + return s[:length] + return s diff --git a/app/backend/core/modelhelper.py b/app/backend/core/modelhelper.py index 728dbea3..ef608d5c 100644 --- a/app/backend/core/modelhelper.py +++ b/app/backend/core/modelhelper.py @@ -52,14 +52,17 @@ def num_tokens_from_openai_model(messages: List[BaseMessage], model: str): print("Warning: model not found. Using cl100k_base encoding.") encoding = tiktoken.get_encoding("cl100k_base") if model in { + "gpt-3.5-turbo", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", + "gpt-4", "gpt-4-0314", "gpt-4-32k-0314", "gpt-4-0613", "gpt-4-32k-0613", "gpt-4-turbo", "gpt-4-turbo-2024-04-09", + "gpt-4o", "gpt-4o-mini", "gpt-4o-2024-05-13", }: @@ -68,16 +71,6 @@ def num_tokens_from_openai_model(messages: List[BaseMessage], model: str): elif model == "gpt-3.5-turbo-0301": tokens_per_message = 4 # every message follows <|start|>{role/name}\n{content}<|end|>\n tokens_per_name = -1 # if there's a name, the role is omitted - elif "gpt-3.5-turbo" in model: - print("Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.") - return num_tokens_from_messages(messages, model="gpt-3.5-turbo-0613") - elif "gpt-4o" in model: - print( - "Warning: gpt-4o may update over time. Returning num tokens assuming gpt-4o-2024-05-13.") - return num_tokens_from_messages(messages, model="gpt-4o-2024-05-13") - elif "gpt-4" in model: - print("Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.") - return num_tokens_from_messages(messages, model="gpt-4-0613") else: raise NotImplementedError( f"""num_tokens_from_messages() is not implemented for model {model}. 
See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""" diff --git a/app/backend/summarize/summarize.py b/app/backend/summarize/summarize.py index d054271a..8beb88b4 100644 --- a/app/backend/summarize/summarize.py +++ b/app/backend/summarize/summarize.py @@ -201,7 +201,8 @@ async def summarize(self, splits: List[str], language: str, department: Optiona tokencount = total_tokens, department = department, messagecount= 1, - method = "Sum")) + method = "Sum"), + model = model_name) return SummarizeResult(answer= final_summarys) diff --git a/tests/unit/test_modelhelper.py b/tests/unit/test_modelhelper.py index 6444436b..fa3898d6 100644 --- a/tests/unit/test_modelhelper.py +++ b/tests/unit/test_modelhelper.py @@ -52,16 +52,6 @@ def test_num_tokens_from_messages_invalid_openai_model(self): @pytest.mark.unit def test_num_tokens_from_messages_openai_gpt0301(self): assert num_tokens_from_messages(self.messages, "gpt-3.5-turbo-0301") == 34 - - @pytest.mark.asyncio - @pytest.mark.unit - def test_num_tokens_from_messages_openai_gptturbo(self): - assert num_tokens_from_messages(self.messages, "gpt-3.5-turbo") == 31 - - @pytest.mark.asyncio - @pytest.mark.unit - def test_num_tokens_from_messages_openai_gpt4(self): - assert num_tokens_from_messages(self.messages, "gpt-4") == 31 @pytest.mark.asyncio @pytest.mark.unit From 7c12aa1a4343552b65b9cd110bbe9f0d83cb0daf Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Wed, 28 Aug 2024 13:49:37 +0200 Subject: [PATCH 26/34] :bug: fixed bug save to db --- app/backend/brainstorm/brainstorm.py | 4 ++-- app/backend/chat/chat.py | 8 ++++---- app/backend/summarize/summarize.py | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/app/backend/brainstorm/brainstorm.py b/app/backend/brainstorm/brainstorm.py index 9f65d6b0..2596e3a0 100644 --- a/app/backend/brainstorm/brainstorm.py +++ b/app/backend/brainstorm/brainstorm.py @@ -114,8 +114,8 @@ async def brainstorm(self, topic: str, language: str, department: Optional[str], tokencount = total_tokens, department = department, messagecount= 1, - method = "Brainstorm"), - model = model_name) + method = "Brainstorm", + model = model_name)) return BrainstormResult(answer=translation) def cleanup(self, chat_translate_result: str) -> str: diff --git a/app/backend/chat/chat.py b/app/backend/chat/chat.py index 140595a3..ffff32a0 100644 --- a/app/backend/chat/chat.py +++ b/app/backend/chat/chat.py @@ -69,8 +69,8 @@ async def run_with_streaming(self, history: 'list[dict[str, str]]',max_tokens: i tokencount = num_tokens_from_messages(messages=msgs,model=model), #TODO richtiges Modell und tokenizer auswählen department = department, messagecount= len(history), - method = "Chat"), - model = model) + method = "Chat", + model = model)) info = ChunkInfo(requesttokens=num_tokens_from_messages([msgs[-1]],model), streamedtokens=num_tokens_from_messages([HumanMessage(result)], model)) yield Chunk(type="I", message=info, order=position) @@ -105,8 +105,8 @@ def run_without_streaming(self, history: "Sequence[dict[str, str]]", max_tokens: tokencount = total_tokens, department = department, messagecount= 1, - method = "Brainstorm"), - model = model_name) + method = "Brainstorm", + model = model_name)) return ChatResult(content=ai_message.content) diff --git a/app/backend/summarize/summarize.py b/app/backend/summarize/summarize.py index 8beb88b4..e397335c 100644 --- a/app/backend/summarize/summarize.py +++ 
b/app/backend/summarize/summarize.py @@ -201,8 +201,8 @@ async def summarize(self, splits: List[str], language: str, department: Optiona tokencount = total_tokens, department = department, messagecount= 1, - method = "Sum"), - model = model_name) + method = "Sum", + model = model_name)) return SummarizeResult(answer= final_summarys) From 78e1f77144d1109a9b6cd790b71f78c003336e79 Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Wed, 28 Aug 2024 13:51:49 +0200 Subject: [PATCH 27/34] No LLMChain anymore --- app/backend/summarize/summarize.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/backend/summarize/summarize.py b/app/backend/summarize/summarize.py index 8beb88b4..3bbaa7d0 100644 --- a/app/backend/summarize/summarize.py +++ b/app/backend/summarize/summarize.py @@ -3,7 +3,7 @@ from concurrent.futures import ThreadPoolExecutor from typing import Any, List, Optional, Tuple -from langchain.chains import LLMChain, SequentialChain +from langchain.chains import SequentialChain from langchain.prompts import PromptTemplate from langchain_community.callbacks import get_openai_callback from langchain_core.runnables.base import RunnableSerializable @@ -126,12 +126,12 @@ def run_io_tasks_in_parallel(self, tasks) -> List[Tuple[Summarys, int]]: return results - def call_and_cleanup(self, text: str, summarizeChain: LLMChain) -> Tuple[Summarys, int]: + def call_and_cleanup(self, text: str, summarizeChain: RunnableSerializable) -> Tuple[Summarys, int]: """calls summarization chain and cleans the data Args: text (str): text, to be summarized - summarizeChain (LLMChain): the chain, that summarizes and cleans the data + summarizeChain (RunnableSerializable): the chain, that summarizes and cleans the data Returns: Tuple[List[str], int]: the last n summaries, the number of consumed tokens From 2527c10485421c96c4b8639b448f75a3848be346 Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Wed, 28 Aug 2024 14:26:37 +0200 Subject: [PATCH 28/34] :bug: fixed model for /chat --- app/frontend/src/pages/chat/Chat.tsx | 6 +++--- app/frontend/src/service/storage.ts | 12 +++++++----- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/app/frontend/src/pages/chat/Chat.tsx b/app/frontend/src/pages/chat/Chat.tsx index 52818f91..7b913a71 100644 --- a/app/frontend/src/pages/chat/Chat.tsx +++ b/app/frontend/src/pages/chat/Chat.tsx @@ -119,7 +119,7 @@ const Chat = () => { error && setError(undefined); setIsLoading(true); let askResponse: AskResponse = {} as AskResponse; - saveToDB([question, { ...askResponse, answer: "", tokens: 0 }, 0], storage, startId, idCounter, setCurrentId, setIdCounter, language, temperature, system ? system : "", max_tokens) + saveToDB([question, { ...askResponse, answer: "", tokens: 0 }, 0], storage, startId, idCounter, setCurrentId, setIdCounter, language, temperature, system ? system : "", max_tokens, LLM.model_name) try { const history: ChatTurn[] = answers.map(a => ({ user: a[0], bot: a[1].answer })); const request: ChatRequest = { @@ -166,7 +166,7 @@ const Chat = () => { } } if (startId == currentId) { - saveToDB([question, latestResponse, user_tokens], storage, startId, idCounter, setCurrentId, setIdCounter, language, temperature, system ? system : "", max_tokens) + saveToDB([question, latestResponse, user_tokens], storage, startId, idCounter, setCurrentId, setIdCounter, language, temperature, system ? 
system : "", max_tokens, LLM.model_name) } } else { const parsedResponse: AskResponse = await response.json(); @@ -175,7 +175,7 @@ const Chat = () => { } setAnswers([...answers, [question, parsedResponse, 0]]); if (startId == currentId) { - saveToDB([question, parsedResponse, 0], storage, currentId, idCounter, setCurrentId, setIdCounter, language, temperature, system ? system : "", max_tokens) + saveToDB([question, parsedResponse, 0], storage, currentId, idCounter, setCurrentId, setIdCounter, language, temperature, system ? system : "", max_tokens, LLM.model_name) } } } catch (e) { diff --git a/app/frontend/src/service/storage.ts b/app/frontend/src/service/storage.ts index e494316a..1a87518d 100644 --- a/app/frontend/src/service/storage.ts +++ b/app/frontend/src/service/storage.ts @@ -34,7 +34,8 @@ export async function saveToDB( language?: string, temperature?: number, system_message?: string, - max_tokens?: number + max_tokens?: number, + model?: string ) { let openRequest = indexedDB.open(storage.db_name, storage.db_version); openRequest.onupgradeneeded = () => onUpgrade(openRequest, storage); @@ -66,8 +67,8 @@ export async function saveToDB( // if the chat does not exist in the DB let name: string = ""; let new_idcounter = id_counter; - if (language != undefined && temperature != undefined && system_message != undefined && max_tokens != undefined) { - name = await (await getChatName(a, language, temperature, system_message, max_tokens)).content; + if (language != undefined && temperature != undefined && system_message != undefined && max_tokens != undefined && model != undefined) { + name = await (await getChatName(a, language, temperature, system_message, max_tokens, model)).content; name = name.replaceAll('"', "").replaceAll(".", ""); } if (storage.objectStore_name === "chat") { @@ -99,7 +100,7 @@ export async function saveToDB( }; } -export async function getChatName(answers: any, language: string, temperature: number, system_message: string, max_tokens: number) { +export async function getChatName(answers: any, language: string, temperature: number, system_message: string, max_tokens: number, model: string) { const history: ChatTurn[] = [{ user: answers[0], bot: answers[1].answer }]; const request: ChatRequest = { history: [ @@ -113,7 +114,8 @@ export async function getChatName(answers: any, language: string, temperature: n language: language, temperature: temperature, system_message: system_message, - max_tokens: max_tokens + max_tokens: max_tokens, + model: model }; const response = await chatApi(request); handleRedirect(response); From db0eba9bcb5fdf0451e1b02c667d66bd0ff17207 Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Wed, 28 Aug 2024 15:04:35 +0200 Subject: [PATCH 29/34] :hammer: refactored the deprecated LLMChain --- app/backend/brainstorm/brainstorm.py | 25 ++++++++++++------------- app/backend/summarize/summarize.py | 11 ++++++----- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/app/backend/brainstorm/brainstorm.py b/app/backend/brainstorm/brainstorm.py index 2596e3a0..8588ca25 100644 --- a/app/backend/brainstorm/brainstorm.py +++ b/app/backend/brainstorm/brainstorm.py @@ -1,7 +1,8 @@ +from operator import itemgetter from typing import Optional -from langchain.chains import LLMChain, SequentialChain from langchain.prompts import PromptTemplate +from langchain.schema.output_parser import StrOutputParser from langchain_community.callbacks import get_openai_callback from langchain_core.runnables.base import 
RunnableSerializable @@ -93,21 +94,19 @@ async def brainstorm(self, topic: str, language: str, department: Optional[str], "llm": model_name } llm = self.llm.with_config(configurable=config) + # get prompts + brainstorm_prompt = self.getBrainstormPrompt() + translation_prompt = self.getTranslationPrompt() # construct chains - brainstormChain = LLMChain(llm=llm, prompt=self.getBrainstormPrompt(), output_key="brainstorm") - translationChain = LLMChain(llm=llm, prompt=self.getTranslationPrompt(), output_key="translation") - overall_chain = SequentialChain( - chains=[brainstormChain, translationChain], - input_variables=["language", "topic"], - output_variables=["brainstorm","translation"]) - - + brainstormChain = brainstorm_prompt |llm | StrOutputParser() + translationChain = translation_prompt |llm | StrOutputParser() + # build complete chain + overall_chain = ({"brainstorm": brainstormChain,"language": itemgetter("language") }| translationChain ) + with get_openai_callback() as cb: - result = await overall_chain.acall({"topic": topic, "language": language}) + result = await overall_chain.ainvoke({"topic": topic, "language": language}) total_tokens = cb.total_tokens - - translation = result['translation'] - translation = self.cleanup(str(translation)) + translation = self.cleanup(str(result)) if self.config["log_tokens"]: self.repo.addInfo(Requestinfo( diff --git a/app/backend/summarize/summarize.py b/app/backend/summarize/summarize.py index e397335c..1b95f7e5 100644 --- a/app/backend/summarize/summarize.py +++ b/app/backend/summarize/summarize.py @@ -3,12 +3,12 @@ from concurrent.futures import ThreadPoolExecutor from typing import Any, List, Optional, Tuple -from langchain.chains import LLMChain, SequentialChain +from langchain.chains import SequentialChain from langchain.prompts import PromptTemplate from langchain_community.callbacks import get_openai_callback -from langchain_core.runnables.base import RunnableSerializable -from langchain_core.pydantic_v1 import BaseModel, Field from langchain_core.prompts import PromptTemplate +from langchain_core.pydantic_v1 import BaseModel, Field +from langchain_core.runnables.base import RunnableSequence, RunnableSerializable from core.datahelper import Repository, Requestinfo from core.textsplit import splitPDF, splitText @@ -16,6 +16,7 @@ from core.types.LlmConfigs import LlmConfigs from summarize.summarizeresult import SummarizeResult + class DenserSummary(BaseModel): missing_entities: List[str] = Field(description="An list of missing entitys") denser_summary: str = Field(description="denser summary, covers every entity in detail") @@ -126,12 +127,12 @@ def run_io_tasks_in_parallel(self, tasks) -> List[Tuple[Summarys, int]]: return results - def call_and_cleanup(self, text: str, summarizeChain: LLMChain) -> Tuple[Summarys, int]: + def call_and_cleanup(self, text: str, summarizeChain: RunnableSequence) -> Tuple[Summarys, int]: """calls summarization chain and cleans the data Args: text (str): text, to be summarized - summarizeChain (LLMChain): the chain, that summarizes and cleans the data + summarizeChain (RunnableSequence): the chain, that summarizes and cleans the data Returns: Tuple[List[str], int]: the last n summaries, the number of consumed tokens From a9a64866b0690a160bc2248c56f01b8aa00c4000 Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Wed, 28 Aug 2024 16:22:30 +0200 Subject: [PATCH 30/34] =?UTF-8?q?=E2=9C=92=20describe=20config?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
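For reference, a config file written against the schema added below can be sanity-checked before the backend starts. This is only a sketch: it assumes the third-party `jsonschema` package and the default file locations (`config/default.json` and `config/mucgpt_config.schema.json`); neither the dependency nor such a check is introduced by this patch.

```python
import json

from jsonschema import validate  # assumed extra dependency, not added by this patch

# Load the schema added by this patch and a config written against it.
with open("config/mucgpt_config.schema.json", encoding="utf-8") as schema_file:
    schema = json.load(schema_file)
with open("config/default.json", encoding="utf-8") as config_file:
    config = json.load(config_file)

# Raises jsonschema.exceptions.ValidationError if e.g. backend.models is missing
# or a model entry lacks a required key such as "endpoint" or "api_key".
validate(instance=config, schema=schema)
print("config ok, models:", [m["model_name"] for m in config["backend"]["models"]])
```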
README.md | 2 +- config/mucgpt_config.schema.json | 430 +++++++++++++++++++++++++++++++ 2 files changed, 431 insertions(+), 1 deletion(-) create mode 100644 config/mucgpt_config.schema.json diff --git a/README.md b/README.md index d5ea14d4..d13f7f00 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ The documentation project is built with technologies we use in our projects (see See the [open issues](https://github.com/it-at-m/mucgpt/issues) for a full list of proposed features (and known issues). ## Run - Configure your environment in [config/default.json](config/default.json). Insert Model Endpoint and API Key for your connection to an OpenAI completion endpoint or an Azure OpenAI completions endpoint. + Configure your environment in [config/default.json](config/default.json). The schema of the configuration is [cofnig/mucgpt_config.schema.json](config/mucgpt_config.schema.json) described. Insert Model Endpoint and API Key for your connection to an OpenAI completion endpoint or an Azure OpenAI completions endpoint. ### Run locally ``` cd app\backend diff --git a/config/mucgpt_config.schema.json b/config/mucgpt_config.schema.json new file mode 100644 index 00000000..ff4d1c89 --- /dev/null +++ b/config/mucgpt_config.schema.json @@ -0,0 +1,430 @@ +{ + "$schema": "https://json-schema.org/draft/2019-09/schema", + "$id": "http://example.com/example.json", + "type": "object", + "default": {}, + "title": "MUCGPT config", + "required": [ + "frontend", + "backend" + ], + "properties": { + "frontend": { + "type": "object", + "default": {}, + "title": "The frontend configuration", + "required": [ + "labels", + "alternative_logo" + ], + "properties": { + "labels": { + "type": "object", + "default": {}, + "title": "Labels used in the frontend", + "required": [ + "env_name" + ], + "properties": { + "env_name": { + "type": "string", + "default": "", + "title": "The env_name Schema", + "examples": [ + "MUCGPT" + ] + } + }, + "examples": [{ + "env_name": "MUCGPT" + }] + }, + "alternative_logo": { + "type": "boolean", + "default": false, + "title": "use the alternative logo, more of an easter egg", + "examples": [ + false + ] + } + }, + "examples": [{ + "labels": { + "env_name": "MUCGPT" + }, + "alternative_logo": false + }] + }, + "backend": { + "type": "object", + "default": {}, + "title": "The backend configuration", + "required": [ + "enable_auth", + "enable_database", + "sso_config", + "db_config", + "chat", + "brainstorm", + "sum", + "models" + ], + "properties": { + "enable_auth": { + "type": "boolean", + "default": false, + "title": "Enable authentification over OpenID Connect, currently only works with Azure Easy Auth..", + "examples": [ + false + ] + }, + "enable_database": { + "type": "boolean", + "default": false, + "title": "Use a postgresql database to save usage information", + "examples": [ + false + ] + }, + "sso_config": { + "type": "object", + "default": {}, + "title": "Describes the OpenidConnect Provider", + "required": [ + "sso_issuer", + "role" + ], + "properties": { + "sso_issuer": { + "type": "string", + "default": "", + "title": "The sso_issuer. Something like: https://mysso/auth/realms/myrealm\"", + "examples": [ + "TODO" + ] + }, + "role": { + "type": "string", + "default": "", + "title": "The role, we look if the user has a certain role.", + "examples": [ + "lhm-ab-mucgpt-user" + ] + } + }, + "examples": [{ + "sso_issuer": "TODO", + "role": "lhm-ab-mucgpt-user" + }] + }, + "db_config": { + "type": "object", + "default": {}, + "title": "The database configuration. 
Has to be a postgresql database.", + "required": [ + "db_host", + "db_name", + "db_user", + "db_passwort" + ], + "properties": { + "db_host": { + "type": "string", + "default": "", + "title": "The db_host", + "examples": [ + "TODO" + ] + }, + "db_name": { + "type": "string", + "default": "", + "title": "The db_name ", + "examples": [ + "postgres" + ] + }, + "db_user": { + "type": "string", + "default": "", + "title": "The db_user", + "examples": [ + "TODO" + ] + }, + "db_passwort": { + "type": "string", + "default": "", + "title": "The db_passwort ", + "examples": [ + "TODO" + ] + } + }, + "examples": [{ + "db_host": "TODO", + "db_name": "postgres", + "db_user": "TODO", + "db_passwort": "TODO" + }] + }, + "chat": { + "type": "object", + "default": {}, + "title": "The chat configuration", + "required": [ + "log_tokens" + ], + "properties": { + "log_tokens": { + "type": "boolean", + "default": false, + "title": "Log usage in database?", + "examples": [ + false + ] + } + }, + "examples": [{ + "log_tokens": false + }] + }, + "brainstorm": { + "type": "object", + "default": {}, + "title": "The brainstorm configuration", + "required": [ + "log_tokens" + ], + "properties": { + "log_tokens": { + "type": "boolean", + "default": false, + "title": "Log usage in database?", + "examples": [ + false + ] + } + }, + "examples": [{ + "log_tokens": false + }] + }, + "sum": { + "type": "object", + "default": {}, + "title": "The sumarization configuration", + "required": [ + "log_tokens" + ], + "properties": { + "log_tokens": { + "type": "boolean", + "default": false, + "title": "Log usage in database?", + "examples": [ + false + ] + } + }, + "examples": [{ + "log_tokens": false + }] + }, + "models": { + "type": "array", + "default": [], + "title": "Configuration for models", + "items": { + "type": "object", + "title": "One model configuration", + "required": [ + "type", + "model_name", + "endpoint", + "api_key", + "max_tokens", + "deployment", + "api_version" + ], + "properties": { + "type": { + "type": "string", + "title": "Either AZURE or OPENAI", + "examples": [ + "OPENAI", + "AZURE" + ] + }, + "model_name": { + "type": "string", + "title": "The name of the model", + "examples": [ + "gpt-4o-mini" + ] + }, + "endpoint": { + "type": "string", + "title": "The model endpoint", + "examples": [ + "mymodel.openai.azure.com/" + ] + }, + "api_key": { + "type": "string", + "title": "The api_key", + "examples": [ + "BLABLUBLAUBLAUBLA" + ] + }, + "max_tokens": { + "type": "integer", + "title": "The context length of the LLM", + "examples": [ + 128000, + 0 + ] + }, + "deployment": { + "type": "string", + "default": "", + "title": "The deployment, only needed for AZURE type Models", + "examples": [ + "chat" + ] + }, + "api_version": { + "type": "string", + "default": "", + "title": "The api_version", + "examples": [ + "\"2023-03-15-preview\"" + ] + } + }, + "examples": [{ + "type": "OPENAI", + "model_name": "mucgpt-mini", + "endpoint": "mucgptmini.openai.azure.com", + "api_key": "BALBLBLABUALB", + "max_tokens": 128000 + }, + { + "type": "AZURE", + "model_name": "mucgpt-maxi", + "deployment": "chat", + "endpoint": "mucgpt-maxi.openai.azure.com", + "api_key": "BALBABUALB", + "api_version": "2024-01", + "max_tokens": 1000000 + }] + }, + "examples": [ + [{ + "type": "OPENAI", + "model_name": "mucgpt-mini", + "endpoint": "mucgptmini.openai.azure.com", + "api_key": "BALBLBLABUALB", + "max_tokens": 128000 + }, + { + "type": "AZURE", + "model_name": "mucgpt-maxi", + "deployment": "chat", + "endpoint": 
"mucgpt-maxi.openai.azure.com", + "api_key": "BALBABUALB", + "api_version": "2024-01", + "max_tokens": 1000000 + }] + ] + } + }, + "examples": [{ + "enable_auth": false, + "enable_database": false, + "sso_config": { + "sso_issuer": "TODO", + "role": "lhm-ab-mucgpt-user" + }, + "db_config": { + "db_host": "TODO", + "db_name": "postgres", + "db_user": "TODO", + "db_passwort": "TODO" + }, + "chat": { + "log_tokens": false + }, + "brainstorm": { + "log_tokens": false + }, + "sum": { + "log_tokens": false + }, + "models": [{ + "type": "OPENAI", + "model_name": "mucgpt-mini", + "endpoint": "mucgptmini.openai.azure.com", + "api_key": "BALBLBLABUALB", + "max_tokens": 128000 + }, + { + "type": "AZURE", + "model_name": "mucgpt-maxi", + "deployment": "chat", + "endpoint": "mucgpt-maxi.openai.azure.com", + "api_key": "BALBABUALB", + "api_version": "2024-01", + "max_tokens": 1000000 + }] + }] + } + }, + "examples": [{ + "frontend": { + "labels": { + "env_name": "MUCGPT" + }, + "alternative_logo": false + }, + "backend": { + "enable_auth": false, + "enable_database": false, + "sso_config": { + "sso_issuer": "TODO", + "role": "lhm-ab-mucgpt-user" + }, + "db_config": { + "db_host": "TODO", + "db_name": "postgres", + "db_user": "TODO", + "db_passwort": "TODO" + }, + "chat": { + "log_tokens": false + }, + "brainstorm": { + "log_tokens": false + }, + "sum": { + "log_tokens": false + }, + "models": [{ + "type": "OPENAI", + "model_name": "mucgpt-mini", + "endpoint": "mucgptmini.openai.azure.com", + "api_key": "BALBLBLABUALB", + "max_tokens": 128000 + }, + { + "type": "AZURE", + "model_name": "mucgpt-maxi", + "deployment": "chat", + "endpoint": "mucgpt-maxi.openai.azure.com", + "api_key": "BALBABUALB", + "api_version": "2024-01", + "max_tokens": 1000000 + }] + } + }] +} \ No newline at end of file From d772801b327d3d80927ae11fa5a0be001bcf4e4e Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Wed, 28 Aug 2024 16:25:01 +0200 Subject: [PATCH 31/34] no more biceps files ot check --- .github/workflows/azure-dev-validation.yaml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/azure-dev-validation.yaml b/.github/workflows/azure-dev-validation.yaml index b0c2c257..dd6b757c 100644 --- a/.github/workflows/azure-dev-validation.yaml +++ b/.github/workflows/azure-dev-validation.yaml @@ -12,11 +12,6 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 - - - name: Build Bicep for linting - uses: azure/CLI@v1 - with: - inlineScript: az config set bicep.use_binary_from_path=false && az bicep build -f infra/main.bicep --stdout - name: Run Microsoft Security DevOps Analysis uses: microsoft/security-devops-action@preview From d93947cfd52183efaa467119576f17bc78827a41 Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Wed, 28 Aug 2024 16:33:55 +0200 Subject: [PATCH 32/34] =?UTF-8?q?=F0=9F=A4=99=20Make=20ruff=20happy?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/backend/app.py | 10 ++++++---- app/backend/chat/chat.py | 3 +-- app/backend/core/datahelper.py | 2 +- app/backend/core/llmhelper.py | 5 ++++- app/backend/core/modelhelper.py | 17 ++++++++--------- app/backend/core/types/AppConfig.py | 1 + app/backend/init_app.py | 3 +-- app/backend/summarize/summarize.py | 5 +---- tests/integration/conftest.py | 2 +- tests/unit/test_datahelper.py | 2 +- tests/unit/test_llmhelper.py | 2 +- tests/unit/test_modelhelper.py | 9 +++++++-- 12 files changed, 33 insertions(+), 28 deletions(-) diff --git a/app/backend/app.py b/app/backend/app.py index 
33351d0c..db6b7789 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -2,6 +2,8 @@ import logging import os from typing import List, cast + +from langchain_core.messages.human import HumanMessage from opentelemetry.instrumentation.asgi import OpenTelemetryMiddleware from quart import ( Blueprint, @@ -14,12 +16,12 @@ send_file, send_from_directory, ) -from langchain_core.messages.human import HumanMessage -from core.modelhelper import num_tokens_from_messages -from core.types.Config import ModelsConfig, ModelsDTO + from core.authentification import AuthentificationHelper, AuthError from core.helper import format_as_ndjson +from core.modelhelper import num_tokens_from_messages from core.types.AppConfig import AppConfig +from core.types.Config import ModelsConfig, ModelsDTO from core.types.countresult import CountResult from init_app import initApp @@ -168,7 +170,7 @@ async def getStatistics(): @bp.route("/counttokens", methods=["POST"]) async def counttokens(): - cfg = get_config_and_authentificate() + get_config_and_authentificate() if not request.is_json: return jsonify({"error": "request must be json"}), 415 diff --git a/app/backend/chat/chat.py b/app/backend/chat/chat.py index ffff32a0..25a43c64 100644 --- a/app/backend/chat/chat.py +++ b/app/backend/chat/chat.py @@ -1,9 +1,8 @@ from typing import AsyncGenerator, Optional, Sequence from langchain_community.callbacks import get_openai_callback -from langchain_core.messages import AIMessage +from langchain_core.messages import AIMessage, HumanMessage, SystemMessage from langchain_core.runnables.base import RunnableSerializable -from langchain_core.messages import HumanMessage, SystemMessage from chat.chatresult import ChatResult from core.datahelper import Repository, Requestinfo diff --git a/app/backend/core/datahelper.py b/app/backend/core/datahelper.py index 4f456de9..c6e7a8d5 100644 --- a/app/backend/core/datahelper.py +++ b/app/backend/core/datahelper.py @@ -27,7 +27,7 @@ class Requestinfo(Base): updated_on = Column(DateTime(), default=datetime.now, onupdate=datetime.now) def __repr__(self): - return f'' + return f'' diff --git a/app/backend/core/llmhelper.py b/app/backend/core/llmhelper.py index e935f994..65dffb05 100644 --- a/app/backend/core/llmhelper.py +++ b/app/backend/core/llmhelper.py @@ -1,10 +1,13 @@ +from typing import List + from langchain_community.llms.fake import FakeListLLM from langchain_core.runnables import ConfigurableField from langchain_core.runnables.base import RunnableSerializable from langchain_openai import AzureChatOpenAI, ChatOpenAI -from typing import List + from core.types.Config import ModelsConfig + class ModelsConfigurationException(Exception): pass diff --git a/app/backend/core/modelhelper.py b/app/backend/core/modelhelper.py index ef608d5c..17eb8859 100644 --- a/app/backend/core/modelhelper.py +++ b/app/backend/core/modelhelper.py @@ -1,15 +1,17 @@ from __future__ import annotations -from typing import List import tiktoken from langchain_core.messages.base import BaseMessage from mistral_common.protocol.instruct.messages import ( - UserMessage, SystemMessage, AssistantMessage + AssistantMessage, + SystemMessage, + UserMessage, ) from mistral_common.protocol.instruct.request import ChatCompletionRequest from mistral_common.tokens.tokenizers.mistral import MistralTokenizer -def num_tokens_from_messages(messages: List[BaseMessage], model: str): + +def num_tokens_from_messages(messages: list[BaseMessage], model: str): """Return the number of tokens used by a list of messages.""" if("gpt-" in 
model): return num_tokens_from_openai_model(messages=messages, model=model) @@ -17,11 +19,10 @@ def num_tokens_from_messages(messages: List[BaseMessage], model: str): return num_tokens_from_mistral_model(messages=messages, model=model) else: raise NotImplementedError( - f"""No tokenizer for model found. currently only openai and mistral are supported.""" + """No tokenizer for model found. currently only openai and mistral are supported.""" ) -def num_tokens_from_mistral_model(messages: List[BaseMessage], model: str): +def num_tokens_from_mistral_model(messages: list[BaseMessage], model: str): """Return the number of tokens used by a list of messages for a given mistral model.""" - num_tokens = 0 # see which tokenizer for which model is needed, https://github.com/mistralai/mistral-common/blob/main/README.md if(model == "mistral-large-2407" ): tokenizer = MistralTokenizer.v3() @@ -44,7 +45,7 @@ def num_tokens_from_mistral_model(messages: List[BaseMessage], model: str): ChatCompletionRequest(messages=mistral_messages)) return len(tokenized.tokens) -def num_tokens_from_openai_model(messages: List[BaseMessage], model: str): +def num_tokens_from_openai_model(messages: list[BaseMessage], model: str): """Return the number of tokens used by a list of messages for a given openai model.""" try: encoding = tiktoken.encoding_for_model(model) @@ -67,10 +68,8 @@ def num_tokens_from_openai_model(messages: List[BaseMessage], model: str): "gpt-4o-2024-05-13", }: tokens_per_message = 3 - tokens_per_name = 1 elif model == "gpt-3.5-turbo-0301": tokens_per_message = 4 # every message follows <|start|>{role/name}\n{content}<|end|>\n - tokens_per_name = -1 # if there's a name, the role is omitted else: raise NotImplementedError( f"""num_tokens_from_messages() is not implemented for model {model}. 
See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""" diff --git a/app/backend/core/types/AppConfig.py b/app/backend/core/types/AppConfig.py index 44deae5f..d7939976 100644 --- a/app/backend/core/types/AppConfig.py +++ b/app/backend/core/types/AppConfig.py @@ -1,4 +1,5 @@ from typing import List, TypedDict + from brainstorm.brainstorm import Brainstorm from chat.chat import Chat from core.authentification import AuthentificationHelper diff --git a/app/backend/init_app.py b/app/backend/init_app.py index 30d10e8b..90ebed16 100644 --- a/app/backend/init_app.py +++ b/app/backend/init_app.py @@ -1,5 +1,6 @@ import os from typing import Tuple + from brainstorm.brainstorm import Brainstorm from chat.chat import Chat from core.authentification import AuthentificationHelper @@ -11,8 +12,6 @@ from summarize.summarize import Summarize - - def initApproaches(cfg: BackendConfig, repoHelper: Repository) -> Tuple[Chat, Brainstorm, Summarize]: """init different approaches diff --git a/app/backend/summarize/summarize.py b/app/backend/summarize/summarize.py index bae4a4de..17e7c440 100644 --- a/app/backend/summarize/summarize.py +++ b/app/backend/summarize/summarize.py @@ -1,10 +1,7 @@ -import json -import re from concurrent.futures import ThreadPoolExecutor -from typing import Any, List, Optional, Tuple +from typing import List, Optional, Tuple from langchain.chains import SequentialChain -from langchain.prompts import PromptTemplate from langchain_community.callbacks import get_openai_callback from langchain_core.prompts import PromptTemplate from langchain_core.pydantic_v1 import BaseModel, Field diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index cad4b403..e6689620 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,4 +1,5 @@ import os + import openai import pytest import pytest_asyncio @@ -6,7 +7,6 @@ import app - @pytest.fixture def mock_openai_chatcompletion(monkeypatch): class AsyncChatCompletionIterator: diff --git a/tests/unit/test_datahelper.py b/tests/unit/test_datahelper.py index 55cacf49..ace4beaa 100644 --- a/tests/unit/test_datahelper.py +++ b/tests/unit/test_datahelper.py @@ -3,7 +3,7 @@ import pytest from sqlalchemy import Engine -from core.datahelper import Requestinfo, Repository +from core.datahelper import Repository, Requestinfo class Test_Datahelper(unittest.TestCase): diff --git a/tests/unit/test_llmhelper.py b/tests/unit/test_llmhelper.py index cd357413..7ec597ec 100644 --- a/tests/unit/test_llmhelper.py +++ b/tests/unit/test_llmhelper.py @@ -3,7 +3,7 @@ import pytest from langchain_core.runnables.base import RunnableSerializable -from core.llmhelper import getModel, ModelsConfigurationException +from core.llmhelper import ModelsConfigurationException, getModel class Test_LLMhelper(unittest.TestCase): diff --git a/tests/unit/test_modelhelper.py b/tests/unit/test_modelhelper.py index fa3898d6..d7e6efcd 100644 --- a/tests/unit/test_modelhelper.py +++ b/tests/unit/test_modelhelper.py @@ -1,10 +1,15 @@ import unittest import pytest - -from core.modelhelper import num_tokens_from_messages, num_tokens_from_openai_model, num_tokens_from_mistral_model from langchain_core.messages.base import BaseMessage +from core.modelhelper import ( + num_tokens_from_messages, + num_tokens_from_mistral_model, + num_tokens_from_openai_model, +) + + class Test_Modelhelper(unittest.TestCase): def setUp(self): From 4dbd78b24d496ad77e4c72923684471da36617b1 Mon Sep 17 00:00:00 
2001 From: "michael.jaumann" Date: Wed, 28 Aug 2024 16:37:32 +0200 Subject: [PATCH 33/34] Support only for newer python versions --- .github/workflows/python-test.yaml | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-test.yaml b/.github/workflows/python-test.yaml index fd32eb8e..ab904469 100644 --- a/.github/workflows/python-test.yaml +++ b/.github/workflows/python-test.yaml @@ -14,7 +14,7 @@ jobs: fail-fast: false matrix: os: ["ubuntu-20.04"] - python_version: ["3.9", "3.10", "3.11"] + python_version: ["3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v4 - name: Setup python diff --git a/README.md b/README.md index d13f7f00..ea81ebd0 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ Why should you use MUCGPT? See for yourself: The documentation project is built with technologies we use in our projects (see [requirements-dev.txt](/requirements-dev.txt)): ### Backend: -* [Python 3.9, 3.10 or 3.11](https://www.python.org/downloads/) +* [Python 3.10, 3.11 or 3.12](https://www.python.org/downloads/) * [Quart](https://pgjones.gitlab.io/quart/) * [LangChain](https://www.langchain.com/) From dc362830fd1adca0f0d75d82b568ba89cd031a75 Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Wed, 28 Aug 2024 16:45:31 +0200 Subject: [PATCH 34/34] =?UTF-8?q?=F0=9F=9B=A0=20fix=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/backend/core/datahelper.py | 2 +- tests/unit/test_datahelper.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/app/backend/core/datahelper.py b/app/backend/core/datahelper.py index c6e7a8d5..0f9b9027 100644 --- a/app/backend/core/datahelper.py +++ b/app/backend/core/datahelper.py @@ -27,7 +27,7 @@ class Requestinfo(Base): updated_on = Column(DateTime(), default=datetime.now, onupdate=datetime.now) def __repr__(self): - return f'' + return f'' diff --git a/tests/unit/test_datahelper.py b/tests/unit/test_datahelper.py index ace4beaa..c754c2c9 100644 --- a/tests/unit/test_datahelper.py +++ b/tests/unit/test_datahelper.py @@ -10,13 +10,14 @@ class Test_Datahelper(unittest.TestCase): @pytest.mark.asyncio @pytest.mark.unit def test_requestinfo_creation(self): - request = Requestinfo(tokencount=100, department='IT', messagecount=50, method='GET') + request = Requestinfo(tokencount=100, department='IT', messagecount=50, method='GET', model="MUCGPT") self.assertIsInstance(request, Requestinfo) self.assertEqual(request.tokencount, 100) self.assertEqual(request.department, 'IT') self.assertEqual(request.messagecount, 50) self.assertEqual(request.method, 'GET') - self.assertEqual(str(request), '') + self.assertEqual(request.model, "MUCGPT") + self.assertEqual(str(request), '') @pytest.mark.asyncio @pytest.mark.unit