From 81d5da1ebe34148a7973f9c4a0b92fe0ead06aa1 Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Wed, 31 Jul 2024 17:29:19 +0200 Subject: [PATCH 01/34] =?UTF-8?q?=F0=9F=8E=86=20first=20local=20running=20?= =?UTF-8?q?version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .dockerignore | 8 ++ Dockerfile | 16 +++ app/backend/app.py | 16 +-- app/backend/brainstorm/brainstorm.py | 5 +- app/backend/chat/chat.py | 13 +- app/backend/core/llmhelper.py | 109 ++++++++++------- app/backend/core/types/AppConfig.py | 6 - app/backend/core/types/AzureChatGPTConfig.py | 14 --- app/backend/core/types/Config.py | 21 ++++ app/backend/core/types/SupportedModels.py | 7 -- app/backend/gunicorn.conf.py | 2 +- app/backend/init_app.py | 120 +++++-------------- app/backend/requirements.txt | 4 +- app/backend/ressources/test.json | 21 ---- app/backend/summarize/summarize.py | 5 +- app/backend/text.py | 2 - docker-compose.yml | 13 ++ 17 files changed, 160 insertions(+), 222 deletions(-) create mode 100644 .dockerignore create mode 100644 Dockerfile delete mode 100644 app/backend/core/types/AzureChatGPTConfig.py delete mode 100644 app/backend/core/types/SupportedModels.py delete mode 100644 app/backend/ressources/test.json delete mode 100644 app/backend/text.py create mode 100644 docker-compose.yml diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..3a0dd85d --- /dev/null +++ b/.dockerignore @@ -0,0 +1,8 @@ +.git* +**/*.pyc +.venv/ +/tests +/notebooks +/infra +*.ipynb +**/node_modules \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..4b7176f4 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,16 @@ +# syntax=docker/dockerfile:1 +FROM node:19-alpine AS builder + +WORKDIR /build +COPY app/ . +WORKDIR /build/frontend +RUN npm install +RUN npm run build + +FROM python:3.11 +WORKDIR /code +COPY --from=builder /build/backend . 
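+# Two-stage build: the node stage above compiles the React frontend, and only the
+# backend folder is copied into the runtime image (assuming the frontend build emits
+# its bundle under app/backend), so Node sources and node_modules never ship.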
+RUN pip install --no-cache-dir --upgrade -r requirements.txt +EXPOSE 8000 + +CMD ["python","-m","gunicorn","main:app"] \ No newline at end of file diff --git a/app/backend/app.py b/app/backend/app.py index 9d50ce57..7a2b5449 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -1,7 +1,6 @@ import json import logging import os -import time from typing import cast from azure.monitor.opentelemetry import configure_azure_monitor @@ -162,10 +161,9 @@ async def counttokens(): if not request.is_json: return jsonify({"error": "request must be json"}), 415 - model = cfg["model_info"]["model"] request_json = await request.get_json() message=request_json['text'] or "" - counted_tokens = num_tokens_from_message(message,model) + counted_tokens = num_tokens_from_message(message,"gpt-35-turbo") #TODO use correct model return jsonify(CountResult(count=counted_tokens)) @bp.route("/statistics/export", methods=["GET"]) @@ -207,18 +205,6 @@ def get_department(request: Request): else: return None - - -@bp.before_request -async def ensure_openai_token(): - cfg = get_config() - openai_token = cfg["model_info"]["openai_token"] - if openai_token.expires_on < time.time() + 60: - openai_token = await cfg["azure_credential"].get_token("https://cognitiveservices.azure.com/.default") - # updates tokens, the approaches should get the newest version of the token via reference - cfg["model_info"]["openai_token"] = openai_token - cfg["model_info"]["openai_api_key"] = openai_token.token - @bp.before_app_serving async def setup_clients(): current_app.config[APPCONFIG_KEY] = await initApp() diff --git a/app/backend/brainstorm/brainstorm.py b/app/backend/brainstorm/brainstorm.py index 9a642516..79acc0ee 100644 --- a/app/backend/brainstorm/brainstorm.py +++ b/app/backend/brainstorm/brainstorm.py @@ -7,7 +7,6 @@ from brainstorm.brainstormresult import BrainstormResult from core.datahelper import Repository, Requestinfo -from core.types.AzureChatGPTConfig import AzureChatGPTConfig from core.types.Config import ApproachConfig from core.types.LlmConfigs import LlmConfigs @@ -56,10 +55,9 @@ class Brainstorm: Text: {brainstorm}""" - def __init__(self, llm: RunnableSerializable, config: ApproachConfig, model_info: AzureChatGPTConfig, repo: Repository): + def __init__(self, llm: RunnableSerializable, config: ApproachConfig, repo: Repository): self.llm = llm self.config = config - self.model_info = model_info self.repo = repo def getBrainstormPrompt(self) -> PromptTemplate: @@ -91,7 +89,6 @@ async def brainstorm(self, topic: str, language: str, department: Optional[str]) """ # configure config: LlmConfigs = { - "llm_api_key": self.model_info["openai_api_key"] } llm = self.llm.with_config(configurable=config) diff --git a/app/backend/chat/chat.py b/app/backend/chat/chat.py index 7e17fbc6..dba2f2dc 100644 --- a/app/backend/chat/chat.py +++ b/app/backend/chat/chat.py @@ -17,7 +17,6 @@ from chat.chatresult import ChatResult from core.datahelper import Repository, Requestinfo from core.modelhelper import num_tokens_from_message, num_tokens_from_messages -from core.types.AzureChatGPTConfig import AzureChatGPTConfig from core.types.Chunk import Chunk, ChunkInfo from core.types.Config import ApproachConfig from core.types.LlmConfigs import LlmConfigs @@ -27,12 +26,10 @@ class Chat: """Chat with a llm via multiple steps. 
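+
+    Wraps a configurable LangChain runnable; per-request options (max_tokens,
+    temperature, streaming, callbacks) are applied at call time via LlmConfigs.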
""" - def __init__(self, llm: RunnableSerializable, config: ApproachConfig, model_info: AzureChatGPTConfig, repo: Repository, chatgpt_model: str): + def __init__(self, llm: RunnableSerializable, config: ApproachConfig, repo: Repository): self.llm = llm self.config = config - self.model_info = model_info self.repo = repo - self.chatgpt_model = chatgpt_model async def create_coroutine(self, history: "Sequence[dict[str, str]]", llm: RunnableSerializable, system_message: Optional[str]) -> Any: """Calls the llm in streaming mode @@ -69,7 +66,6 @@ async def run_with_streaming(self, history: 'list[dict[str, str]]',max_tokens: i handler = AsyncIteratorCallbackHandler() config: LlmConfigs = { "llm_max_tokens": max_tokens, - "llm_api_key": self.model_info["openai_api_key"], "llm_temperature": temperature, "llm_streaming": True, "llm_callbacks": [handler], @@ -102,15 +98,15 @@ async def run_with_streaming(self, history: 'list[dict[str, str]]',max_tokens: i history[-1]["bot"] = result system_message_tokens = 0 if(system_message and system_message.strip() !=""): - system_message_tokens = num_tokens_from_message(system_message,self.chatgpt_model) + system_message_tokens = num_tokens_from_message(system_message,"gpt-35-turbo") #TODO if self.config["log_tokens"]: self.repo.addInfo(Requestinfo( - tokencount = num_tokens_from_messages(history,self.chatgpt_model) + system_message_tokens, + tokencount = num_tokens_from_messages(history,"gpt-35-turbo") + system_message_tokens, #TODO richtiges Modell und tokenizer auswählen department = department, messagecount= len(history), method = "Chat")) - info = ChunkInfo(requesttokens=num_tokens_from_message(history[-1]["user"],self.chatgpt_model), streamedtokens=num_tokens_from_message(result,self.chatgpt_model)) + info = ChunkInfo(requesttokens=num_tokens_from_message(history[-1]["user"],"gpt-35-turbo"), streamedtokens=num_tokens_from_message(result,"gpt-35-turbo")) #TODO yield Chunk(type="I", message=info, order=position) def run_without_streaming(self, history: "Sequence[dict[str, str]]", max_tokens: int, temperature: float, system_message: Optional[str], department: Optional[str]) -> ChatResult: @@ -128,7 +124,6 @@ def run_without_streaming(self, history: "Sequence[dict[str, str]]", max_tokens: """ config: LlmConfigs = { "llm_max_tokens": max_tokens, - "llm_api_key": self.model_info["openai_api_key"], "llm_temperature": temperature, "llm_streaming": False, } diff --git a/app/backend/core/llmhelper.py b/app/backend/core/llmhelper.py index 14378179..16e1b4d8 100644 --- a/app/backend/core/llmhelper.py +++ b/app/backend/core/llmhelper.py @@ -1,18 +1,17 @@ from langchain_community.llms.fake import FakeListLLM from langchain_core.runnables import ConfigurableField from langchain_core.runnables.base import RunnableSerializable -from langchain_openai import AzureChatOpenAI +from langchain_openai import AzureChatOpenAI, ChatOpenAI +from typing import List +from core.types.Config import ModelsConfig -from core.types.SupportedModels import SupportedModels +class ModelsConfigurationException(Exception): + pass -def getModel(chatgpt_model: str, +def getModel(models: List[ModelsConfig], max_tokens: int, n: int, - api_key: str, - api_base: str, - api_version: str, - api_type: str, temperature: float, streaming: bool) -> RunnableSerializable: """returns a configured llm, which can be later be parametrized during runtime @@ -20,43 +19,61 @@ def getModel(chatgpt_model: str, Returns: RunnableSerializable: the configured llm """ - llm = AzureChatOpenAI( - model=chatgpt_model, - 
max_tokens=max_tokens, - n=n, - deployment_name= "chat", - openai_api_key=api_key, - azure_endpoint=api_base, - openai_api_version=api_version, - openai_api_type=api_type, - temperature=temperature, - streaming=streaming, - ).configurable_fields( - temperature=ConfigurableField( - id="llm_temperature", - name="LLM Temperature", - description="The temperature of the LLM", - ), - max_tokens= ConfigurableField( - id="llm_max_tokens", - name="LLM max Tokens", - description="The token Limit of the LLM", - ), - openai_api_key = ConfigurableField( - id="llm_api_key", - name="The api key", - description="The api key"), - streaming = ConfigurableField( - id="llm_streaming", - name="Streaming", - description="Should the LLM Stream"), - callbacks = ConfigurableField( - id="llm_callbacks", - name="Callbacks", - description="Callbacks for the llm") - - ).configurable_alternatives( - ConfigurableField(id="llm"), - default_key=SupportedModels.AZURE_CHATGPT.value, - fake= FakeListLLM(responses=["Hi diggi"])) + if len(models) == 0: + raise ModelsConfigurationException("No models found in the configuration.json") + default_model = models[0] + if default_model["type"] == "AZURE": + llm = AzureChatOpenAI( + model=default_model["model_name"], + deployment_name= default_model["deployment"], + openai_api_key=default_model["api_key"], + azure_endpoint=default_model["endpoint"], + openai_api_version=default_model["api_version"], + max_tokens=max_tokens, + n=n, + streaming=streaming, + temperature=temperature, + openai_api_type="azure", + ) + elif default_model["type"] == "OPENAI": + llm = ChatOpenAI( + model=default_model["model_name"], + api_key=default_model["api_key"], + base_url=default_model["endpoint"], + max_tokens=max_tokens, + n=n, + streaming=streaming, + temperature=temperature, + ) + else: + raise ModelsConfigurationException(f"Unknown model type: {default_model['type']}. 
Currently only `AZURE` and `OPENAI` are supported.") + + llm = llm.configurable_fields( + temperature=ConfigurableField( + id="llm_temperature", + name="LLM Temperature", + description="The temperature of the LLM", + ), + max_tokens= ConfigurableField( + id="llm_max_tokens", + name="LLM max Tokens", + description="The token Limit of the LLM", + ), + openai_api_key = ConfigurableField( + id="llm_api_key", + name="The api key", + description="The api key"), + streaming = ConfigurableField( + id="llm_streaming", + name="Streaming", + description="Should the LLM Stream"), + callbacks = ConfigurableField( + id="llm_callbacks", + name="Callbacks", + description="Callbacks for the llm") + + ).configurable_alternatives( + ConfigurableField(id="llm"), + default_key=models[0]["model_name"], + fake= FakeListLLM(responses=["Hi diggi"])) return llm diff --git a/app/backend/core/types/AppConfig.py b/app/backend/core/types/AppConfig.py index 56c4090d..88520801 100644 --- a/app/backend/core/types/AppConfig.py +++ b/app/backend/core/types/AppConfig.py @@ -1,12 +1,8 @@ from typing import TypedDict - -from azure.identity.aio import DefaultAzureCredential - from brainstorm.brainstorm import Brainstorm from chat.chat import Chat from core.authentification import AuthentificationHelper from core.datahelper import Repository -from core.types.AzureChatGPTConfig import AzureChatGPTConfig from core.types.Config import BackendConfig, Config from summarize.summarize import Summarize @@ -14,8 +10,6 @@ class AppConfig(TypedDict): """Config for the app, contains all clients and informations, that are needed """ - model_info: AzureChatGPTConfig - azure_credential: DefaultAzureCredential chat_approaches: Chat sum_approaches: Summarize brainstorm_approaches: Brainstorm diff --git a/app/backend/core/types/AzureChatGPTConfig.py b/app/backend/core/types/AzureChatGPTConfig.py deleted file mode 100644 index ef378289..00000000 --- a/app/backend/core/types/AzureChatGPTConfig.py +++ /dev/null @@ -1,14 +0,0 @@ -from typing import TypedDict - -from azure.core.credentials import AccessToken - - -class AzureChatGPTConfig(TypedDict): - """Contains all information, that describes an AzureOpenAI endpoint - """ - model: str - openai_token: AccessToken - openai_api_key: str - openai_api_base: str - openai_api_version: str - openai_api_type: str \ No newline at end of file diff --git a/app/backend/core/types/Config.py b/app/backend/core/types/Config.py index 85d71320..828a9e2f 100644 --- a/app/backend/core/types/Config.py +++ b/app/backend/core/types/Config.py @@ -4,11 +4,32 @@ class ApproachConfig(TypedDict): log_tokens: bool +class ModelsConfig(TypedDict): + type: str + model_name: str + deployment: str + endpoint: str + api_key: str + api_version: str + max_tokens: int + +class SSOConfig(TypedDict): + sso_issuer: str + role: str +class DatabaseConfig(TypedDict): + db_host: str + db_name: str + db_user: str + db_passwort: str class BackendConfig(TypedDict): + enable_auth: bool enable_database: bool + sso_config: SSOConfig + db_config: DatabaseConfig chat: ApproachConfig brainstorm: ApproachConfig sum: ApproachConfig + models: ModelsConfig class LabelsConfig(TypedDict): env_name: str diff --git a/app/backend/core/types/SupportedModels.py b/app/backend/core/types/SupportedModels.py deleted file mode 100644 index 932dc4e9..00000000 --- a/app/backend/core/types/SupportedModels.py +++ /dev/null @@ -1,7 +0,0 @@ -from enum import Enum, unique - - -@unique -class SupportedModels(Enum): - AZURE_CHATGPT = "AZURE_CHATGPT" - FAKE = "FAKE" \ No 
newline at end of file diff --git a/app/backend/gunicorn.conf.py b/app/backend/gunicorn.conf.py index b1aded06..86a6a912 100644 --- a/app/backend/gunicorn.conf.py +++ b/app/backend/gunicorn.conf.py @@ -3,7 +3,7 @@ max_requests = 1000 max_requests_jitter = 50 log_file = "-" -bind = "0.0.0.0" +bind = "0.0.0.0:8000" timeout = 230 # https://learn.microsoft.com/en-us/troubleshoot/azure/app-service/web-apps-performance-faqs#why-does-my-request-time-out-after-230-seconds diff --git a/app/backend/init_app.py b/app/backend/init_app.py index ca439183..dee97eb2 100644 --- a/app/backend/init_app.py +++ b/app/backend/init_app.py @@ -1,8 +1,5 @@ import os from typing import Tuple - -from azure.identity.aio import DefaultAzureCredential - from brainstorm.brainstorm import Brainstorm from chat.chat import Chat from core.authentification import AuthentificationHelper @@ -10,54 +7,16 @@ from core.datahelper import Base, Repository from core.llmhelper import getModel from core.types.AppConfig import AppConfig -from core.types.AzureChatGPTConfig import AzureChatGPTConfig -from core.types.Config import BackendConfig +from core.types.Config import BackendConfig, DatabaseConfig from summarize.summarize import Summarize -def read_env(): - """reads configured values from env - """ - AZURE_OPENAI_SERVICE = os.environ["AZURE_OPENAI_SERVICE"] - #AZURE_OPENAI_CHATGPT_DEPLOYMENT = os.environ["AZURE_OPENAI_CHATGPT_DEPLOYMENT"] - AZURE_OPENAI_CHATGPT_MODEL = os.environ["AZURE_OPENAI_CHATGPT_MODEL"] - SSO_ISSUER = os.environ["SSO_ISSUER"] - CONFIG_NAME = os.environ["CONFIG_NAME"] - DB_HOST = os.environ["DB_HOST"] - DB_NAME = os.environ["DB_NAME"] - DB_USER = os.environ["DB_USER"] - DB_PASSWORD = os.environ["DB_PASSWORD"] - return AZURE_OPENAI_SERVICE,AZURE_OPENAI_CHATGPT_MODEL,SSO_ISSUER,CONFIG_NAME,DB_HOST,DB_NAME,DB_USER,DB_PASSWORD - - -async def get_openai_params(AZURE_OPENAI_SERVICE: str): - """get current openai access token - - Args: - AZURE_OPENAI_SERVICE (str): the current openaiservice - """ - # Use the current user identity to authenticate with Azure OpenAI (no secrets needed, - # just use 'az login' locally, and managed identity when deployed on Azure). If you need to use keys, use separate AzureKeyCredential instances with the - # keys for each service - # If you encounter a blocking error during a DefaultAzureCredential resolution, you can exclude the problematic credential by using a parameter (ex. 
exclude_shared_token_cache_credential=True) - azure_credential = DefaultAzureCredential(exclude_shared_token_cache_credential = True) - - # Used by the OpenAI SDK - openai_api_base = f"https://{AZURE_OPENAI_SERVICE}.openai.azure.com" - openai_api_version = "2023-05-15" - openai_api_type = "azure_ad" - openai_token = await azure_credential.get_token( - "https://cognitiveservices.azure.com/.default" - ) - openai_api_key = openai_token.token - return azure_credential,openai_api_base,openai_api_version,openai_api_type,openai_token,openai_api_key -def initApproaches(model_info: AzureChatGPTConfig, cfg: BackendConfig, repoHelper: Repository) -> Tuple[Chat, Brainstorm, Summarize]: +def initApproaches(cfg: BackendConfig, repoHelper: Repository) -> Tuple[Chat, Brainstorm, Summarize]: """init different approaches Args: - model_info (AzureChatGPTConfig): defines access key for the current model, gets renewed over time cfg (BackendConfig): the config for the backend repoHelper (Repository): the repository to save request statistics @@ -65,38 +24,26 @@ def initApproaches(model_info: AzureChatGPTConfig, cfg: BackendConfig, repoHelpe Tuple[Chat, Brainstorm, Summarize]: the implementation behind chat, brainstorm and summarize """ brainstormllm = getModel( - chatgpt_model= model_info["model"], + models=cfg["models"], max_tokens = 4000, n = 1, - api_key = model_info["openai_api_key"], - api_base = model_info["openai_api_base"], - api_version = model_info["openai_api_version"], - api_type = model_info["openai_api_type"], streaming=False, temperature=0.9) sumllm = getModel( - chatgpt_model= model_info["model"], + models=cfg["models"], max_tokens = 1000, n = 1, - api_key = model_info["openai_api_key"], - api_base = model_info["openai_api_base"], - api_version = model_info["openai_api_version"], - api_type = model_info["openai_api_type"], streaming=False, - temperature=0.7) + temperature=0.2) chatlllm = getModel( - chatgpt_model= model_info["model"], + models=cfg["models"], max_tokens=4000, n = 1, - api_key = model_info["openai_api_key"], - api_base = model_info["openai_api_base"], - api_version = model_info["openai_api_version"], - api_type = model_info["openai_api_type"], streaming=True, temperature=0.7) - chat_approaches = Chat(llm=chatlllm, config=cfg["chat"], model_info=model_info, repo=repoHelper, chatgpt_model=model_info["model"]) - brainstorm_approaches = Brainstorm(llm=brainstormllm, model_info=model_info, config=cfg["brainstorm"], repo=repoHelper) - sum_approaches = Summarize(llm=sumllm, config=cfg["sum"],model_info=model_info, repo=repoHelper) + chat_approaches = Chat(llm=chatlllm, config=cfg["chat"], repo=repoHelper) + brainstorm_approaches = Brainstorm(llm=brainstormllm, config=cfg["brainstorm"], repo=repoHelper) + sum_approaches = Summarize(llm=sumllm, config=cfg["sum"], repo=repoHelper) return (chat_approaches, brainstorm_approaches, sum_approaches) async def initApp() -> AppConfig: @@ -105,43 +52,34 @@ async def initApp() -> AppConfig: Returns: AppConfig: contains the configuration for the webservice """ - # Replace these with your own values, either in environment variables or directly here - AZURE_OPENAI_SERVICE, AZURE_OPENAI_CHATGPT_MODEL, SSO_ISSUER, CONFIG_NAME, DB_HOST, DB_NAME, DB_USER, DB_PASSWORD = read_env() - - azure_credential, openai_api_base, openai_api_version, openai_api_type, openai_token, openai_api_key = await get_openai_params(AZURE_OPENAI_SERVICE) + + # read enviornment config + config_helper = ConfigHelper(base_path=os.path.dirname(os.path.realpath(__file__))+"/", 
env="config", base_config_name="base") + cfg = config_helper.loadData() # Set up authentication helper auth_helper = AuthentificationHelper( - issuer=SSO_ISSUER, - role="lhm-ab-mucgpt-user" + issuer=cfg["backend"]["sso_config"]["sso_issuer"], + role=cfg["backend"]["sso_config"]["role"] ) # set up repositorty - repoHelper = Repository( - username=DB_USER, - host=DB_HOST, - database=DB_NAME, - password=DB_PASSWORD - ) - # read enviornment config - config_helper = ConfigHelper(base_path=os.path.dirname(os.path.realpath(__file__))+"/ressources/", env=CONFIG_NAME, base_config_name="base") - cfg = config_helper.loadData() - - model_info = AzureChatGPTConfig( - model=AZURE_OPENAI_CHATGPT_MODEL, - openai_token = openai_token, - openai_api_key = openai_api_key, - openai_api_base = openai_api_base, - openai_api_version = openai_api_version, - openai_api_type = openai_api_type - ) + if(cfg["backend"]["enable_database"]): + db_config: DatabaseConfig = cfg["backend"]["db_config"] + repoHelper = Repository( + username=db_config["db_user"], + host=db_config["db_host"], + database=db_config["db_name"], + password=db_config["db_passwort"] + ) + repoHelper.setup_schema(base=Base) + else: + repoHelper = None - (chat_approaches, brainstorm_approaches, sum_approaches) = initApproaches(model_info=model_info, cfg=cfg["backend"], repoHelper=repoHelper) + (chat_approaches, brainstorm_approaches, sum_approaches) = initApproaches(cfg=cfg["backend"], repoHelper=repoHelper) - if cfg["backend"]["enable_database"]: - repoHelper.setup_schema(base=Base) + + return AppConfig( - model_info=model_info, - azure_credential=azure_credential, authentification_client=auth_helper, configuration_features=cfg, chat_approaches= chat_approaches, diff --git a/app/backend/requirements.txt b/app/backend/requirements.txt index cc01f61f..29086bf0 100644 --- a/app/backend/requirements.txt +++ b/app/backend/requirements.txt @@ -1,4 +1,3 @@ -azure-identity==1.17.1 quart==0.19.6 langchain==0.2.6 langchain_openai @@ -16,4 +15,5 @@ requests sqlalchemy==2.0.31 psycopg2==2.9.9 pypdf2==3.0.1 -tenacity==8.4.2 \ No newline at end of file +tenacity==8.4.2 +gunicorn \ No newline at end of file diff --git a/app/backend/ressources/test.json b/app/backend/ressources/test.json deleted file mode 100644 index ae81a7ab..00000000 --- a/app/backend/ressources/test.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "frontend": { - "labels": { - "env_name": "MUCGPT-Test" - }, - "alternative_logo": false - }, - "backend": { - "enable_auth": false, - "enable_database": false, - "chat":{ - "log_tokens": false - }, - "brainstorm": { - "log_tokens": false - }, - "sum": { - "log_tokens": false - } - } -} \ No newline at end of file diff --git a/app/backend/summarize/summarize.py b/app/backend/summarize/summarize.py index 55ee35d7..8f210b89 100644 --- a/app/backend/summarize/summarize.py +++ b/app/backend/summarize/summarize.py @@ -10,7 +10,6 @@ from core.datahelper import Repository, Requestinfo from core.textsplit import splitPDF, splitText -from core.types.AzureChatGPTConfig import AzureChatGPTConfig from core.types.Config import ApproachConfig from core.types.LlmConfigs import LlmConfigs from summarize.summarizeresult import SummarizeResult @@ -84,10 +83,9 @@ class Summarize: - def __init__(self, llm: RunnableSerializable, config: ApproachConfig, model_info: AzureChatGPTConfig, repo: Repository, short_split = 2100, medium_split = 1500, long_split = 700, use_last_n_summaries = -2): + def __init__(self, llm: RunnableSerializable, config: ApproachConfig, repo: Repository, 
short_split = 2100, medium_split = 1500, long_split = 700, use_last_n_summaries = -2): self.llm = llm self.config = config - self.model_info = model_info self.repo = repo self.switcher = { "short": short_split, @@ -109,7 +107,6 @@ def getTranslationCleanupPrompt(self) -> PromptTemplate: def setup(self) -> SequentialChain: config: LlmConfigs = { - "llm_api_key": self.model_info["openai_api_key"] } llm = self.llm.with_config(configurable=config) # setup model diff --git a/app/backend/text.py b/app/backend/text.py deleted file mode 100644 index 9209778c..00000000 --- a/app/backend/text.py +++ /dev/null @@ -1,2 +0,0 @@ -def nonewlines(s: str) -> str: - return s.replace('\n', ' ').replace('\r', ' ') diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 00000000..dba02738 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,13 @@ + version: "3.7" + services: + mucgpt: + image: mucgpt:latest + container_name: MUCGPT + ports: + - "8000:8000" + volumes: + - "./config/local.json:/code/config.json" + - "./config/base.json:/code/base.json" + environment: + https_proxy: "http://internet-proxy-client.muenchen.de:80" + http_proxy: "http://internet-proxy-client.muenchen.de:80" \ No newline at end of file From 98103fefd74b2937ea6323063d14c43fe0811fba Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Wed, 14 Aug 2024 14:00:35 +0200 Subject: [PATCH 02/34] =?UTF-8?q?First=20working=20terraform=20version=20?= =?UTF-8?q?=F0=9F=8E=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 14 ++++++-- infra/main.tf | 78 ++++++++++++++++++++++++++++++++++++++++++++ infra/provider.tf | 30 +++++++++++++++++ infra/variables.tf | 81 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 200 insertions(+), 3 deletions(-) create mode 100644 infra/main.tf create mode 100644 infra/provider.tf create mode 100644 infra/variables.tf diff --git a/Dockerfile b/Dockerfile index 4b7176f4..1d5c9ed0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,16 +1,24 @@ # syntax=docker/dockerfile:1 + FROM node:19-alpine AS builder +ENV GENERATE_SOURCEMAP=false +ENV NODE_OPTIONS=--max_old_space_size=4096 WORKDIR /build COPY app/ . WORKDIR /build/frontend RUN npm install RUN npm run build -FROM python:3.11 +FROM python:3.12 WORKDIR /code COPY --from=builder /build/backend . + +ARG fromconfig="./config/.local.json" +COPY $fromconfig /code/config.json +COPY "./config/base.json" /code/base.json + RUN pip install --no-cache-dir --upgrade -r requirements.txt -EXPOSE 8000 -CMD ["python","-m","gunicorn","main:app"] \ No newline at end of file +EXPOSE 8000 +CMD ["gunicorn","main:app"] \ No newline at end of file diff --git a/infra/main.tf b/infra/main.tf new file mode 100644 index 00000000..9cee9302 --- /dev/null +++ b/infra/main.tf @@ -0,0 +1,78 @@ + +resource "random_id" "server" { + byte_length = 16 +} + + + +resource "azurerm_container_registry" "acr" { + name = var.container_reg_name == "" ? "containerReg${random_id.server.hex}" : var.container_reg_name + resource_group_name = var.rg_name + location = var.location + sku = "Standard" + admin_enabled = true +} + +resource "azurerm_service_plan" "asp" { + name = var.service_plan_name == "" ? "${var.prefix}_serviceplan_${random_id.server.hex}" : var.service_plan_name + location = var.location + resource_group_name = var.rg_name + os_type = "Linux" + sku_name = var.service_plan_sku +} + +resource "azurerm_linux_web_app" "webapp" { + name = var.backend_name == "" ? 
"${var.prefix}-backend-${random_id.server.hex}" : var.backend_name + location = var.location + resource_group_name = var.rg_name + service_plan_id = azurerm_service_plan.asp.id + + app_settings = { + WEBSITES_ENABLE_APP_SERVICE_STORAGE = "false" + LHMSSO_PROVIDER_AUTHENTICATION_SECRET = var.sso_secret + WEBSITES_PORT = 8000 + WEBSITES_ENABLE_APP_SERVICE_STORAGE = false + DOCKER_ENABLE_CI = "true" + } + + site_config { + always_on = "true" + application_stack { + docker_image_name = var.image_name + docker_registry_url = "https://${azurerm_container_registry.acr.login_server}" + docker_registry_username = var.registry_username + docker_registry_password = var.registry_password + } + health_check_path = "/health" + } + auth_settings_v2 { + auth_enabled = true + require_authentication = true + unauthenticated_action = "RedirectToLoginPage" + default_provider = "LHMSSO" + excluded_paths=["/health"] + custom_oidc_v2 { + name = "LHMSSO" + client_id = "mucgpt" + openid_configuration_endpoint = var.sso_configuration_endpoint + scopes = ["openid"] + } + login { + + } + } + + + logs{ + application_logs{ + file_system_level = "Verbose" + + } + http_logs{ + file_system{ + retention_in_mb = 30 + retention_in_days = 7 + } + } + } +} diff --git a/infra/provider.tf b/infra/provider.tf new file mode 100644 index 00000000..584cdf1e --- /dev/null +++ b/infra/provider.tf @@ -0,0 +1,30 @@ +# Configure desired versions of terraform, azurerm provider +terraform { + required_version = ">= 1.1.7, < 2.0.0" + required_providers { + azurerm = { + version = "~>3.97.1" + source = "hashicorp/azurerm" + } + azurecaf = { + source = "aztfmod/azurecaf" + version = "~>1.2.24" + } + } +} + +# Enable features for azurerm +provider "azurerm" { + skip_provider_registration = "true" + features { + key_vault { + purge_soft_delete_on_destroy = false + } + resource_group { + prevent_deletion_if_contains_resources = false + } + } +} + +# Access client_id, tenant_id, subscription_id and object_id configuration values +data "azurerm_client_config" "current" {} diff --git a/infra/variables.tf b/infra/variables.tf new file mode 100644 index 00000000..0366dbf4 --- /dev/null +++ b/infra/variables.tf @@ -0,0 +1,81 @@ +variable "location" { + description = "(Required) The Azure location where the resource should be deployed" + type = string + default = "westeurope" +} + +variable "prefix" { + type = string + description = "The prefix used for all resources in this example" + default = "mucgpt" +} + + +variable "rg_name" { + description = "(Required) The RG_name. Please refer to the naming convention described in confluence." + type = string +} + +variable "container_reg_name" { + description = "The Container Registry Name. Please refer to the naming convention described in confluence." + type = string + default = "" +} + +variable "service_plan_name" { + description = "The Service Plan Name." 
+ type = string + default = "" +} + +variable "service_plan_sku" { + description = "The Service Plan Name" + type = string + default = "" +} + +variable "backend_name" { + description = "The Webapp Name for the App Service" + type = string + default = "" +} + +variable "registry_username" { + description = "The username for the container registry" + type = string +} + +variable "registry_password" { + description = "The password for the container registry" + type = string +} + +variable "image_name"{ + description = "The image name of the image in the appservice" + type = string +} + +variable "sso_secret"{ + description = "The secret for the sso" + type = string +} + +variable "sso_configuration_endpoint"{ + description = "The configuration endpoint for the openid-connect endpoint. Ends with .well-known/openid-configuration" + type = string +} + +variable "tags" { + description = "(Required) The necessary tags defined in the tagging concept are mandatory." + type = object({ + cce-businesscriticality: string + cce-costcenter: string + cce-businessunit: string + cce-expirydate: string + cce-requestnumber: string + cce-serviceid: string + cce-serviceowner: string + cce-shortname: string + cce-stage: string + }) +} From dbe1de16ec6c1aacb209fbbdafcb43d24670ff2d Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Tue, 20 Aug 2024 11:50:49 +0200 Subject: [PATCH 03/34] =?UTF-8?q?=F0=9F=9B=A0=20default=20config?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 2 +- config/base.json | 3 +++ config/default.json | 50 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 config/base.json create mode 100644 config/default.json diff --git a/Dockerfile b/Dockerfile index 1d5c9ed0..7cfedd23 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,7 +14,7 @@ FROM python:3.12 WORKDIR /code COPY --from=builder /build/backend . 
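+# The build argument below chooses which JSON file is baked into the image as
+# /code/config.json; it now defaults to the checked-in placeholder config/default.json
+# rather than a local, untracked .local.json.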
-ARG fromconfig="./config/.local.json" +ARG fromconfig="./config/default.json" COPY $fromconfig /code/config.json COPY "./config/base.json" /code/base.json diff --git a/config/base.json b/config/base.json new file mode 100644 index 00000000..cb7ad94b --- /dev/null +++ b/config/base.json @@ -0,0 +1,3 @@ +{ + "version": "1.1.1" +} \ No newline at end of file diff --git a/config/default.json b/config/default.json new file mode 100644 index 00000000..e1167fc7 --- /dev/null +++ b/config/default.json @@ -0,0 +1,50 @@ +{ + "frontend": { + "labels": { + "env_name": "MUCGPT" + }, + "alternative_logo": false + }, + "backend": { + "enable_auth": false, + "enable_database": false, + "sso_config": { + "sso_issuer": "TODO", + "role": "lhm-ab-mucgpt-user" + }, + "db_config": { + "db_host": "TODO", + "db_name": "postgres", + "db_user": "TODO", + "db_passwort": "TODO" + }, + "chat": { + "log_tokens": false + }, + "brainstorm": { + "log_tokens": false + }, + "sum": { + "log_tokens": false + }, + + "models": [ + { + "type": "OPENAI", + "model_name": "TODO", + "endpoint": "TODO", + "api_key": "TODO", + "max_tokens": 128000 + }, + { + "type": "AZURE", + "model_name": "TODO", + "deployment": "TODO", + "endpoint": "TODO", + "api_key": "TODO", + "api_version": "TODO", + "max_tokens": 0 + } + ] + } +} \ No newline at end of file From 5773c80ef8c3e877c4a09f67e2611b5bbfb4add2 Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Tue, 20 Aug 2024 11:55:14 +0200 Subject: [PATCH 04/34] :sparkles: Frontend selector of LLM --- .../LLMSelector/LLMContextProvider.tsx | 20 ++++++++ .../components/LLMSelector/LLMSelector.tsx | 48 +++++++++++++++++++ .../SettingsDrawer/SettingsDrawer.tsx | 12 ++++- app/frontend/src/i18n.ts | 9 ++-- app/frontend/src/pages/layout/Layout.tsx | 14 +++++- .../src/pages/layout/LayoutHelper.tsx | 1 + 6 files changed, 98 insertions(+), 6 deletions(-) create mode 100644 app/frontend/src/components/LLMSelector/LLMContextProvider.tsx create mode 100644 app/frontend/src/components/LLMSelector/LLMSelector.tsx diff --git a/app/frontend/src/components/LLMSelector/LLMContextProvider.tsx b/app/frontend/src/components/LLMSelector/LLMContextProvider.tsx new file mode 100644 index 00000000..72d59a71 --- /dev/null +++ b/app/frontend/src/components/LLMSelector/LLMContextProvider.tsx @@ -0,0 +1,20 @@ +// Context.js +import React, { Dispatch, SetStateAction, useState } from "react"; + +interface ILLMProvider { + LLM: string; + setLLM: Dispatch>; +} + +export const DEFAULTLLM = "GPT-4o-mini"; +export const LLMContext = React.createContext({ LLM: DEFAULTLLM, setLLM: () => { } }); + +export const LLMContextProvider = (props: React.PropsWithChildren<{}>) => { + const [LLM, setLLM] = useState(DEFAULTLLM); + + return ( + + {props.children} + + ); +}; \ No newline at end of file diff --git a/app/frontend/src/components/LLMSelector/LLMSelector.tsx b/app/frontend/src/components/LLMSelector/LLMSelector.tsx new file mode 100644 index 00000000..e78fa87f --- /dev/null +++ b/app/frontend/src/components/LLMSelector/LLMSelector.tsx @@ -0,0 +1,48 @@ +import { + Dropdown, + makeStyles, + Option, +} from "@fluentui/react-components"; +import { SelectionEvents, OptionOnSelectData } from "@fluentui/react-combobox"; + +const useStyles = makeStyles({ + root: { + // Stack the label above the field with a gap + + }, + option: { + } +}); + +interface Props { + onSelectionChange: (e: SelectionEvents, selection: OptionOnSelectData) => void; + defaultLLM: string; +} + + +export const LLMSelector = ({ 
onSelectionChange, defaultLLM }: Props) => { + const styles = useStyles(); + return ( +
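+        // Presumably a Fluent UI <Dropdown> whose <Option> entries list the selectable
+        // models; the chosen value is reported through onSelectionChange and the
+        // initial selection comes from defaultLLM.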
+ + + + + + +
+ ); +}; \ No newline at end of file diff --git a/app/frontend/src/components/SettingsDrawer/SettingsDrawer.tsx b/app/frontend/src/components/SettingsDrawer/SettingsDrawer.tsx index 99642be9..b9b013ca 100644 --- a/app/frontend/src/components/SettingsDrawer/SettingsDrawer.tsx +++ b/app/frontend/src/components/SettingsDrawer/SettingsDrawer.tsx @@ -17,6 +17,7 @@ import styles from "./SettingsDrawer.module.css"; import { ChangeEvent, useCallback, useState } from "react"; import { SelectionEvents, OptionOnSelectData } from "@fluentui/react-combobox"; import { LanguageSelector } from "../../components/LanguageSelector"; +import { LLMSelector } from "../LLMSelector/LLMSelector"; import { useTranslation } from 'react-i18next'; import cheetsheet from "../../assets/mucgpt_cheatsheet.pdf"; interface Props { @@ -27,9 +28,11 @@ interface Props { setFontscale: (fontscale: number) => void; isLight: boolean; setTheme: (isLight: boolean) => void; + onLLMSelectionChanged: (e: SelectionEvents, selection: OptionOnSelectData) => void; + defaultLLM: string; } -export const SettingsDrawer = ({ onLanguageSelectionChanged, defaultlang, version, fontscale, setFontscale, isLight, setTheme }: Props) => { +export const SettingsDrawer = ({ onLanguageSelectionChanged, defaultlang, version, fontscale, setFontscale, isLight, setTheme, onLLMSelectionChanged, defaultLLM }: Props) => { const [isOpen, setIsOpen] = useState(false); const { t, i18n } = useTranslation(); @@ -73,7 +76,12 @@ export const SettingsDrawer = ({ onLanguageSelectionChanged, defaultlang, versio
{t('components.settingsdrawer.fontsize')} - +
+
+ {t('components.settingsdrawer.llm')} +
+
+
diff --git a/app/frontend/src/i18n.ts b/app/frontend/src/i18n.ts index 20bc5f83..0a579ce7 100644 --- a/app/frontend/src/i18n.ts +++ b/app/frontend/src/i18n.ts @@ -106,7 +106,8 @@ i18n theme: "Design", change_theme: "Design wechseln", theme_light: "Hell", - theme_dark: "Dunkel" + theme_dark: "Dunkel", + llm: "Sprachmodell" }, questioninput: { tokensused: "Token verbraucht", @@ -261,7 +262,8 @@ i18n theme: "Theme", change_theme: "Switch theme", theme_light: "Light", - theme_dark: "Dark" + theme_dark: "Dark", + llm: "language model" }, questioninput: { tokensused: "Token used", @@ -416,7 +418,8 @@ i18n theme: "Design", change_theme: "Design wechseln", theme_light: "Hell", - theme_dark: "Dunkel" + theme_dark: "Dunkel", + llm: "Sprachmodell" }, questioninput: { tokensused: "Token vabrocht", diff --git a/app/frontend/src/pages/layout/Layout.tsx b/app/frontend/src/pages/layout/Layout.tsx index 8465e2e7..b1baaf4a 100644 --- a/app/frontend/src/pages/layout/Layout.tsx +++ b/app/frontend/src/pages/layout/Layout.tsx @@ -12,6 +12,7 @@ import { ApplicationConfig, configApi } from "../../api"; import { SettingsDrawer } from "../../components/SettingsDrawer"; import { FluentProvider, Theme } from '@fluentui/react-components'; import { useStyles, STORAGE_KEYS, adjustTheme } from "./LayoutHelper"; +import { DEFAULTLLM, LLMContext } from "../../components/LLMSelector/LLMContextProvider"; const formatDate = (date: Date) => { let formatted_date = @@ -26,9 +27,11 @@ export const Layout = () => { const navigate = useNavigate() const termsofuseread = localStorage.getItem(STORAGE_KEYS.TERMS_OF_USE_READ) === formatDate(new Date()); const language_pref = (localStorage.getItem(STORAGE_KEYS.SETTINGS_LANGUAGE)) || DEFAULTLANG; + const llm_pref = (localStorage.getItem(STORAGE_KEYS.SETTINGS_LLM)) || DEFAULTLLM; const font_scaling_pref = Number(localStorage.getItem(STORAGE_KEYS.SETTINGS_FONT_SCALING)) || 1; const ligth_theme_pref = localStorage.getItem(STORAGE_KEYS.SETTINGS_IS_LIGHT_THEME) === null ? true : localStorage.getItem(STORAGE_KEYS.SETTINGS_IS_LIGHT_THEME) == 'true'; const { language, setLanguage } = useContext(LanguageContext); + const { LLM, setLLM } = useContext(LLMContext); const { t, i18n } = useTranslation(); const [config, setConfig] = useState({ backend: { @@ -82,6 +85,12 @@ export const Layout = () => { setLanguage(lang); localStorage.setItem(STORAGE_KEYS.SETTINGS_LANGUAGE, lang); }; + const onLLMSelectionChanged = (e: SelectionEvents, selection: OptionOnSelectData) => { + let llm = selection.optionValue || DEFAULTLLM; + setLLM(llm); + localStorage.setItem(STORAGE_KEYS.SETTINGS_LLM, llm); + }; + return ( @@ -122,7 +131,10 @@ export const Layout = () => { fontscale={fontscaling} setFontscale={onFontscaleChange} isLight={isLight} - setTheme={onThemeChange}> + setTheme={onThemeChange} + defaultLLM={llm_pref} + onLLMSelectionChanged={onLLMSelectionChanged} + >
diff --git a/app/frontend/src/pages/layout/LayoutHelper.tsx b/app/frontend/src/pages/layout/LayoutHelper.tsx index 5733cc87..b9ca17e6 100644 --- a/app/frontend/src/pages/layout/LayoutHelper.tsx +++ b/app/frontend/src/pages/layout/LayoutHelper.tsx @@ -25,6 +25,7 @@ export const useStyles = makeStyles({ export const enum STORAGE_KEYS { TERMS_OF_USE_READ = 'TERMS_OF_USE_READ', SETTINGS_LANGUAGE = 'SETTINGS_LANGUAGE', + SETTINGS_LLM = 'SETTINGS_LLM', SETTINGS_FONT_SCALING = 'SETTINGS_FONT_SCALING', SETTINGS_IS_LIGHT_THEME = 'SETTINGS_IS_LIGHT_THEME', VERSION_UPDATE_SEEN = 'VERSION_UPDATE_SEEN' From ba1d2402ab19db936e458a6716a04908a6453edd Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Wed, 21 Aug 2024 10:54:28 +0200 Subject: [PATCH 05/34] remove azure dependencies --- app/backend/app.py | 8 +------- app/backend/requirements.txt | 1 - 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/app/backend/app.py b/app/backend/app.py index 7a2b5449..dd570c5d 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -2,9 +2,6 @@ import logging import os from typing import cast - -from azure.monitor.opentelemetry import configure_azure_monitor -from opentelemetry.instrumentation.aiohttp_client import AioHttpClientInstrumentor from opentelemetry.instrumentation.asgi import OpenTelemetryMiddleware from quart import ( Blueprint, @@ -210,12 +207,9 @@ async def setup_clients(): current_app.config[APPCONFIG_KEY] = await initApp() def create_app(): - if os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING"): - configure_azure_monitor() - AioHttpClientInstrumentor().instrument() app = Quart(__name__) app.register_blueprint(bp) - app.asgi_app = OpenTelemetryMiddleware(app.asgi_app) + app.asgi_app = OpenTelemetryMiddleware(app = app.asgi_app) # Level should be one of https://docs.python.org/3/library/logging.html#logging-levels logging.basicConfig(level=os.getenv("APP_LOG_LEVEL", "ERROR")) return app diff --git a/app/backend/requirements.txt b/app/backend/requirements.txt index 76ed2fb4..ba31fafa 100644 --- a/app/backend/requirements.txt +++ b/app/backend/requirements.txt @@ -5,7 +5,6 @@ langchain_community tiktoken uvicorn[standard]==0.30.3 aiohttp==3.10.2 -azure-monitor-opentelemetry==1.6.0 opentelemetry-instrumentation-asgi==0.46b0 opentelemetry-instrumentation-requests==0.46b0 opentelemetry-instrumentation-aiohttp-client==0.46b0 From 5350d9b9f9bb5788cb9f28570d07156d1bb3cdb1 Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Wed, 21 Aug 2024 10:55:01 +0200 Subject: [PATCH 06/34] remove ressources --- app/backend/ressources/base.json | 3 --- app/backend/ressources/demo.json | 21 --------------------- app/backend/ressources/dev.json | 21 --------------------- app/backend/ressources/local.json | 21 --------------------- app/backend/ressources/nosec.json | 21 --------------------- app/backend/ressources/prod.json | 21 --------------------- 6 files changed, 108 deletions(-) delete mode 100644 app/backend/ressources/base.json delete mode 100644 app/backend/ressources/demo.json delete mode 100644 app/backend/ressources/dev.json delete mode 100644 app/backend/ressources/local.json delete mode 100644 app/backend/ressources/nosec.json delete mode 100644 app/backend/ressources/prod.json diff --git a/app/backend/ressources/base.json b/app/backend/ressources/base.json deleted file mode 100644 index d1733968..00000000 --- a/app/backend/ressources/base.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "version": "1.1.2" -} \ No newline at end of file diff --git a/app/backend/ressources/demo.json 
b/app/backend/ressources/demo.json deleted file mode 100644 index 78cc1081..00000000 --- a/app/backend/ressources/demo.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "frontend": { - "labels": { - "env_name": "PILOT" - }, - "alternative_logo": false - }, - "backend": { - "enable_auth": true, - "enable_database": true, - "chat":{ - "log_tokens": true - }, - "brainstorm": { - "log_tokens": true - }, - "sum": { - "log_tokens": true - } - } -} \ No newline at end of file diff --git a/app/backend/ressources/dev.json b/app/backend/ressources/dev.json deleted file mode 100644 index d9b75028..00000000 --- a/app/backend/ressources/dev.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "frontend": { - "labels": { - "env_name": "MUC tschibidi-C" - }, - "alternative_logo": true - }, - "backend": { - "enable_auth": true, - "enable_database": true, - "chat":{ - "log_tokens": true - }, - "brainstorm": { - "log_tokens": true - }, - "sum": { - "log_tokens": true - } - } -} \ No newline at end of file diff --git a/app/backend/ressources/local.json b/app/backend/ressources/local.json deleted file mode 100644 index 44df80e0..00000000 --- a/app/backend/ressources/local.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "frontend": { - "labels": { - "env_name": "MUC tschibidi-C" - }, - "alternative_logo": true - }, - "backend": { - "enable_auth": false, - "enable_database": true, - "chat":{ - "log_tokens": true - }, - "brainstorm": { - "log_tokens": true - }, - "sum": { - "log_tokens": true - } - } -} \ No newline at end of file diff --git a/app/backend/ressources/nosec.json b/app/backend/ressources/nosec.json deleted file mode 100644 index b6152c26..00000000 --- a/app/backend/ressources/nosec.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "frontend": { - "labels": { - "env_name": "NOSEC" - }, - "alternative_logo": false - }, - "backend": { - "enable_auth": false, - "enable_database": false, - "chat":{ - "log_tokens": false - }, - "brainstorm": { - "log_tokens": false - }, - "sum": { - "log_tokens": false - } - } -} \ No newline at end of file diff --git a/app/backend/ressources/prod.json b/app/backend/ressources/prod.json deleted file mode 100644 index 5861cf2f..00000000 --- a/app/backend/ressources/prod.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "frontend": { - "labels": { - "env_name": "PROD" - }, - "alternative_logo": false - }, - "backend": { - "enable_auth": true, - "enable_database": true, - "chat":{ - "log_tokens": true - }, - "brainstorm": { - "log_tokens": true - }, - "sum": { - "log_tokens": true - } - } -} \ No newline at end of file From e69e96ca23b5bce313ed160dbd4fc2435212c16a Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Wed, 21 Aug 2024 10:55:48 +0200 Subject: [PATCH 07/34] =?UTF-8?q?=F0=9F=9A=AE=20remove=20bicep?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- infra/abbreviations.json | 135 -------------- infra/core/ai/cognitiveservices.bicep | 41 ---- infra/core/db/db.bicep | 48 ----- infra/core/host/appservice.bicep | 105 ----------- infra/core/host/appserviceplan.bicep | 21 --- infra/core/host/authsettingsV2.bicep | 73 -------- infra/core/monitor/applicationinsights.bicep | 17 -- infra/core/monitor/monitoring.bicep | 17 -- infra/main.bicep | 185 ------------------- infra/main.parameters.json | 66 ------- 10 files changed, 708 deletions(-) delete mode 100644 infra/abbreviations.json delete mode 100644 infra/core/ai/cognitiveservices.bicep delete mode 100644 infra/core/db/db.bicep delete mode 100644 infra/core/host/appservice.bicep delete mode 100644 
infra/core/host/appserviceplan.bicep delete mode 100644 infra/core/host/authsettingsV2.bicep delete mode 100644 infra/core/monitor/applicationinsights.bicep delete mode 100644 infra/core/monitor/monitoring.bicep delete mode 100644 infra/main.bicep delete mode 100644 infra/main.parameters.json diff --git a/infra/abbreviations.json b/infra/abbreviations.json deleted file mode 100644 index 703e5038..00000000 --- a/infra/abbreviations.json +++ /dev/null @@ -1,135 +0,0 @@ -{ - "analysisServicesServers": "as", - "apiManagementService": "apim-", - "appConfigurationConfigurationStores": "appcs-", - "appManagedEnvironments": "cae-", - "appContainerApps": "ca-", - "authorizationPolicyDefinitions": "policy-", - "automationAutomationAccounts": "aa-", - "blueprintBlueprints": "bp-", - "blueprintBlueprintsArtifacts": "bpa-", - "cacheRedis": "redis-", - "cdnProfiles": "cdnp-", - "cdnProfilesEndpoints": "cdne-", - "cognitiveServicesAccounts": "cog-", - "cognitiveServicesFormRecognizer": "cog-fr-", - "cognitiveServicesTextAnalytics": "cog-ta-", - "computeAvailabilitySets": "avail-", - "computeCloudServices": "cld-", - "computeDiskEncryptionSets": "des", - "computeDisks": "disk", - "computeDisksOs": "osdisk", - "computeGalleries": "gal", - "computeSnapshots": "snap-", - "computeVirtualMachines": "vm", - "computeVirtualMachineScaleSets": "vmss-", - "containerInstanceContainerGroups": "ci", - "containerRegistryRegistries": "cr", - "containerServiceManagedClusters": "aks-", - "databricksWorkspaces": "dbw-", - "dataFactoryFactories": "adf-", - "dataLakeAnalyticsAccounts": "dla", - "dataLakeStoreAccounts": "dls", - "dataMigrationServices": "dms-", - "dBforMySQLServers": "mysql-", - "dBforPostgreSQLServers": "psql-", - "devicesIotHubs": "iot-", - "devicesProvisioningServices": "provs-", - "devicesProvisioningServicesCertificates": "pcert-", - "documentDBDatabaseAccounts": "cosmos-", - "eventGridDomains": "evgd-", - "eventGridDomainsTopics": "evgt-", - "eventGridEventSubscriptions": "evgs-", - "eventHubNamespaces": "evhns-", - "eventHubNamespacesEventHubs": "evh-", - "hdInsightClustersHadoop": "hadoop-", - "hdInsightClustersHbase": "hbase-", - "hdInsightClustersKafka": "kafka-", - "hdInsightClustersMl": "mls-", - "hdInsightClustersSpark": "spark-", - "hdInsightClustersStorm": "storm-", - "hybridComputeMachines": "arcs-", - "insightsActionGroups": "ag-", - "insightsComponents": "appi-", - "keyVaultVaults": "kv-", - "kubernetesConnectedClusters": "arck", - "kustoClusters": "dec", - "kustoClustersDatabases": "dedb", - "logicIntegrationAccounts": "ia-", - "logicWorkflows": "logic-", - "machineLearningServicesWorkspaces": "mlw-", - "managedIdentityUserAssignedIdentities": "id-", - "managementManagementGroups": "mg-", - "migrateAssessmentProjects": "migr-", - "networkApplicationGateways": "agw-", - "networkApplicationSecurityGroups": "asg-", - "networkAzureFirewalls": "afw-", - "networkBastionHosts": "bas-", - "networkConnections": "con-", - "networkDnsZones": "dnsz-", - "networkExpressRouteCircuits": "erc-", - "networkFirewallPolicies": "afwp-", - "networkFirewallPoliciesWebApplication": "waf", - "networkFirewallPoliciesRuleGroups": "wafrg", - "networkFrontDoors": "fd-", - "networkFrontdoorWebApplicationFirewallPolicies": "fdfp-", - "networkLoadBalancersExternal": "lbe-", - "networkLoadBalancersInternal": "lbi-", - "networkLoadBalancersInboundNatRules": "rule-", - "networkLocalNetworkGateways": "lgw-", - "networkNatGateways": "ng-", - "networkNetworkInterfaces": "nic-", - "networkNetworkSecurityGroups": "nsg-", - 
"networkNetworkSecurityGroupsSecurityRules": "nsgsr-", - "networkNetworkWatchers": "nw-", - "networkPrivateDnsZones": "pdnsz-", - "networkPrivateLinkServices": "pl-", - "networkPublicIPAddresses": "pip-", - "networkPublicIPPrefixes": "ippre-", - "networkRouteFilters": "rf-", - "networkRouteTables": "rt-", - "networkRouteTablesRoutes": "udr-", - "networkTrafficManagerProfiles": "traf-", - "networkVirtualNetworkGateways": "vgw-", - "networkVirtualNetworks": "vnet-", - "networkVirtualNetworksSubnets": "snet-", - "networkVirtualNetworksVirtualNetworkPeerings": "peer-", - "networkVirtualWans": "vwan-", - "networkVpnGateways": "vpng-", - "networkVpnGatewaysVpnConnections": "vcn-", - "networkVpnGatewaysVpnSites": "vst-", - "notificationHubsNamespaces": "ntfns-", - "notificationHubsNamespacesNotificationHubs": "ntf-", - "operationalInsightsWorkspaces": "log-", - "portalDashboards": "dash-", - "powerBIDedicatedCapacities": "pbi-", - "purviewAccounts": "pview-", - "recoveryServicesVaults": "rsv-", - "resourcesResourceGroups": "rg-", - "searchSearchServices": "srch-", - "serviceBusNamespaces": "sb-", - "serviceBusNamespacesQueues": "sbq-", - "serviceBusNamespacesTopics": "sbt-", - "serviceEndPointPolicies": "se-", - "serviceFabricClusters": "sf-", - "signalRServiceSignalR": "sigr", - "sqlManagedInstances": "sqlmi-", - "sqlServers": "sql-", - "sqlServersDataWarehouse": "sqldw-", - "sqlServersDatabases": "sqldb-", - "sqlServersDatabasesStretch": "sqlstrdb-", - "storageStorageAccounts": "st", - "storageStorageAccountsVm": "stvm", - "storSimpleManagers": "ssimp", - "streamAnalyticsCluster": "asa-", - "synapseWorkspaces": "syn", - "synapseWorkspacesAnalyticsWorkspaces": "synw", - "synapseWorkspacesSqlPoolsDedicated": "syndp", - "synapseWorkspacesSqlPoolsSpark": "synsp", - "timeSeriesInsightsEnvironments": "tsi-", - "webServerFarms": "plan-", - "webSitesAppService": "app-", - "webSitesAppServiceEnvironment": "ase-", - "webSitesFunctions": "func-", - "webStaticSites": "stapp-" -} diff --git a/infra/core/ai/cognitiveservices.bicep b/infra/core/ai/cognitiveservices.bicep deleted file mode 100644 index 8efb10ac..00000000 --- a/infra/core/ai/cognitiveservices.bicep +++ /dev/null @@ -1,41 +0,0 @@ -param name string -param location string = resourceGroup().location -param tags object = {} - -param customSubDomainName string = name -param deployments array = [] -param kind string = 'OpenAI' -param publicNetworkAccess string = 'Enabled' -param sku object = { - name: 'S0' -} - -resource account 'Microsoft.CognitiveServices/accounts@2023-05-01' = { - name: name - location: location - tags: tags - kind: kind - properties: { - customSubDomainName: customSubDomainName - publicNetworkAccess: publicNetworkAccess - } - sku: sku -} - -@batchSize(1) -resource deployment 'Microsoft.CognitiveServices/accounts/deployments@2023-05-01' = [for deployment in deployments: { - parent: account - name: deployment.name - properties: { - model: deployment.model - raiPolicyName: contains(deployment, 'raiPolicyName') ? deployment.raiPolicyName : null - } - sku: contains(deployment, 'sku') ? 
deployment.sku : { - name: 'Standard' - capacity: 20 - } -}] - -output endpoint string = account.properties.endpoint -output id string = account.id -output name string = account.name diff --git a/infra/core/db/db.bicep b/infra/core/db/db.bicep deleted file mode 100644 index 31f3feed..00000000 --- a/infra/core/db/db.bicep +++ /dev/null @@ -1,48 +0,0 @@ -param location string = resourceGroup().location -param tags object = {} -param administratorLogin string -param administratorLoginPassword string -param name string - - -resource symbolicname 'Microsoft.DBforPostgreSQL/flexibleServers@2022-12-01' = { - name: name - location: location - tags: tags - sku: { - name: 'Standard_B1ms' - tier: 'Burstable' - } - properties: { - administratorLogin: administratorLogin - administratorLoginPassword: administratorLoginPassword - authConfig: { - activeDirectoryAuth: 'Disabled' - passwordAuth: 'Enabled' - } - availabilityZone: '1' - backup: { - backupRetentionDays: 7 - geoRedundantBackup: 'Disabled' - } - createMode: 'Default' - dataEncryption: { - type: 'SystemManaged' - } - highAvailability: { - mode: 'Disabled' - } - maintenanceWindow: { - customWindow: 'Disabled' - dayOfWeek: 0 - startHour: 0 - startMinute: 0 - } - pointInTimeUTC: 'string' - replicationRole: 'Primary' - storage: { - storageSizeGB: 32 - } - version: '15' - } -} diff --git a/infra/core/host/appservice.bicep b/infra/core/host/appservice.bicep deleted file mode 100644 index 9ba5ec91..00000000 --- a/infra/core/host/appservice.bicep +++ /dev/null @@ -1,105 +0,0 @@ -param name string -param location string = resourceGroup().location -param tags object = {} -param ssoSecret string - -// Reference Properties -param applicationInsightsName string = '' -param appServicePlanId string -param keyVaultName string = '' -param managedIdentity bool = !empty(keyVaultName) - -// Runtime Properties -@allowed([ - 'dotnet', 'dotnetcore', 'dotnet-isolated', 'node', 'python', 'java', 'powershell', 'custom' -]) -param runtimeName string -param runtimeNameAndVersion string = '${runtimeName}|${runtimeVersion}' -param runtimeVersion string - -// Microsoft.Web/sites Properties -param kind string = 'app,linux' - -// Microsoft.Web/sites/config -param allowedOrigins array = [] -param alwaysOn bool = true -param appCommandLine string = '' -param appSettings object = {} -param clientAffinityEnabled bool = false -param enableOryxBuild bool = contains(kind, 'linux') -param functionAppScaleLimit int = -1 -param linuxFxVersion string = runtimeNameAndVersion -param minimumElasticInstanceCount int = -1 -param numberOfWorkers int = -1 -param scmDoBuildDuringDeployment bool = false -param use32BitWorkerProcess bool = false -param ftpsState string = 'FtpsOnly' -param healthCheckPath string - -resource appService 'Microsoft.Web/sites@2022-03-01' = { - name: name - location: location - tags: tags - kind: kind - properties: { - serverFarmId: appServicePlanId - siteConfig: { - linuxFxVersion: linuxFxVersion - alwaysOn: alwaysOn - ftpsState: ftpsState - appCommandLine: appCommandLine - numberOfWorkers: numberOfWorkers != -1 ? numberOfWorkers : null - minimumElasticInstanceCount: minimumElasticInstanceCount != -1 ? minimumElasticInstanceCount : null - minTlsVersion: '1.2' - use32BitWorkerProcess: use32BitWorkerProcess - functionAppScaleLimit: functionAppScaleLimit != -1 ? 
functionAppScaleLimit : null - healthCheckPath: healthCheckPath - cors: { - allowedOrigins: union([ 'https://portal.azure.com', 'https://ms.portal.azure.com' ], allowedOrigins) - } - } - clientAffinityEnabled: clientAffinityEnabled - httpsOnly: true - } - - identity: { type: managedIdentity ? 'SystemAssigned' : 'None' } - - resource configAppSettings 'config' = { - name: 'appsettings' - properties: union(appSettings, - { - SCM_DO_BUILD_DURING_DEPLOYMENT: string(scmDoBuildDuringDeployment) - ENABLE_ORYX_BUILD: string(enableOryxBuild) - SSO_AUTHENTICATION_SECRET: ssoSecret - ORYX_DISABLE_TELEMETRY: 'true' - }, - runtimeName == 'python' ? { PYTHON_ENABLE_GUNICORN_MULTIWORKERS: 'true'} : {}, - !empty(applicationInsightsName) ? { APPLICATIONINSIGHTS_CONNECTION_STRING: applicationInsights.properties.ConnectionString } : {}, - !empty(keyVaultName) ? { AZURE_KEY_VAULT_ENDPOINT: keyVault.properties.vaultUri } : {}) - } - - resource configLogs 'config' = { - name: 'logs' - properties: { - applicationLogs: { fileSystem: { level: 'Verbose' } } - detailedErrorMessages: { enabled: true } - failedRequestsTracing: { enabled: true } - httpLogs: { fileSystem: { enabled: true, retentionInDays: 1, retentionInMb: 35 } } - } - dependsOn: [ - configAppSettings - ] - } -} - -resource keyVault 'Microsoft.KeyVault/vaults@2022-07-01' existing = if (!(empty(keyVaultName))) { - name: keyVaultName -} - -resource applicationInsights 'Microsoft.Insights/components@2020-02-02' existing = if (!empty(applicationInsightsName)) { - name: applicationInsightsName -} - -output identityPrincipalId string = managedIdentity ? appService.identity.principalId : '' -output name string = appService.name -output uri string = 'https://${appService.properties.defaultHostName}' diff --git a/infra/core/host/appserviceplan.bicep b/infra/core/host/appserviceplan.bicep deleted file mode 100644 index c444f406..00000000 --- a/infra/core/host/appserviceplan.bicep +++ /dev/null @@ -1,21 +0,0 @@ -param name string -param location string = resourceGroup().location -param tags object = {} - -param kind string = '' -param reserved bool = true -param sku object - -resource appServicePlan 'Microsoft.Web/serverfarms@2022-03-01' = { - name: name - location: location - tags: tags - sku: sku - kind: kind - properties: { - reserved: reserved - } -} - -output id string = appServicePlan.id -output name string = appServicePlan.name diff --git a/infra/core/host/authsettingsV2.bicep b/infra/core/host/authsettingsV2.bicep deleted file mode 100644 index d2d7c2f0..00000000 --- a/infra/core/host/authsettingsV2.bicep +++ /dev/null @@ -1,73 +0,0 @@ -param siteName string -param location string = resourceGroup().location -param ssoConfiguration string - -resource authsettingsV 'Microsoft.Web/sites/config@2022-09-01' = { - name: '${siteName}/authsettingsV2' - location: location - properties: { - platform: { - enabled: true - runtimeVersion: '~1' - } - globalValidation: { - requireAuthentication: true - unauthenticatedClientAction: 'RedirectToLoginPage' - redirectToProvider: 'LHMSSO' - excludedPaths: ['/health' ] - } - identityProviders: { - azureActiveDirectory: { - enabled: true - login: { - disableWWWAuthenticate: false - } - } - customOpenIdConnectProviders: { - LHMSSO: { - registration: { - clientId: 'mucgpt' - clientCredential: { - clientSecretSettingName: 'SSO_AUTHENTICATION_SECRET' - } - openIdConnectConfiguration: { - wellKnownOpenIdConfiguration: ssoConfiguration - } - } - login: { - scopes: [ - 'openid' - ] - } - } - } - } - login: { - tokenStore: { - enabled: 
true - tokenRefreshExtensionHours: 72 - } - preserveUrlFragmentsForLogins: false - allowedExternalRedirectUrls: [] - cookieExpiration: { - convention: 'FixedTime' - timeToExpiration: '08:00:00' - } - nonce: { - validateNonce: true - nonceExpirationInterval: '00:05:00' - } - } - httpSettings: { - requireHttps: true - routes: { - apiPrefix: '/.auth' - } - forwardProxy: { - convention: 'NoProxy' - } - } - } -} - - diff --git a/infra/core/monitor/applicationinsights.bicep b/infra/core/monitor/applicationinsights.bicep deleted file mode 100644 index 0d9bc474..00000000 --- a/infra/core/monitor/applicationinsights.bicep +++ /dev/null @@ -1,17 +0,0 @@ -param name string -param location string = resourceGroup().location -param tags object = {} - -resource applicationInsights 'Microsoft.Insights/components@2020-02-02' = { - name: name - location: location - tags: tags - kind: 'web' - properties: { - Application_Type: 'web' - } -} - -output connectionString string = applicationInsights.properties.ConnectionString -output instrumentationKey string = applicationInsights.properties.InstrumentationKey -output name string = applicationInsights.name diff --git a/infra/core/monitor/monitoring.bicep b/infra/core/monitor/monitoring.bicep deleted file mode 100644 index 0143363f..00000000 --- a/infra/core/monitor/monitoring.bicep +++ /dev/null @@ -1,17 +0,0 @@ -param applicationInsightsName string -param location string = resourceGroup().location -param tags object = {} - -module applicationInsights 'applicationinsights.bicep' = { - name: 'applicationinsights' - params: { - name: applicationInsightsName - location: location - tags: tags - } -} - -output applicationInsightsConnectionString string = applicationInsights.outputs.connectionString -output applicationInsightsInstrumentationKey string = applicationInsights.outputs.instrumentationKey -output applicationInsightsName string = applicationInsights.outputs.name - diff --git a/infra/main.bicep b/infra/main.bicep deleted file mode 100644 index a1be9fd0..00000000 --- a/infra/main.bicep +++ /dev/null @@ -1,185 +0,0 @@ -targetScope = 'subscription' - -@minLength(1) -@maxLength(64) -@description('Name of the the environment which is used to generate a short unique hash used in all resources.') -param environmentName string - -@minLength(1) -@description('Primary location for all resources') -param location string - -param appServicePlanName string = '' -param backendServiceName string = '' -param resourceGroupName string = '' - -param applicationInsightsName string = '' - -@secure() -param ssoSecret string -param ssoIssuer string -param configName string -param tagStage string -param dbHost string = '' -param dbName string = '' -param dbUser string = '' -param backendSkuName string -param backendCapacaty int -@secure() -param dbPassword string = '' - -param openAiServiceName string = '' -param openAiResourceGroupName string = '' -@description('Location for the OpenAI resource group') -@allowed(['canadaeast', 'eastus', 'francecentral', 'japaneast', 'northcentralus', 'westeurope']) -@metadata({ - azd: { - type: 'location' - } -}) -param openAiResourceGroupLocation string - -param openAiSkuName string = 'S0' - -param chatGptDeploymentName string // Set in main.parameters.json -param chatGptDeploymentCapacity int = 70 -param chatGptModelName string = 'gpt-35-turbo' -param chatGptModelVersion string = '0301' - -@description('Use Application Insights for monitoring and performance tracing') -param useApplicationInsights bool = false - -var ssoConfiguration = 
concat(ssoIssuer, '/.well-known/openid-configuration') -var abbrs = loadJsonContent('abbreviations.json') -var resourceToken = toLower(uniqueString(subscription().id, environmentName, location)) -var tags = {'azd-env-name': environmentName,'BusinessCriticality': 'low','BusinessUnit': 'ITM-KM-DI-KI','CostCenter': '313-2-014400','ExpiryDate': '31.12.2999','RequestNumber': 'Nicht vorhanden','ServiceName': 'MUCGPT','ServiceOwner': 'Michael Jaumann - ITM-KM-DI-KI','Stage': tagStage} - -// Organize resources in a resource group -resource resourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' = { - name: !empty(resourceGroupName) ? resourceGroupName : '${abbrs.resourcesResourceGroups}${environmentName}' - location: location - tags: tags -} - -resource openAiResourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' existing = if (!empty(openAiResourceGroupName)) { - name: !empty(openAiResourceGroupName) ? openAiResourceGroupName : resourceGroup.name -} - - -// Monitor application with Azure Monitor -module monitoring 'core/monitor/monitoring.bicep' = if (useApplicationInsights) { - name: 'monitoring' - scope: resourceGroup - params: { - location: location - tags: tags - applicationInsightsName: !empty(applicationInsightsName) ? applicationInsightsName : '${abbrs.insightsComponents}${resourceToken}' - } -} - -// Create an App Service Plan to group applications under the same payment plan and SKU -module appServicePlan 'core/host/appserviceplan.bicep' = { - name: 'appserviceplan' - scope: resourceGroup - params: { - name: !empty(appServicePlanName) ? appServicePlanName : '${abbrs.webServerFarms}${resourceToken}' - location: location - tags: tags - sku: { - name: backendSkuName - capacity: backendCapacaty - } - kind: 'linux' - } -} - -// Create an App Service Plan to group applications under the same payment plan and SKU -module db 'core/db/db.bicep' = { - name: 'db' - scope: resourceGroup - params: { - name: dbHost - location: location - tags: tags - administratorLogin: dbUser - administratorLoginPassword: dbPassword - } -} - -// The application frontend -module backend 'core/host/appservice.bicep' = { - name: 'web' - scope: resourceGroup - params: { - name: !empty(backendServiceName) ? backendServiceName : '${abbrs.webSitesAppService}backend-${resourceToken}' - location: location - tags: union(tags, { 'azd-service-name': 'backend' }) - appServicePlanId: appServicePlan.outputs.id - runtimeName: 'python' - runtimeVersion: '3.10' - appCommandLine: 'python3 -m gunicorn main:app' - scmDoBuildDuringDeployment: true - managedIdentity: true - ssoSecret: ssoSecret - healthCheckPath: '/health' - appSettings: { - AZURE_OPENAI_SERVICE: openAi.outputs.name - AZURE_OPENAI_CHATGPT_DEPLOYMENT: chatGptDeploymentName - AZURE_OPENAI_CHATGPT_MODEL: chatGptModelName - APPLICATIONINSIGHTS_CONNECTION_STRING: useApplicationInsights ? monitoring.outputs.applicationInsightsConnectionString : '' - SSO_ISSUER: ssoIssuer - CONFIG_NAME: configName - DB_HOST: concat(dbHost, '.postgres.database.azure.com') - DB_NAME: dbName - DB_USER: dbUser - DB_PASSWORD: dbPassword - } - } -} - -module openAi 'core/ai/cognitiveservices.bicep' = { - name: 'openai' - scope: openAiResourceGroup - params: { - name: !empty(openAiServiceName) ? 
openAiServiceName : '${abbrs.cognitiveServicesAccounts}${resourceToken}' - location: openAiResourceGroupLocation - tags: tags - sku: { - name: openAiSkuName - } - deployments: [ - { - name: chatGptDeploymentName - model: { - format: 'OpenAI' - name: chatGptModelName - version: chatGptModelVersion - } - sku: { - name: 'Standard' - capacity: chatGptDeploymentCapacity - } - } - ] - } -} - -module authsettingsV2 'core/host/authsettingsV2.bicep' = { - name: 'authsettingsV2' - scope: resourceGroup - params: { - location: location - siteName: backend.outputs.name - ssoConfiguration: ssoConfiguration - } -} - -output AZURE_LOCATION string = location -output AZURE_TENANT_ID string = tenant().tenantId -output AZURE_RESOURCE_GROUP string = resourceGroup.name - -output AZURE_OPENAI_SERVICE string = openAi.outputs.name -output AZURE_OPENAI_RESOURCE_GROUP string = openAiResourceGroup.name -output AZURE_OPENAI_CHATGPT_DEPLOYMENT string = chatGptDeploymentName -output AZURE_OPENAI_CHATGPT_MODEL string = chatGptModelName -output BACKEND_URI string = backend.outputs.uri diff --git a/infra/main.parameters.json b/infra/main.parameters.json deleted file mode 100644 index 9b2d3d26..00000000 --- a/infra/main.parameters.json +++ /dev/null @@ -1,66 +0,0 @@ -{ - "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", - "contentVersion": "1.0.0.0", - "parameters": { - "environmentName": { - "value": "${AZURE_ENV_NAME}" - }, - "resourceGroupName": { - "value": "${AZURE_RESOURCE_GROUP}" - }, - "location": { - "value": "${AZURE_LOCATION}" - }, - "principalId": { - "value": "${AZURE_PRINCIPAL_ID}" - }, - "openAiServiceName": { - "value": "${AZURE_OPENAI_SERVICE}" - }, - "openAiResourceGroupName": { - "value": "${AZURE_OPENAI_RESOURCE_GROUP}" - }, - "openAiSkuName": { - "value": "S0" - }, - "chatGptDeploymentName": { - "value": "${AZURE_OPENAI_CHATGPT_DEPLOYMENT=chat}" - }, - "chatGptDeploymentCapacity": { - "value":"${AZURE_OPENAI_CHATGPT_DEPLOYMENT_CAPACITY}" - }, - "useApplicationInsights": { - "value": "${AZURE_USE_APPLICATION_INSIGHTS=false}" - }, - "ssoSecret": { - "value": "${SSO_AUTHENTICATION_SECRET}" - }, - "ssoIssuer": { - "value": "${SSO_ISSUER}" - }, - "backendSkuName": { - "value": "${BACKEND_SKU}" - }, - "backendCapacaty": { - "value": "${BACKEND_CAPACITY}" - }, - "configName": { - "value": "${CONFIG_NAME}" - }, - "tagStage": { - "value": "${TAG_STAGE}" - }, - "dbHost": { - "value": "${DB_HOST}" - }, - "dbName": { - "value": "${DB_NAME}" - }, - "dbUser": { - "value": "${DB_USER}" - }, - "dbPassword": { - "value": "${DB_PASSWORD}" - } - } -} From 5915e7b39e0e170d17d32a8dadbec112cfdffcf0 Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Wed, 21 Aug 2024 19:48:10 +0200 Subject: [PATCH 08/34] remove azure stuff and terraform files --- app/start.ps1 | 75 ------------------------------------------ app/start.sh | 66 ------------------------------------- azure.yaml | 22 ------------- infra/main.tf | 78 -------------------------------------------- infra/provider.tf | 30 ----------------- infra/variables.tf | 81 ---------------------------------------------- 6 files changed, 352 deletions(-) delete mode 100644 app/start.ps1 delete mode 100755 app/start.sh delete mode 100644 azure.yaml delete mode 100644 infra/main.tf delete mode 100644 infra/provider.tf delete mode 100644 infra/variables.tf diff --git a/app/start.ps1 b/app/start.ps1 deleted file mode 100644 index 8a582eb2..00000000 --- a/app/start.ps1 +++ /dev/null @@ -1,75 +0,0 @@ -Write-Host "" -Write-Host "Loading azd 
.env file from current environment" -Write-Host "" - -foreach ($line in (& azd env get-values)) { - if ($line -match "([^=]+)=(.*)") { - $key = $matches[1] - $value = $matches[2] -replace '^"|"$' - Set-Item -Path "env:\$key" -Value $value - } -} - -if ($LASTEXITCODE -ne 0) { - Write-Host "Failed to load environment variables from azd environment" - exit $LASTEXITCODE -} - - -Write-Host 'Creating python virtual environment "backend/backend_env"' -$pythonCmd = Get-Command python -ErrorAction SilentlyContinue -if (-not $pythonCmd) { - # fallback to python3 if python not found - $pythonCmd = Get-Command python3 -ErrorAction SilentlyContinue -} -Start-Process -FilePath ($pythonCmd).Source -ArgumentList "-m venv ./backend/backend_env" -Wait -NoNewWindow - -Write-Host "" -Write-Host "Restoring backend python packages" -Write-Host "" - -Set-Location backend -$venvPythonPath = "./backend_env/scripts/python.exe" -if (Test-Path -Path "/usr") { - # fallback to Linux venv path - $venvPythonPath = "./backend_env/bin/python" -} - -Start-Process -FilePath $venvPythonPath -ArgumentList "-m pip install -r requirements.txt" -Wait -NoNewWindow -if ($LASTEXITCODE -ne 0) { - Write-Host "Failed to restore backend python packages" - exit $LASTEXITCODE -} - -Write-Host "" -Write-Host "Restoring frontend npm packages" -Write-Host "" -Set-Location ../frontend -npm install -if ($LASTEXITCODE -ne 0) { - Write-Host "Failed to restore frontend npm packages" - exit $LASTEXITCODE -} - -Write-Host "" -Write-Host "Building frontend" -Write-Host "" -npm run build -if ($LASTEXITCODE -ne 0) { - Write-Host "Failed to build frontend" - exit $LASTEXITCODE -} - -Write-Host "" -Write-Host "Starting backend" -Write-Host "" -Set-Location ../backend - -$port = 50505 -$hostname = "localhost" -Start-Process -FilePath $venvPythonPath -ArgumentList "-m quart --app main:app run --port $port --host $hostname --reload" -Wait -NoNewWindow - -if ($LASTEXITCODE -ne 0) { - Write-Host "Failed to start backend" - exit $LASTEXITCODE -} diff --git a/app/start.sh b/app/start.sh deleted file mode 100755 index a62bb0f3..00000000 --- a/app/start.sh +++ /dev/null @@ -1,66 +0,0 @@ -#!/bin/sh - -echo "" -echo "Loading azd .env file from current environment" -echo "" - -while IFS='=' read -r key value; do - value=$(echo "$value" | sed 's/^"//' | sed 's/"$//') - export "$key=$value" -done < Date: Wed, 21 Aug 2024 19:48:38 +0200 Subject: [PATCH 09/34] run without docker --- README.md | 69 ++++++++++---------------------- app/backend/core/confighelper.py | 11 +++-- app/backend/init_app.py | 4 +- app/backend/requirements.txt | 6 +-- 4 files changed, 32 insertions(+), 58 deletions(-) diff --git a/README.md b/README.md index 6ddedc16..0a9a91e5 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,6 @@ The documentation project is built with technologies we use in our projects (see ### Backend: * [Python 3.9, 3.10 or 3.11](https://www.python.org/downloads/) * [Quart](https://pgjones.gitlab.io/quart/) -* [Azure OpenAI](https://azure.microsoft.com/de-de/products/ai-services/openai-service) * [LangChain](https://www.langchain.com/) ### Frontend: @@ -43,10 +42,10 @@ The documentation project is built with technologies we use in our projects (see * [Javascript](https://wiki.selfhtml.org/wiki/JavaScript) ### Deployment: - * [Azure Developer CLI](https://learn.microsoft.com/en-us/azure/developer/azure-developer-cli/install-azd?tabs=winget-windows%2Cbrew-mac%2Cscript-linux&pivots=os-windows) * [Node.js 14+](https://nodejs.org/en/download/package-manager) * 
[Git](https://git-scm.com/downloads) - * [Powershell 7+ (pwsh)](https://github.com/powershell/powershell) + * Python 12 + * Docker ## Table of contents * [Built With](#built-with) @@ -64,52 +63,26 @@ The documentation project is built with technologies we use in our projects (see See the [open issues](https://github.com/it-at-m/mucgpt/issues) for a full list of proposed features (and known issues). +## Run + Configure your environment in [config/default.json](config/default.json). Insert Model Endpoint and API Key for your connection to an OpenAI completion endpoint or an Azure OpenAI completions endpoint. +### Run locally +``` +cd app\backend +pip install --no-cache-dir --upgrade -r requirements.txt +cd ..\frontend +npm run build +cd ..\backend +$env:MUCGPT_CONFIG="path to default.json" +$env:MUCGPT_BASE_CONFIG="path to base.json" +python -m quart --app main:app run +``` + + +### Run with docker +1. Build an Image + ``` docker build --tag mucgpt-local . --build-arg fromconfig="./config/default.json"``` +2. Run the image ```docker run --detach --publish 8080:8000 mucgpt-local``` -## Set up on Azure -As this project bases on a template of Microsoft Azure see also [here](https://github.com/Azure-Samples/azure-search-openai-demo?tab=readme-ov-file#azure-deployment) for the deployment documentation. -### You need the following requirements to set up MUCGPT on Azure: -* Azure account -* Azure subscription with access enabled for the Azure OpenAI service -* Account Permissions: - * `Microsoft.Authorization/roleAssignments/write` - * Role Based Access Control Administrator, User Access Administrator, or Owner - * subscription-level permissions - * `Microsoft.Resources/deployments/write` on the subscription level - - -### Cost estimation: -Pricing varies per region and usage, so it isn't possible to predict exact costs for your usage. However, you can try the [Azure pricing calculator](https://azure.microsoft.com/en-us/pricing/calculator/) for the resources below. -* Azure App Service -* Azure OpenAI -* Flexibler Azure Database for PostgreSQL-Server -* App Service-Plan - -### Deploying -1. Install the [required tools](#built-with) -2. Clone the repository with the command `git clone https://github.com/it-at-m/mucgpt` and switch in your terminal to the folder -3. Login to your Azure account: `azd auth login` -4. Create a new azd environemnt with `azd env new`. Enter a name that will be used for the resource group. This will create a new folder in the `.azure` folder, and set it as the active environment for any calls to `azd` going forward. -5. (Optional) This is the point where you can customize the deployment by setting environment variables, in order to use existing resources, enable optional features (such as auth or vision), or deploy to free tiers. -6. Run `azd up` - This will provision Azure resources and deploy this sample to those resources. -7. After the application has been successfully deployed you will see a URL printed to the console. Click that URL to interact with the application in your browser. It will look like the following: -![](/docs/endpoint.png) - > **_NOTE:_** It may take 5-10 minutes after you see 'SUCCESS' for the application to be fully deployed. If you see a "Python Developer" welcome screen or an error page, then wait a bit and refresh the page. - -### Deploying again -If you've only changed the backend/frontend code in the `app` folder, then you don't need to re-provision the Azure resources. 
You can just run: - -`azd deploy` - -If you've changed the infrastructure files (`infra` folder or `azure.yaml`), then you'll need to re-provision the Azure resources. You can do that by running: - -`azd up` - -### Running locally -You can only run locally after having successfully run the `azd up` command. If you haven't yet, follow the steps in [Deploying](#deploying) above. - -1. Run `azd auth login` -2. Change dir to app -3. Run `./start.ps1` or `./start.sh` to start the app ## Documentation ![Architecture](docs/appcomponents_en.png) diff --git a/app/backend/core/confighelper.py b/app/backend/core/confighelper.py index 80213cc5..b8f3b6f9 100644 --- a/app/backend/core/confighelper.py +++ b/app/backend/core/confighelper.py @@ -6,14 +6,13 @@ class ConfigHelper: """Loads an available configuration. """ - def __init__(self, base_path: str, env: str, base_config_name: str = "base"): - self.base_path = base_path - self.base_config_name = base_config_name - self.env = env + def __init__(self, env_config: str, base_config: str = "base.json"): + self.base_config = base_config + self.env_config = env_config def loadData(self) -> Config: - with open(self.base_path + self.env + ".json") as f: + with open(self.env_config) as f: env_config = json.load(f) - with open(self.base_path + self.base_config_name + ".json") as f: + with open(self.base_config) as f: base_config = json.load(f) result_dict = dict(env_config,**base_config) return to_typed_config(result_dict) \ No newline at end of file diff --git a/app/backend/init_app.py b/app/backend/init_app.py index dee97eb2..d99f30e1 100644 --- a/app/backend/init_app.py +++ b/app/backend/init_app.py @@ -54,7 +54,9 @@ async def initApp() -> AppConfig: """ # read enviornment config - config_helper = ConfigHelper(base_path=os.path.dirname(os.path.realpath(__file__))+"/", env="config", base_config_name="base") + env_config = os.environ['MUCGPT_CONFIG'] if "MUCGPT_CONFIG" in os.environ else os.path.dirname(os.path.realpath(__file__))+"/config.json" + base_config = os.environ['MUCGPT_BASE_CONFIG'] if "MUCGPT_BASE_CONFIG" in os.environ is not None else os.path.dirname(os.path.realpath(__file__))+"/base.json" + config_helper = ConfigHelper(env_config=env_config, base_config=base_config) cfg = config_helper.loadData() # Set up authentication helper auth_helper = AuthentificationHelper( diff --git a/app/backend/requirements.txt b/app/backend/requirements.txt index ba31fafa..f3459d70 100644 --- a/app/backend/requirements.txt +++ b/app/backend/requirements.txt @@ -5,9 +5,9 @@ langchain_community tiktoken uvicorn[standard]==0.30.3 aiohttp==3.10.2 -opentelemetry-instrumentation-asgi==0.46b0 -opentelemetry-instrumentation-requests==0.46b0 -opentelemetry-instrumentation-aiohttp-client==0.46b0 +opentelemetry-instrumentation-asgi==0.47b0 +opentelemetry-instrumentation-requests +opentelemetry-instrumentation-aiohttp-client werkzeug==3.0.3 joserfc requests From 7d3a2177f874ea423cfa5832a4ff4e1cb8ba156a Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Wed, 21 Aug 2024 21:57:07 +0200 Subject: [PATCH 10/34] =?UTF-8?q?=F0=9F=94=AE=20select=20model=20from=20co?= =?UTF-8?q?nfig=20in=20backend?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/backend/app.py | 14 ++++++- app/backend/core/types/AppConfig.py | 4 +- app/backend/core/types/Config.py | 4 ++ app/frontend/src/api/models.ts | 11 +++--- .../ChatsettingsDrawer/ChatsettingsDrawer.tsx | 13 +++---- .../LLMSelector/LLMContextProvider.tsx | 9 +++-- 
.../components/LLMSelector/LLMSelector.tsx | 22 +++++------ .../QuestionInput/QuestionInput.tsx | 5 ++- .../SettingsDrawer/SettingsDrawer.tsx | 18 ++++----- .../src/components/SumInput/SumInput.tsx | 8 ++-- app/frontend/src/index.tsx | 5 ++- app/frontend/src/pages/chat/Chat.tsx | 1 + app/frontend/src/pages/layout/Layout.tsx | 38 +++++++++++++------ 13 files changed, 94 insertions(+), 58 deletions(-) diff --git a/app/backend/app.py b/app/backend/app.py index dd570c5d..7ef9b1cf 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -1,7 +1,7 @@ import json import logging import os -from typing import cast +from typing import List, cast from opentelemetry.instrumentation.asgi import OpenTelemetryMiddleware from quart import ( Blueprint, @@ -15,6 +15,7 @@ send_from_directory, ) +from core.types.Config import ModelsConfig, ModelsDTO from core.authentification import AuthentificationHelper, AuthError from core.helper import format_as_ndjson from core.modelhelper import num_tokens_from_message @@ -139,7 +140,16 @@ async def chat(): @bp.route("/config", methods=["GET"]) async def getConfig(): cfg = get_config_and_authentificate() - return jsonify(cfg["configuration_features"]) + frontend_features = cfg["configuration_features"]["frontend"] + models= cast(List[ModelsConfig], cfg["configuration_features"]["backend"]["models"]) + models_dto_list = [] + for model in models: + dto = ModelsDTO(model_name=model["model_name"], max_tokens=model["max_tokens"]) + models_dto_list.append(dto) + return jsonify({ + "frontend": frontend_features, + "models": models_dto_list + }) @bp.route("/statistics", methods=["GET"]) async def getStatistics(): diff --git a/app/backend/core/types/AppConfig.py b/app/backend/core/types/AppConfig.py index 88520801..44deae5f 100644 --- a/app/backend/core/types/AppConfig.py +++ b/app/backend/core/types/AppConfig.py @@ -1,4 +1,4 @@ -from typing import TypedDict +from typing import List, TypedDict from brainstorm.brainstorm import Brainstorm from chat.chat import Chat from core.authentification import AuthentificationHelper @@ -14,6 +14,6 @@ class AppConfig(TypedDict): sum_approaches: Summarize brainstorm_approaches: Brainstorm authentification_client: AuthentificationHelper - configuration_features: Config + configuration_features: List[Config] repository: Repository backend_config: BackendConfig \ No newline at end of file diff --git a/app/backend/core/types/Config.py b/app/backend/core/types/Config.py index 828a9e2f..83aeddf0 100644 --- a/app/backend/core/types/Config.py +++ b/app/backend/core/types/Config.py @@ -13,6 +13,10 @@ class ModelsConfig(TypedDict): api_version: str max_tokens: int +class ModelsDTO(TypedDict): + model_name: str + max_tokens: int + class SSOConfig(TypedDict): sso_issuer: str role: str diff --git a/app/frontend/src/api/models.ts b/app/frontend/src/api/models.ts index d439bf90..fb266720 100644 --- a/app/frontend/src/api/models.ts +++ b/app/frontend/src/api/models.ts @@ -36,20 +36,21 @@ export type BrainstormRequest = { }; export interface ApplicationConfig { - backend: Backend; + models: Model[]; frontend: Frontend; version: string; } -export interface Backend { - enable_auth: boolean; -} - export interface Frontend { alternative_logo: boolean; labels: Labels; } +export interface Model { + max_tokens: number; + model_name: string; +} + export interface Labels { env_name: string; } diff --git a/app/frontend/src/components/ChatsettingsDrawer/ChatsettingsDrawer.tsx b/app/frontend/src/components/ChatsettingsDrawer/ChatsettingsDrawer.tsx index 
a7559640..c7041a85 100644 --- a/app/frontend/src/components/ChatsettingsDrawer/ChatsettingsDrawer.tsx +++ b/app/frontend/src/components/ChatsettingsDrawer/ChatsettingsDrawer.tsx @@ -1,7 +1,5 @@ -import { ChatSettings24Regular, ChatWarning24Regular, CheckboxWarning24Regular, Delete24Regular, Dismiss24Regular } from "@fluentui/react-icons"; +import { ChatSettings24Regular, ChatWarning24Regular, Dismiss24Regular } from "@fluentui/react-icons"; import { - DrawerHeader, - DrawerHeaderTitle, OverlayDrawer, Button, Slider, @@ -12,13 +10,13 @@ import { InfoLabel, Tooltip, Textarea, - TextareaOnChangeData, - Badge + TextareaOnChangeData } from "@fluentui/react-components"; import styles from "./ChatsettingsDrawer.module.css"; -import { useCallback, useState } from "react"; +import { useCallback, useContext, useState } from "react"; import { useTranslation } from 'react-i18next'; +import { LLMContext } from "../LLMSelector/LLMContextProvider"; interface Props { temperature: number; setTemperature: (temp: number, id: number) => void; @@ -32,6 +30,7 @@ interface Props { export const ChatsettingsDrawer = ({ temperature, setTemperature, max_tokens, setMaxTokens, systemPrompt, setSystemPrompt, current_id }: Props) => { const [isOpen, setIsOpen] = useState(false); const { t, i18n } = useTranslation(); + const { LLM } = useContext(LLMContext) const onClickRightButton = useCallback(() => { setIsOpen(true); }, []) @@ -42,7 +41,7 @@ export const ChatsettingsDrawer = ({ temperature, setTemperature, max_tokens, se const max_tokensID = useId("input-max_tokens"); const min_max_tokens = 10; - const max_max_tokens = 4000; + const max_max_tokens = LLM.max_tokens; const min_temp = 0; const max_temp = 1; diff --git a/app/frontend/src/components/LLMSelector/LLMContextProvider.tsx b/app/frontend/src/components/LLMSelector/LLMContextProvider.tsx index 72d59a71..c1abb484 100644 --- a/app/frontend/src/components/LLMSelector/LLMContextProvider.tsx +++ b/app/frontend/src/components/LLMSelector/LLMContextProvider.tsx @@ -1,16 +1,17 @@ // Context.js import React, { Dispatch, SetStateAction, useState } from "react"; +import { Model } from "../../api"; interface ILLMProvider { - LLM: string; - setLLM: Dispatch>; + LLM: Model; + setLLM: Dispatch>; } export const DEFAULTLLM = "GPT-4o-mini"; -export const LLMContext = React.createContext({ LLM: DEFAULTLLM, setLLM: () => { } }); +export const LLMContext = React.createContext({ LLM: { model_name: DEFAULTLLM, max_tokens: 0 }, setLLM: () => { } }); export const LLMContextProvider = (props: React.PropsWithChildren<{}>) => { - const [LLM, setLLM] = useState(DEFAULTLLM); + const [LLM, setLLM] = useState({ model_name: DEFAULTLLM, max_tokens: 0 }); return ( diff --git a/app/frontend/src/components/LLMSelector/LLMSelector.tsx b/app/frontend/src/components/LLMSelector/LLMSelector.tsx index e78fa87f..31ba027a 100644 --- a/app/frontend/src/components/LLMSelector/LLMSelector.tsx +++ b/app/frontend/src/components/LLMSelector/LLMSelector.tsx @@ -4,6 +4,7 @@ import { Option, } from "@fluentui/react-components"; import { SelectionEvents, OptionOnSelectData } from "@fluentui/react-combobox"; +import { Model } from "../../api"; const useStyles = makeStyles({ root: { @@ -17,10 +18,11 @@ const useStyles = makeStyles({ interface Props { onSelectionChange: (e: SelectionEvents, selection: OptionOnSelectData) => void; defaultLLM: string; + options: Model[]; } -export const LLMSelector = ({ onSelectionChange, defaultLLM }: Props) => { +export const LLMSelector = ({ onSelectionChange, defaultLLM, options }: 
Props) => { const styles = useStyles(); return (
@@ -30,18 +32,12 @@ export const LLMSelector = ({ onSelectionChange, defaultLLM }: Props) => { onOptionSelect={onSelectionChange} appearance="underline" size="small" positioning="below-start"> - - - - + {options.map((item, index) => ( + + + ))}
); diff --git a/app/frontend/src/components/QuestionInput/QuestionInput.tsx b/app/frontend/src/components/QuestionInput/QuestionInput.tsx index c2733150..7787220e 100644 --- a/app/frontend/src/components/QuestionInput/QuestionInput.tsx +++ b/app/frontend/src/components/QuestionInput/QuestionInput.tsx @@ -4,6 +4,8 @@ import { Send28Filled } from "@fluentui/react-icons"; import styles from "./QuestionInput.module.css"; import { useTranslation } from 'react-i18next'; +import { useContext } from "react"; +import { LLMContext } from "../LLMSelector/LLMContextProvider"; interface Props { onSend: (question: string) => void; @@ -18,7 +20,8 @@ interface Props { export const QuestionInput = ({ onSend, disabled, placeholder, clearOnSend, tokens_used, token_limit_tracking = true, question, setQuestion }: Props) => { const { t, i18n } = useTranslation(); - const wordCount = 4000; + const { LLM } = useContext(LLMContext) + const wordCount = LLM.max_tokens; const getDescription = () => { let actual = countWords(question) + tokens_used; let text; diff --git a/app/frontend/src/components/SettingsDrawer/SettingsDrawer.tsx b/app/frontend/src/components/SettingsDrawer/SettingsDrawer.tsx index b9b013ca..84f3c565 100644 --- a/app/frontend/src/components/SettingsDrawer/SettingsDrawer.tsx +++ b/app/frontend/src/components/SettingsDrawer/SettingsDrawer.tsx @@ -1,10 +1,7 @@ import { ChevronDown24Regular, DarkTheme20Regular, Dismiss24Regular, FontIncrease20Regular, Mail24Regular } from "@fluentui/react-icons"; import { - DrawerHeader, - DrawerHeaderTitle, OverlayDrawer, Button, - CheckboxOnChangeData, Slider, SliderProps, Label, @@ -14,12 +11,13 @@ import { } from "@fluentui/react-components"; import styles from "./SettingsDrawer.module.css"; -import { ChangeEvent, useCallback, useState } from "react"; +import { useCallback, useState } from "react"; import { SelectionEvents, OptionOnSelectData } from "@fluentui/react-combobox"; import { LanguageSelector } from "../../components/LanguageSelector"; import { LLMSelector } from "../LLMSelector/LLMSelector"; import { useTranslation } from 'react-i18next'; import cheetsheet from "../../assets/mucgpt_cheatsheet.pdf"; +import { Model } from "../../api"; interface Props { onLanguageSelectionChanged: (e: SelectionEvents, selection: OptionOnSelectData) => void; defaultlang: string; @@ -30,9 +28,10 @@ interface Props { setTheme: (isLight: boolean) => void; onLLMSelectionChanged: (e: SelectionEvents, selection: OptionOnSelectData) => void; defaultLLM: string; + llmOptions: Model[]; } -export const SettingsDrawer = ({ onLanguageSelectionChanged, defaultlang, version, fontscale, setFontscale, isLight, setTheme, onLLMSelectionChanged, defaultLLM }: Props) => { +export const SettingsDrawer = ({ onLanguageSelectionChanged, defaultlang, version, fontscale, setFontscale, isLight, setTheme, onLLMSelectionChanged, defaultLLM, llmOptions }: Props) => { const [isOpen, setIsOpen] = useState(false); const { t, i18n } = useTranslation(); @@ -74,14 +73,15 @@ export const SettingsDrawer = ({ onLanguageSelectionChanged, defaultlang, versio
-
- {t('components.settingsdrawer.fontsize')} -
+
{t('components.settingsdrawer.llm')}
- + +
+
+ {t('components.settingsdrawer.fontsize')}
diff --git a/app/frontend/src/components/SumInput/SumInput.tsx b/app/frontend/src/components/SumInput/SumInput.tsx index ea2ac963..ad8348a8 100644 --- a/app/frontend/src/components/SumInput/SumInput.tsx +++ b/app/frontend/src/components/SumInput/SumInput.tsx @@ -1,10 +1,11 @@ -import { DragEventHandler, useState } from "react"; -import { Stack, classNamesFunction, } from "@fluentui/react"; +import { useContext, useState } from "react"; +import { Stack, } from "@fluentui/react"; import { Button, Tooltip, Textarea, TextareaOnChangeData, } from "@fluentui/react-components"; import { Delete24Regular, Send28Filled } from "@fluentui/react-icons"; import styles from "./SumInput.module.css"; import { useTranslation } from 'react-i18next'; +import { LLMContext } from "../LLMSelector/LLMContextProvider"; interface Props { onSend: (question: string, file?: File) => void; @@ -22,7 +23,8 @@ export const SumInput = ({ onSend, disabled, placeholder, clearOnSend, tokens_us const { t, i18n } = useTranslation(); const [dragging, setDragging] = useState(false); const [file, setFile] = useState(undefined); - const wordCount = 4000; + const { LLM } = useContext(LLMContext) + const wordCount = LLM.max_tokens; const getDescription = () => { let actual = countWords(question) + tokens_used; let text; diff --git a/app/frontend/src/index.tsx b/app/frontend/src/index.tsx index f84f0fa6..4a08ae65 100644 --- a/app/frontend/src/index.tsx +++ b/app/frontend/src/index.tsx @@ -14,6 +14,7 @@ import { LanguageContextProvider } from "./components/LanguageSelector/LanguageC import Brainstorm from "./pages/brainstorm/Brainstorm"; import Faq from "./pages/faq/Faq"; import Version from "./pages/version/Version"; +import { LLMContextProvider } from "./components/LLMSelector/LLMContextProvider"; initializeIcons(); const router = createHashRouter([ @@ -63,7 +64,9 @@ const router = createHashRouter([ ReactDOM.createRoot(document.getElementById("root") as HTMLElement).render( - + + + ); diff --git a/app/frontend/src/pages/chat/Chat.tsx b/app/frontend/src/pages/chat/Chat.tsx index 087e4042..f5efdda6 100644 --- a/app/frontend/src/pages/chat/Chat.tsx +++ b/app/frontend/src/pages/chat/Chat.tsx @@ -16,6 +16,7 @@ import { indexedDBStorage, saveToDB, getStartDataFromDB, popLastMessageInDB, get import { History } from "../../components/History/History"; import useDebounce from "../../hooks/debouncehook"; import { MessageError } from "./MessageError"; +import { LLMContext } from "../../components/LLMSelector/LLMContextProvider"; const enum STORAGE_KEYS { CHAT_TEMPERATURE = 'CHAT_TEMPERATURE', diff --git a/app/frontend/src/pages/layout/Layout.tsx b/app/frontend/src/pages/layout/Layout.tsx index b1baaf4a..5be1df4c 100644 --- a/app/frontend/src/pages/layout/Layout.tsx +++ b/app/frontend/src/pages/layout/Layout.tsx @@ -27,16 +27,15 @@ export const Layout = () => { const navigate = useNavigate() const termsofuseread = localStorage.getItem(STORAGE_KEYS.TERMS_OF_USE_READ) === formatDate(new Date()); const language_pref = (localStorage.getItem(STORAGE_KEYS.SETTINGS_LANGUAGE)) || DEFAULTLANG; - const llm_pref = (localStorage.getItem(STORAGE_KEYS.SETTINGS_LLM)) || DEFAULTLLM; - const font_scaling_pref = Number(localStorage.getItem(STORAGE_KEYS.SETTINGS_FONT_SCALING)) || 1; - const ligth_theme_pref = localStorage.getItem(STORAGE_KEYS.SETTINGS_IS_LIGHT_THEME) === null ? 
true : localStorage.getItem(STORAGE_KEYS.SETTINGS_IS_LIGHT_THEME) == 'true'; - const { language, setLanguage } = useContext(LanguageContext); - const { LLM, setLLM } = useContext(LLMContext); - const { t, i18n } = useTranslation(); const [config, setConfig] = useState({ - backend: { - enable_auth: true + models: [{ + "model_name": "KICC GPT", + "max_tokens": 128000 }, + { + "model_name": "Unknown GPT", + "max_tokens": 100 + }], frontend: { labels: { "env_name": "MUC tschibidi-C" @@ -45,6 +44,12 @@ export const Layout = () => { }, version: "DEV 1.0.0" }); + const llm_pref = (localStorage.getItem(STORAGE_KEYS.SETTINGS_LLM)) || config.models[0].model_name; + const font_scaling_pref = Number(localStorage.getItem(STORAGE_KEYS.SETTINGS_FONT_SCALING)) || 1; + const ligth_theme_pref = localStorage.getItem(STORAGE_KEYS.SETTINGS_IS_LIGHT_THEME) === null ? true : localStorage.getItem(STORAGE_KEYS.SETTINGS_IS_LIGHT_THEME) == 'true'; + const { language, setLanguage } = useContext(LanguageContext); + const { LLM, setLLM } = useContext(LLMContext); + const { t, i18n } = useTranslation(); const [isLight, setLight] = useState(ligth_theme_pref); const [fontscaling, setFontscaling] = useState(font_scaling_pref); @@ -67,7 +72,11 @@ export const Layout = () => { useEffect(() => { configApi().then(result => { setConfig(result); - }, () => { console.log("Config nicht geladen"); }); + if (result.models.length === 0) + console.error("Keine Modelle vorhanden"); + if (result.models.filter((model) => LLM.model_name === model.model_name).length === 0) + setLLM(result.models[0]) + }, () => { console.error("Config nicht geladen"); }); i18n.changeLanguage(language_pref); }, []); @@ -87,10 +96,16 @@ export const Layout = () => { }; const onLLMSelectionChanged = (e: SelectionEvents, selection: OptionOnSelectData) => { let llm = selection.optionValue || DEFAULTLLM; - setLLM(llm); - localStorage.setItem(STORAGE_KEYS.SETTINGS_LLM, llm); + let found_llm = models.find((model) => model.model_name == llm); + if (found_llm) { + setLLM(found_llm); + localStorage.setItem(STORAGE_KEYS.SETTINGS_LLM, llm); + } + }; + const models = config.models; + return ( @@ -134,6 +149,7 @@ export const Layout = () => { setTheme={onThemeChange} defaultLLM={llm_pref} onLLMSelectionChanged={onLLMSelectionChanged} + llmOptions={models} >
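
For reference, the contract this commit establishes between the backend `/config` route and the frontend can be sketched as follows. This is an illustrative example, not part of the patch: the model names, token limits and the `alternative_logo` value are placeholders mirroring the fallback defaults in `Layout.tsx`, and the local type aliases simply restate `ApplicationConfig`, `Model` and `Frontend` from `app/frontend/src/api/models.ts`.

```typescript
// Sketch of the GET /config payload consumed by configApi() in Layout.tsx.
type Model = { model_name: string; max_tokens: number };
type Frontend = { alternative_logo: boolean; labels: { env_name: string } };
type ApplicationConfig = { models: Model[]; frontend: Frontend; version: string };

const exampleConfig: ApplicationConfig = {
    models: [
        { model_name: "KICC GPT", max_tokens: 128000 }, // placeholder values, as in the Layout.tsx defaults
        { model_name: "Unknown GPT", max_tokens: 100 }
    ],
    frontend: {
        alternative_logo: false, // assumed value; the real one comes from the backend config
        labels: { env_name: "MUC tschibidi-C" }
    },
    version: "DEV 1.0.0" // note: the getConfig handler shown above returns only "frontend" and "models"
};
```

With a payload of this shape, `Layout.tsx` can fill the LLM selector from `config.models` and fall back to the first entry whenever the currently selected model is not in the list.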
From 781f9cf6a46ad012f2270080abab424901a197eb Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Thu, 22 Aug 2024 14:52:36 +0200 Subject: [PATCH 11/34] =?UTF-8?q?=F0=9F=8E=86=20GPT4o-mini=20funktioniert?= =?UTF-8?q?=20auch?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/backend/app.py | 8 +- app/backend/chat/chat.py | 129 ++++++------------------ app/backend/core/llmhelper.py | 29 +++++- app/backend/core/modelhelper.py | 141 ++++++++++++--------------- app/frontend/src/api/api.ts | 3 +- app/frontend/src/api/models.ts | 1 + app/frontend/src/pages/chat/Chat.tsx | 4 +- 7 files changed, 130 insertions(+), 185 deletions(-) diff --git a/app/backend/app.py b/app/backend/app.py index 7ef9b1cf..051790f8 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -14,11 +14,11 @@ send_file, send_from_directory, ) - +from langchain_core.messages.human import HumanMessage +from core.modelhelper import num_tokens_from_messages from core.types.Config import ModelsConfig, ModelsDTO from core.authentification import AuthentificationHelper, AuthError from core.helper import format_as_ndjson -from core.modelhelper import num_tokens_from_message from core.types.AppConfig import AppConfig from core.types.countresult import CountResult from init_app import initApp @@ -101,10 +101,12 @@ async def chat_stream(): temperature=request_json['temperature'] or 0.7 max_tokens=request_json['max_tokens'] or 4096 system_message = request_json['system_message'] or None + model = request_json['model'] response_generator = impl.run_with_streaming(history= request_json["history"], temperature=temperature, max_tokens=max_tokens, system_message=system_message, + model=model, department= department) response = await make_response(format_as_ndjson(response_generator)) response.timeout = None # type: ignore @@ -170,7 +172,7 @@ async def counttokens(): request_json = await request.get_json() message=request_json['text'] or "" - counted_tokens = num_tokens_from_message(message,"gpt-35-turbo") #TODO use correct model + counted_tokens = num_tokens_from_messages([HumanMessage(message)],"gpt-35-turbo") #TODO use correct model return jsonify(CountResult(count=counted_tokens)) @bp.route("/statistics/export", methods=["GET"]) diff --git a/app/backend/chat/chat.py b/app/backend/chat/chat.py index dba2f2dc..1ba1fd21 100644 --- a/app/backend/chat/chat.py +++ b/app/backend/chat/chat.py @@ -1,22 +1,13 @@ -import asyncio -from typing import Any, AsyncGenerator, Optional, Sequence, Tuple +from typing import AsyncGenerator, Optional, Sequence -from langchain.callbacks.streaming_aiter import AsyncIteratorCallbackHandler -from langchain.chains import LLMChain -from langchain.memory import ConversationBufferMemory -from langchain.prompts import ( - ChatPromptTemplate, - HumanMessagePromptTemplate, - MessagesPlaceholder, - SystemMessagePromptTemplate, -) from langchain_community.callbacks import get_openai_callback from langchain_core.messages import AIMessage from langchain_core.runnables.base import RunnableSerializable +from langchain_core.messages import HumanMessage, SystemMessage from chat.chatresult import ChatResult from core.datahelper import Repository, Requestinfo -from core.modelhelper import num_tokens_from_message, num_tokens_from_messages +from core.modelhelper import num_tokens_from_messages from core.types.Chunk import Chunk, ChunkInfo from core.types.Config import ApproachConfig from core.types.LlmConfigs import LlmConfigs @@ -30,23 +21,8 @@ def __init__(self, llm: 
RunnableSerializable, config: ApproachConfig, repo: Repo self.llm = llm self.config = config self.repo = repo - - async def create_coroutine(self, history: "Sequence[dict[str, str]]", llm: RunnableSerializable, system_message: Optional[str]) -> Any: - """Calls the llm in streaming mode - - Args: - history (Sequence[dict[str, str]]): given set of messages - llm (RunnableSerializable): the llm - system_message (Optional[str]): the system message - - Returns: - Any: A Coroutine streaming the chat results - """ - user_q, conversation = self.init_conversation(history, llm, system_message) - chat_coroutine = conversation.acall({"question": user_q}) - return (chat_coroutine) - async def run_with_streaming(self, history: 'list[dict[str, str]]',max_tokens: int, temperature: float, system_message: Optional[str], department: Optional[str]) -> AsyncGenerator[Chunk, None]: + async def run_with_streaming(self, history: 'list[dict[str, str]]',max_tokens: int, temperature: float, system_message: Optional[str], model: str, department: Optional[str]) -> AsyncGenerator[Chunk, None]: """call the llm in streaming mode Args: @@ -55,6 +31,7 @@ async def run_with_streaming(self, history: 'list[dict[str, str]]',max_tokens: i temperature (float): temperature of the llm system_message (Optional[str]): the system message department (Optional[str]): from which department comes the call + model (str): the choosen model Returns: AsyncGenerator[Chunks, None]: a generator returning chunks of messages @@ -63,50 +40,38 @@ async def run_with_streaming(self, history: 'list[dict[str, str]]',max_tokens: i Iterator[AsyncGenerator[Chunks, None]]: Chunks of chat messages. n messages with content. One final message with infos about the consumed tokens. """ # configure - handler = AsyncIteratorCallbackHandler() config: LlmConfigs = { "llm_max_tokens": max_tokens, "llm_temperature": temperature, "llm_streaming": True, - "llm_callbacks": [handler], + "llm": model } llm = self.llm.with_config(configurable=config) - - # create coroutine - chat_coroutine = await self.create_coroutine(history, llm=llm, system_message=system_message) - task = asyncio.create_task(chat_coroutine) + msgs = self.init_messages(history = history, system_message=system_message) result = "" position = 0 - # go over events - async for event in handler.aiter(): - result += str(event) - yield Chunk(type="C", message= event, order=position) - position += 1 - - # await till we have collected all events - await task - + try: + async for event in llm.astream(msgs): + result += str(event.content) + yield Chunk(type="C", message= event.content, order=position) + position += 1 + except Exception as ex: + yield Chunk(type="E",message= ex.exception(), order=position) # handle exceptions - if task.exception(): - if "Rate limit" in str(task.exception()): - yield Chunk(type="E",message= "Momentan liegt eine starke Auslastung vor. 
Bitte in einigen Sekunden erneut versuchen.", order=position) - else: - yield Chunk(type="E",message= task.exception(), order=position) + # TODO find ratelimits + # TODO use callbacks https://clemenssiebler.com/posts/azure_openai_load_balancing_langchain_with_fallbacks/ else: history[-1]["bot"] = result - system_message_tokens = 0 - if(system_message and system_message.strip() !=""): - system_message_tokens = num_tokens_from_message(system_message,"gpt-35-turbo") #TODO if self.config["log_tokens"]: self.repo.addInfo(Requestinfo( - tokencount = num_tokens_from_messages(history,"gpt-35-turbo") + system_message_tokens, #TODO richtiges Modell und tokenizer auswählen + tokencount = num_tokens_from_messages(messages=msgs,model=model), #TODO richtiges Modell und tokenizer auswählen department = department, messagecount= len(history), method = "Chat")) - info = ChunkInfo(requesttokens=num_tokens_from_message(history[-1]["user"],"gpt-35-turbo"), streamedtokens=num_tokens_from_message(result,"gpt-35-turbo")) #TODO + info = ChunkInfo(requesttokens=num_tokens_from_messages([msgs[-1]],model), streamedtokens=num_tokens_from_messages([AIMessage(result)], model)) yield Chunk(type="I", message=info, order=position) def run_without_streaming(self, history: "Sequence[dict[str, str]]", max_tokens: int, temperature: float, system_message: Optional[str], department: Optional[str]) -> ChatResult: @@ -128,10 +93,10 @@ def run_without_streaming(self, history: "Sequence[dict[str, str]]", max_tokens: "llm_streaming": False, } llm = self.llm.with_config(configurable=config) - user_q, conversation = self.init_conversation(history, llm, system_message) + msgs = self.init_messages(history = history, system_message=system_message) with get_openai_callback() as cb: - ai_message: AIMessage = conversation.invoke({"question": user_q}) + ai_message: AIMessage = llm.invoke(msgs) total_tokens = cb.total_tokens if self.config["log_tokens"]: @@ -140,58 +105,24 @@ def run_without_streaming(self, history: "Sequence[dict[str, str]]", max_tokens: department = department, messagecount= 1, method = "Brainstorm")) - return ChatResult(content=ai_message["chat_history"][-1].content) - - def init_conversation(self, history: "Sequence[dict[str, str]]", llm: RunnableSerializable, system_message:str) -> Tuple[str, Any]: - """transform the history into langchain format, initates the llm with the messages - - Args: - history (Sequence[dict[str, str]]): the previous chat messages - llm (RunnableSerializable): the llm - system_message (str): the system message - - Returns: - Tuple[str, Any]: (user query, the configured llm with memory) - """ - user_q = history[-1]["user"] - messages = [ - # The `variable_name` here is what must align with memory - MessagesPlaceholder(variable_name="chat_history"), - HumanMessagePromptTemplate.from_template("{question}") - ] - if(system_message and system_message.strip() !=""): - messages.insert(0, - SystemMessagePromptTemplate.from_template( - system_message - )) - prompt = ChatPromptTemplate( - messages=messages - ) - # Notice that we `return_messages=True` to fit into the MessagesPlaceholder - # Notice that `"chat_history"` aligns with the MessagesPlaceholder name. - memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True) - ## initialize memory with our own chat model. 
- self.init_mem(history[:-1],memory=memory) - conversation = LLMChain( - llm=llm, - prompt=prompt, - memory=memory - ) - - return user_q,conversation + return ChatResult(content=ai_message.content) - def init_mem(self, messages:"Sequence[dict[str, str]]", memory: ConversationBufferMemory) : + def init_messages(self, history:"Sequence[dict[str, str]]", system_message: Optional[str] ) : """initialises memory with chat messages Args: messages (Sequence[dict[str, str]]): history of messages, are converted into langchain format - memory (ConversationBufferMemory): a memory for the messages + system_message ( Optional[str]): the system message """ - for conversation in messages: + langchain_messages = [] + if(system_message and system_message.strip() !=""): + langchain_messages.append(SystemMessage(system_message)) + for conversation in history: if("user" in conversation and conversation["user"]): userMsg = conversation["user"] - memory.chat_memory.add_user_message(userMsg) + langchain_messages.append(HumanMessage(userMsg)) if("bot" in conversation and conversation["bot"]): aiMsg = conversation["bot"] - memory.chat_memory.add_ai_message(aiMsg) \ No newline at end of file + langchain_messages.append(AIMessage(aiMsg)) + return langchain_messages \ No newline at end of file diff --git a/app/backend/core/llmhelper.py b/app/backend/core/llmhelper.py index 16e1b4d8..e935f994 100644 --- a/app/backend/core/llmhelper.py +++ b/app/backend/core/llmhelper.py @@ -24,7 +24,6 @@ def getModel(models: List[ModelsConfig], default_model = models[0] if default_model["type"] == "AZURE": llm = AzureChatOpenAI( - model=default_model["model_name"], deployment_name= default_model["deployment"], openai_api_key=default_model["api_key"], azure_endpoint=default_model["endpoint"], @@ -48,6 +47,31 @@ def getModel(models: List[ModelsConfig], else: raise ModelsConfigurationException(f"Unknown model type: {default_model['type']}. 
Currently only `AZURE` and `OPENAI` are supported.") + alternatives = {"fake" : FakeListLLM(responses=["Hi diggi"])} + for model in models[1:]: + if model["type"] == "AZURE": + alternative = AzureChatOpenAI( + deployment_name= model["deployment"], + openai_api_key=model["api_key"], + azure_endpoint=model["endpoint"], + openai_api_version=model["api_version"], + openai_api_type="azure", + max_tokens=max_tokens, + n=n, + streaming=streaming, + temperature=temperature, + ) + elif model["type"] == "OPENAI": + alternative = ChatOpenAI( + model=model["model_name"], + api_key=model["api_key"], + base_url=model["endpoint"], + max_tokens=max_tokens, + n=n, + streaming=streaming, + temperature=temperature, + ) + alternatives[model["model_name"]] = alternative llm = llm.configurable_fields( temperature=ConfigurableField( id="llm_temperature", @@ -75,5 +99,6 @@ def getModel(models: List[ModelsConfig], ).configurable_alternatives( ConfigurableField(id="llm"), default_key=models[0]["model_name"], - fake= FakeListLLM(responses=["Hi diggi"])) + **alternatives + ) return llm diff --git a/app/backend/core/modelhelper.py b/app/backend/core/modelhelper.py index db268ee1..8965f5aa 100644 --- a/app/backend/core/modelhelper.py +++ b/app/backend/core/modelhelper.py @@ -1,84 +1,67 @@ from __future__ import annotations +from typing import List import tiktoken +from langchain_core.messages.base import BaseMessage -MODELS_2_TOKEN_LIMITS = { - "gpt-35-turbo": 4000, - "gpt-3.5-turbo": 4000, - "gpt-35-turbo-16k": 16000, - "gpt-3.5-turbo-16k": 16000, - "gpt-4": 8100, - "gpt-4-32k": 32000 -} - -AOAI_2_OAI = { - "gpt-35-turbo": "gpt-3.5-turbo", - "gpt-35-turbo-16k": "gpt-3.5-turbo-16k" -} - - -def get_token_limit(model_id: str) -> int: - """returns the token limit for a given model - - Args: - model_id (str): id of the model - - Raises: - ValueError: if the model is not available - - Returns: - int: the token limit of the model - """ - if model_id not in MODELS_2_TOKEN_LIMITS: - raise ValueError("Expected model gpt-35-turbo and above") - return MODELS_2_TOKEN_LIMITS[model_id] - - -def num_tokens_from_messages(messages: list[dict[str, str]], model: str) -> int: - """ Calculate the number of tokens required to encode a list of messages - - Args: - messages (list[dict[str, str]]): list of messages - model (str): for which model - - Returns: - int: The total number of tokens required to encode the message. - """ +def num_tokens_from_messages(messages: List[BaseMessage], model: str): + print(messages) + """Return the number of tokens used by a list of messages.""" + try: + encoding = tiktoken.encoding_for_model(model) + except KeyError: + print("Warning: model not found. Using cl100k_base encoding.") + encoding = tiktoken.get_encoding("cl100k_base") + if model in { + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-16k-0613", + "gpt-4-0314", + "gpt-4-32k-0314", + "gpt-4-0613", + "gpt-4-32k-0613", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4o", + "gpt-4o-mini", + "gpt-4o-2024-05-13", + "Mistral-large-2407" #TODO use https://docs.mistral.ai/guides/tokenization/ for estimation + }: + tokens_per_message = 3 + tokens_per_name = 1 + elif model == "gpt-3.5-turbo-0301": + tokens_per_message = 4 # every message follows <|start|>{role/name}\n{content}<|end|>\n + tokens_per_name = -1 # if there's a name, the role is omitted + elif "gpt-3.5-turbo" in model: + print("Warning: gpt-3.5-turbo may update over time. 
Returning num tokens assuming gpt-3.5-turbo-0613.") + return num_tokens_from_messages(messages, model="gpt-3.5-turbo-0613") + elif "gpt-4o" in model: + print( + "Warning: gpt-4o may update over time. Returning num tokens assuming gpt-4o-2024-05-13.") + return num_tokens_from_messages(messages, model="gpt-4o-2024-05-13") + elif "gpt-4" in model: + print("Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.") + return num_tokens_from_messages(messages, model="gpt-4-0613") + else: + raise NotImplementedError( + f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""" + ) num_tokens = 0 - for conversation in messages: - if("user" in conversation and conversation["user"]): - userMsg = conversation["user"] - num_tokens += num_tokens_from_message(message= userMsg, model=model) - if("bot" in conversation and conversation["bot"]): - aiMsg = conversation["bot"] - num_tokens += num_tokens_from_message(message= aiMsg, model=model) - return num_tokens - -def num_tokens_from_message(message: str, model: str, token_per_message: int = 3) -> int: - """Calculate the number of tokens required to encode a message. - - Args: - message (str): The message to encode - model (str): The name of the model to use for encoding. - token_per_message (number): offset per message - Returns: - int: The total number of tokens required to encode the message. - Example: - message = {'role': 'user', 'content': 'Hello, how are you?'} - model = 'gpt-3.5-turbo' - num_tokens_from_messages(message, model) - output: 11 - """ - encoding = tiktoken.encoding_for_model(get_oai_chatmodel_tiktok(model)) - num_tokens = token_per_message # For "role" and "content" keys - num_tokens += len(encoding.encode(message)) - return num_tokens - - -def get_oai_chatmodel_tiktok(aoaimodel: str) -> str: - message = "Expected Azure OpenAI ChatGPT model name" - if aoaimodel == "" or aoaimodel is None: - raise ValueError(message) - if aoaimodel not in AOAI_2_OAI and aoaimodel not in MODELS_2_TOKEN_LIMITS: - raise ValueError(message) - return AOAI_2_OAI.get(aoaimodel) or aoaimodel \ No newline at end of file + for message in messages: + num_tokens += tokens_per_message + if(message.type): + role = "" + if(message.type =="ai"): + role = "assistant" + elif(message.type == "system"): + role = "system" + elif(message.type == "human"): + role = "user" + else: + raise NotImplementedError( + f"""Not implemented for the message type {message.type}""" + ) + num_tokens += len(encoding.encode(role)) + if(message.content): + num_tokens += len(encoding.encode(message.content)) + num_tokens += 3 # every reply is primed with <|start|>assistant<|message|> + return num_tokens \ No newline at end of file diff --git a/app/frontend/src/api/api.ts b/app/frontend/src/api/api.ts index 99ae23b3..5ec4f0e2 100644 --- a/app/frontend/src/api/api.ts +++ b/app/frontend/src/api/api.ts @@ -14,7 +14,8 @@ export async function chatApi(options: ChatRequest): Promise { temperature: options.temperature, language: options.language, system_message: options.system_message, - max_tokens: options.max_tokens + max_tokens: options.max_tokens, + model: options.model }) }); } diff --git a/app/frontend/src/api/models.ts b/app/frontend/src/api/models.ts index fb266720..7f77f221 100644 --- a/app/frontend/src/api/models.ts +++ b/app/frontend/src/api/models.ts @@ -21,6 +21,7 @@ export type ChatRequest = { max_tokens?: number; system_message?: 
string; shouldStream?: boolean; + model?: string; }; export type SumRequest = { diff --git a/app/frontend/src/pages/chat/Chat.tsx b/app/frontend/src/pages/chat/Chat.tsx index f5efdda6..32749687 100644 --- a/app/frontend/src/pages/chat/Chat.tsx +++ b/app/frontend/src/pages/chat/Chat.tsx @@ -26,6 +26,7 @@ const enum STORAGE_KEYS { const Chat = () => { const { language } = useContext(LanguageContext) + const { LLM } = useContext(LLMContext); const { t } = useTranslation(); const [shouldStream, setShouldStream] = useState(true); @@ -127,7 +128,8 @@ const Chat = () => { language: language, temperature: temperature, system_message: system ? system : "", - max_tokens: max_tokens + max_tokens: max_tokens, + model: LLM.model_name }; const response = await chatApi(request); From ea4a092c4f41b5fe94c7cf94e299845014a32388 Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Thu, 22 Aug 2024 22:27:06 +0200 Subject: [PATCH 12/34] =?UTF-8?q?=E2=98=81=20mistral=20tokenizer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/backend/chat/chat.py | 2 +- app/backend/core/modelhelper.py | 42 +++++++++++++++++++++++++++++++-- app/backend/init_app.py | 1 - app/backend/requirements.txt | 1 + 4 files changed, 42 insertions(+), 4 deletions(-) diff --git a/app/backend/chat/chat.py b/app/backend/chat/chat.py index 1ba1fd21..a2feba65 100644 --- a/app/backend/chat/chat.py +++ b/app/backend/chat/chat.py @@ -71,7 +71,7 @@ async def run_with_streaming(self, history: 'list[dict[str, str]]',max_tokens: i messagecount= len(history), method = "Chat")) - info = ChunkInfo(requesttokens=num_tokens_from_messages([msgs[-1]],model), streamedtokens=num_tokens_from_messages([AIMessage(result)], model)) + info = ChunkInfo(requesttokens=num_tokens_from_messages([msgs[-1]],model), streamedtokens=num_tokens_from_messages([HumanMessage(result)], model)) yield Chunk(type="I", message=info, order=position) def run_without_streaming(self, history: "Sequence[dict[str, str]]", max_tokens: int, temperature: float, system_message: Optional[str], department: Optional[str]) -> ChatResult: diff --git a/app/backend/core/modelhelper.py b/app/backend/core/modelhelper.py index 8965f5aa..4331f3b5 100644 --- a/app/backend/core/modelhelper.py +++ b/app/backend/core/modelhelper.py @@ -3,10 +3,49 @@ import tiktoken from langchain_core.messages.base import BaseMessage +from mistral_common.protocol.instruct.messages import ( + UserMessage, SystemMessage, AssistantMessage +) +from mistral_common.protocol.instruct.request import ChatCompletionRequest +from mistral_common.tokens.tokenizers.mistral import MistralTokenizer def num_tokens_from_messages(messages: List[BaseMessage], model: str): - print(messages) """Return the number of tokens used by a list of messages.""" + if("gpt-" in model): + return num_tokens_from_openai_model(messages=messages, model=model) + elif("mistral" in model): + return num_tokens_from_mistral_model(messages=messages, model=model) + else: + raise NotImplementedError( + f"""No tokenizer for model found. 
currently only openai and mistral are supported.""" + ) +def num_tokens_from_mistral_model(messages: List[BaseMessage], model: str): + """Return the number of tokens used by a list of messages for a given mistral model.""" + num_tokens = 0 + # see which tokenizer for which model is needed, https://github.com/mistralai/mistral-common/blob/main/README.md + if(model == "mistral-large-2407" ): + tokenizer = MistralTokenizer.v3() + else: + tokenizer = MistralTokenizer.from_model(model) + # convert langchain msgs to mistral format + mistral_messages = [] + for message in messages: + if(message.type =="ai"): + mistral_messages.append(AssistantMessage(content=message.content)) + elif(message.type == "system"): + mistral_messages.append(SystemMessage(content=message.content)) + elif(message.type == "human"): + mistral_messages.append(UserMessage(content=message.content)) + else: + raise NotImplementedError( + f"""Not implemented for the message type {message.type}""" + ) + tokenized = tokenizer.encode_chat_completion( + ChatCompletionRequest(messages=mistral_messages)) + return len(tokenized.tokens) + +def num_tokens_from_openai_model(messages: List[BaseMessage], model: str): + """Return the number of tokens used by a list of messages for a given openai model.""" try: encoding = tiktoken.encoding_for_model(model) except KeyError: @@ -24,7 +63,6 @@ def num_tokens_from_messages(messages: List[BaseMessage], model: str): "gpt-4o", "gpt-4o-mini", "gpt-4o-2024-05-13", - "Mistral-large-2407" #TODO use https://docs.mistral.ai/guides/tokenization/ for estimation }: tokens_per_message = 3 tokens_per_name = 1 diff --git a/app/backend/init_app.py b/app/backend/init_app.py index d99f30e1..7b2f1805 100644 --- a/app/backend/init_app.py +++ b/app/backend/init_app.py @@ -52,7 +52,6 @@ async def initApp() -> AppConfig: Returns: AppConfig: contains the configuration for the webservice """ - # read enviornment config env_config = os.environ['MUCGPT_CONFIG'] if "MUCGPT_CONFIG" in os.environ else os.path.dirname(os.path.realpath(__file__))+"/config.json" base_config = os.environ['MUCGPT_BASE_CONFIG'] if "MUCGPT_BASE_CONFIG" in os.environ is not None else os.path.dirname(os.path.realpath(__file__))+"/base.json" diff --git a/app/backend/requirements.txt b/app/backend/requirements.txt index f3459d70..94e726e5 100644 --- a/app/backend/requirements.txt +++ b/app/backend/requirements.txt @@ -16,3 +16,4 @@ psycopg2==2.9.9 pypdf2==3.0.1 tenacity==8.5.0 gunicorn +mistral-common==1.3.4 From 68d67e266524889ab23af23f54db381e74a5ab4b Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Fri, 23 Aug 2024 10:26:56 +0200 Subject: [PATCH 13/34] :bug: fixed llm frontend bug --- app/frontend/src/pages/layout/Layout.tsx | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/app/frontend/src/pages/layout/Layout.tsx b/app/frontend/src/pages/layout/Layout.tsx index 5be1df4c..3a947d42 100644 --- a/app/frontend/src/pages/layout/Layout.tsx +++ b/app/frontend/src/pages/layout/Layout.tsx @@ -53,7 +53,7 @@ export const Layout = () => { const [isLight, setLight] = useState(ligth_theme_pref); const [fontscaling, setFontscaling] = useState(font_scaling_pref); - + const [models, setModels] = useState(config.models); const [theme, setTheme] = useState(adjustTheme(isLight, fontscaling)); @@ -72,10 +72,11 @@ export const Layout = () => { useEffect(() => { configApi().then(result => { setConfig(result); - if (result.models.length === 0) + setModels(result.models); + if (result.models.length === 0) { 
console.error("Keine Modelle vorhanden"); - if (result.models.filter((model) => LLM.model_name === model.model_name).length === 0) - setLLM(result.models[0]) + } + setLLM(result.models.find((model) => model.model_name == llm_pref) || result.models[0]) }, () => { console.error("Config nicht geladen"); }); i18n.changeLanguage(language_pref); }, []); @@ -104,7 +105,7 @@ export const Layout = () => { }; - const models = config.models; + return ( From 008878a95196729551b996e9eeb6ec78e010ab16 Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Fri, 23 Aug 2024 12:26:30 +0200 Subject: [PATCH 14/34] :sparkles: /counttokens with right LLM --- app/backend/app.py | 3 ++- app/frontend/src/api/api.ts | 3 ++- app/frontend/src/api/models.ts | 1 + .../src/components/LLMSelector/LLMContextProvider.tsx | 2 +- app/frontend/src/pages/chat/Chat.tsx | 6 +++--- 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/app/backend/app.py b/app/backend/app.py index 051790f8..09bd0cc4 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -172,7 +172,8 @@ async def counttokens(): request_json = await request.get_json() message=request_json['text'] or "" - counted_tokens = num_tokens_from_messages([HumanMessage(message)],"gpt-35-turbo") #TODO use correct model + model = request_json['model']['model_name'] or "gpt-35-turbo" + counted_tokens = num_tokens_from_messages([HumanMessage(message)], model) return jsonify(CountResult(count=counted_tokens)) @bp.route("/statistics/export", methods=["GET"]) diff --git a/app/frontend/src/api/api.ts b/app/frontend/src/api/api.ts index 5ec4f0e2..f7efd234 100644 --- a/app/frontend/src/api/api.ts +++ b/app/frontend/src/api/api.ts @@ -107,7 +107,8 @@ export async function countTokensAPI(options: CountTokenRequest): Promise>; } -export const DEFAULTLLM = "GPT-4o-mini"; +export const DEFAULTLLM = "gpt-4o-mini"; export const LLMContext = React.createContext({ LLM: { model_name: DEFAULTLLM, max_tokens: 0 }, setLLM: () => { } }); export const LLMContextProvider = (props: React.PropsWithChildren<{}>) => { diff --git a/app/frontend/src/pages/chat/Chat.tsx b/app/frontend/src/pages/chat/Chat.tsx index 32749687..52818f91 100644 --- a/app/frontend/src/pages/chat/Chat.tsx +++ b/app/frontend/src/pages/chat/Chat.tsx @@ -60,16 +60,16 @@ const Chat = () => { const makeTokenCountRequest = useCallback(async () => { if (debouncedSystemPrompt && debouncedSystemPrompt !== "") { - const response = await countTokensAPI({ "text": debouncedSystemPrompt }); + const response = await countTokensAPI({ "text": debouncedSystemPrompt, "model": LLM }); setSystemPromptTokens(response.count); } else setSystemPromptTokens(0); - }, [debouncedSystemPrompt]); + }, [debouncedSystemPrompt, LLM]); useEffect(() => { makeTokenCountRequest(); - }, [debouncedSystemPrompt, makeTokenCountRequest]); + }, [debouncedSystemPrompt, LLM, makeTokenCountRequest]); useEffect(() => { checkStructurOfDB(storage); From 9b1b7d0cbb30253b1a3a3d54e614ccfbd5f3f237 Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Fri, 23 Aug 2024 15:49:08 +0200 Subject: [PATCH 15/34] :sparkles: Brainstorming now uses the LLM from the settings --- app/backend/app.py | 2 +- app/backend/brainstorm/brainstorm.py | 4 ++-- app/frontend/src/api/api.ts | 3 ++- app/frontend/src/api/models.ts | 1 + app/frontend/src/pages/brainstorm/Brainstorm.tsx | 3 +++ 5 files changed, 9 insertions(+), 4 deletions(-) diff --git a/app/backend/app.py b/app/backend/app.py index 09bd0cc4..0e817791 100644 --- 
a/app/backend/app.py +++ b/app/backend/app.py @@ -81,7 +81,7 @@ async def brainstorm(): try: impl = cfg["brainstorm_approaches"] - r = await impl.brainstorm(topic=request_json["topic"],language= request_json["language"] or "Deutsch", department=department) + r = await impl.brainstorm(topic=request_json["topic"],language= request_json["language"] or "Deutsch", department=department, model_name=request_json["model"]["model_name"]) return jsonify(r) except Exception as e: logging.exception("Exception in /brainstorm") diff --git a/app/backend/brainstorm/brainstorm.py b/app/backend/brainstorm/brainstorm.py index 79acc0ee..cf99543a 100644 --- a/app/backend/brainstorm/brainstorm.py +++ b/app/backend/brainstorm/brainstorm.py @@ -76,7 +76,7 @@ def getTranslationPrompt(self) -> PromptTemplate: return PromptTemplate(input_variables=["language", "brainstorm"], template=self.user_translate_prompt) - async def brainstorm(self, topic: str, language: str, department: Optional[str]) -> BrainstormResult: + async def brainstorm(self, topic: str, language: str, department: Optional[str], model_name:str) -> BrainstormResult: """Generates ideas for a given topic structured in markdown, translates the result into the target language Args: @@ -89,9 +89,9 @@ async def brainstorm(self, topic: str, language: str, department: Optional[str]) """ # configure config: LlmConfigs = { + "llm": model_name } llm = self.llm.with_config(configurable=config) - # construct chains brainstormChain = LLMChain(llm=llm, prompt=self.getBrainstormPrompt(), output_key="brainstorm") translationChain = LLMChain(llm=llm, prompt=self.getTranslationPrompt(), output_key="translation") diff --git a/app/frontend/src/api/api.ts b/app/frontend/src/api/api.ts index f7efd234..a0590774 100644 --- a/app/frontend/src/api/api.ts +++ b/app/frontend/src/api/api.ts @@ -89,7 +89,8 @@ export async function brainstormApi(options: BrainstormRequest): Promise { const { language } = useContext(LanguageContext) + const { LLM } = useContext(LLMContext); const { t } = useTranslation(); const lastQuestionRef = useRef(""); @@ -63,6 +65,7 @@ const Summarize = () => { const request: BrainstormRequest = { topic: question, language: language, + model: LLM }; const result = await brainstormApi(request); setAnswers([...answers, [question, result]]); From 9bbadb68bdf2c8cc3b3519f37acbc0dd076d6052 Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Fri, 23 Aug 2024 18:43:41 +0200 Subject: [PATCH 16/34] =?UTF-8?q?=F0=9F=94=8D=20Summarize=20working=20agai?= =?UTF-8?q?n?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/backend/init_app.py | 4 +- app/backend/summarize/summarize.py | 129 +++++++++-------------------- 2 files changed, 41 insertions(+), 92 deletions(-) diff --git a/app/backend/init_app.py b/app/backend/init_app.py index 7b2f1805..30d10e8b 100644 --- a/app/backend/init_app.py +++ b/app/backend/init_app.py @@ -31,10 +31,10 @@ def initApproaches(cfg: BackendConfig, repoHelper: Repository) -> Tuple[Chat, Br temperature=0.9) sumllm = getModel( models=cfg["models"], - max_tokens = 1000, + max_tokens = 2000, n = 1, streaming=False, - temperature=0.2) + temperature=0) chatlllm = getModel( models=cfg["models"], max_tokens=4000, diff --git a/app/backend/summarize/summarize.py b/app/backend/summarize/summarize.py index 8f210b89..4eea1d34 100644 --- a/app/backend/summarize/summarize.py +++ b/app/backend/summarize/summarize.py @@ -7,6 +7,8 @@ from langchain.prompts import PromptTemplate from langchain_community.callbacks 
import get_openai_callback from langchain_core.runnables.base import RunnableSerializable +from langchain_core.pydantic_v1 import BaseModel, Field +from langchain_core.prompts import PromptTemplate from core.datahelper import Repository, Requestinfo from core.textsplit import splitPDF, splitText @@ -14,6 +16,12 @@ from core.types.LlmConfigs import LlmConfigs from summarize.summarizeresult import SummarizeResult +class DenserSummary(BaseModel): + missing_entities: List[str] = Field(description="An list of missing entitys") + denser_summary: str = Field(description="denser summary, covers every entity in detail") + +class Summarys(BaseModel): + data: List[DenserSummary] = Field(description="An list of increasingly concise dense summaries") class Summarize: """Summarizes text. Chunks long texts. Individual chunks where summarized with Chain of Density prompting: https://arxiv.org/abs/2309.04269. Afterwards the text is translated into the target language.""" @@ -55,21 +63,6 @@ class Summarize: The response in JSON format: """ - user_translate_prompt = """ - Übersetze das folgende JSON in {language}. Beinhalte die Formatierung als RFC8259 JSON bei. - Das JSON sollte ein Array der Länge 5 sein, welcher folgendem Format folgt: - {{ - "data": [ - {{ - "missing_entities": "An array of missing entitys" - "denser_summary": "denser summary, covers every entity in detail" - }} - ] - }} - - JSON: {sum} - """ - user_translate_and_cleanup_prompt = """ Übersetze den folgenden Text in {language}. @@ -97,9 +90,6 @@ def __init__(self, llm: RunnableSerializable, config: ApproachConfig, repo: Repo def getSummarizationPrompt(self) -> PromptTemplate: return PromptTemplate(input_variables=["text"], template=self.user_sum_prompt) - - def getTranslationPrompt(self) -> PromptTemplate: - return PromptTemplate(input_variables=["language", "sum"], template=self.user_translate_prompt) def getTranslationCleanupPrompt(self) -> PromptTemplate: return PromptTemplate(input_variables=["language", "sum"], template=self.user_translate_and_cleanup_prompt) @@ -109,46 +99,15 @@ def setup(self) -> SequentialChain: config: LlmConfigs = { } llm = self.llm.with_config(configurable=config) - # setup model - summarizationChain = LLMChain(llm=llm, prompt=self.getSummarizationPrompt(), output_key="sum") - translationChain = LLMChain(llm=llm, prompt=self.getTranslationCleanupPrompt(), output_key="translation") + + summarizationChain = self.getSummarizationPrompt() | llm.with_structured_output(schema=Summarys) + translationChain = self.getTranslationCleanupPrompt() | llm.with_structured_output(schema=Summarys) return (summarizationChain, translationChain) - def removeQuotations(self,st: str) -> str: - """finds all denser summarys, replaces quotation inside with " - - Args: - st (str): input str - - Returns: - str: str without quotations - """ - m = re.finditer(r'(?<=\"denser_summary\":)(.*?)(?=\})', st) - - new_string = "" - idx = 0 - - for i in list(m): - ss, se = i.span(1) # first and last index - groups = i.group() # complete string ins - quotations = [m.start() for m in re.finditer('"', groups)] - # Quotation inside dense summary? 
- if(len(quotations)>2): - new_string += st[idx:ss+quotations[1]] - idx = ss+quotations[1]+1 - for quotindex in quotations[1:-1]: - new_string += st[idx:ss+quotindex] + "“ " - idx = ss+quotindex+1 - new_string += st[idx:se] - idx = quotations[-1]+1 - else: - new_string += st[idx:ss] + groups - idx = se - new_string += st[idx:] - return new_string - def run_io_tasks_in_parallel(self, tasks) -> List[Any]: + + def run_io_tasks_in_parallel(self, tasks) -> List[Tuple[Summarys, int]]: """execute tasks in parallel Args: @@ -165,7 +124,7 @@ def run_io_tasks_in_parallel(self, tasks) -> List[Any]: return results - def call_and_cleanup(self, text: str, summarizeChain: LLMChain) -> Tuple[List[str], int]: + def call_and_cleanup(self, text: str, summarizeChain: LLMChain) -> Tuple[Summarys, int]: """calls summarization chain and cleans the data Args: @@ -175,34 +134,24 @@ def call_and_cleanup(self, text: str, summarizeChain: LLMChain) -> Tuple[List[st Returns: Tuple[List[str], int]: the last n summaries, the number of consumed tokens """ - with get_openai_callback() as cb: - result = summarizeChain.invoke({"text": text}) - total_tokens = cb.total_tokens - # post procession - chat_translate_result= result["sum"][result["sum"].index("{"):] - chat_translate_result = chat_translate_result.replace("\n", "").rstrip() - chat_translate_result = self.removeQuotations(chat_translate_result) - if not chat_translate_result.endswith("}"): - chat_translate_result = chat_translate_result + "\"}]}" try: - jsoned = json.loads(chat_translate_result) - except Exception: - # try again - try: - (chat_translate_result, total_tokens) = self.call_and_cleanup(text=text, summarizeChain=summarizeChain) - return (chat_translate_result, total_tokens) - except Exception: - total_tokens = 0 - jsoned = { } - jsoned['data'] = [{'missing_entities': 'Fehler','denser_summary': 'Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.'}] - - cleaned = [] - for (i, element) in enumerate(jsoned['data']): - missing = element['missing_entities'] - if(isinstance(missing, str)): - element['missing_entities'] = [missing] - cleaned.append(element) - return (cleaned,total_tokens) + with get_openai_callback() as cb: + result: Summarys = summarizeChain.invoke({"text": text}) + + total_tokens = cb.total_tokens + + except Exception as ex: + print(ex) + # error message + total_tokens = 0 + result = Summarys(data= [DenserSummary(missing_entities="Fehler", denser_summary='Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.' ), + DenserSummary(missing_entities="Fehler", denser_summary='Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.' ), + DenserSummary(missing_entities="Fehler", denser_summary='Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.' ), + DenserSummary(missing_entities="Fehler", denser_summary='Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.' ), + DenserSummary(missing_entities="Fehler", denser_summary='Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.' 
)]) + + + return (result,total_tokens) @@ -222,27 +171,27 @@ async def summarize(self, splits: List[str], language: str, department: Optiona (summarizeChain, cleanupChain) = self.setup() # call chain total_tokens = 0 - summarys = [] + summarys: List[DenserSummary] = [] # call summarization in parallel - results = self.run_io_tasks_in_parallel( + chunk_summaries = self.run_io_tasks_in_parallel( list(map(lambda chunk: lambda: self.call_and_cleanup(text=chunk, summarizeChain=summarizeChain), splits))) # concatenate all summarys for i in range(0,5): - next_summary = {"denser_summary": "", "missing_entities": []} - for (result, tokens) in results: + next_summary = DenserSummary(missing_entities=[], denser_summary="") + for (chunk_summary, tokens) in chunk_summaries: total_tokens += tokens - next_summary["denser_summary"] += " "+ result[i]["denser_summary"] - next_summary["missing_entities"] += result[i]["missing_entities"] + next_summary.denser_summary += " "+ chunk_summary.data[i].denser_summary + next_summary.missing_entities += chunk_summary.data[i].missing_entities summarys.append(next_summary) final_summarys = [] for summary in summarys[self.use_last_n_summaries:]: # translate and beautify the concatenated summaries with get_openai_callback() as cb: - result = cleanupChain.invoke({"language": language, "sum": summary['denser_summary']}) + chunk_summary = cleanupChain.invoke({"language": language, "sum": summary.denser_summary}) total_tokens = cb.total_tokens - final_summarys.append(result['translation']) + final_summarys.append(chunk_summary.content) # save total tokens if self.config["log_tokens"]: self.repo.addInfo(Requestinfo( From d27814d2a9942784d8ebe50976e460304a13e184 Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Fri, 23 Aug 2024 19:07:46 +0200 Subject: [PATCH 17/34] =?UTF-8?q?=F0=9F=8E=88=20Summarization=20with=20str?= =?UTF-8?q?uctured=20output?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/backend/app.py | 4 ++-- app/backend/brainstorm/brainstorm.py | 1 + app/backend/summarize/summarize.py | 23 +++++++++++-------- app/frontend/src/api/api.ts | 3 ++- app/frontend/src/api/models.ts | 3 ++- .../src/pages/brainstorm/Brainstorm.tsx | 6 ++--- .../src/pages/summarize/Summarize.tsx | 5 +++- 7 files changed, 27 insertions(+), 18 deletions(-) diff --git a/app/backend/app.py b/app/backend/app.py index 0e817791..85d6514b 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -64,7 +64,7 @@ async def sum(): text = request_json["text"] if file is None else None splits = impl.split(detaillevel=detaillevel, file=file, text=text) - r = await impl.summarize(splits = splits, department=department, language=request_json["language"] or "Deutsch") + r = await impl.summarize(splits = splits, department=department, language=request_json["language"] or "Deutsch", model_name=request_json["model"]) return jsonify(r) except Exception as e: logging.exception("Exception in /sum") @@ -81,7 +81,7 @@ async def brainstorm(): try: impl = cfg["brainstorm_approaches"] - r = await impl.brainstorm(topic=request_json["topic"],language= request_json["language"] or "Deutsch", department=department, model_name=request_json["model"]["model_name"]) + r = await impl.brainstorm(topic=request_json["topic"],language= request_json["language"] or "Deutsch", department=department, model_name=request_json["model"]) return jsonify(r) except Exception as e: logging.exception("Exception in /brainstorm") diff --git a/app/backend/brainstorm/brainstorm.py 
b/app/backend/brainstorm/brainstorm.py index cf99543a..a71879ab 100644 --- a/app/backend/brainstorm/brainstorm.py +++ b/app/backend/brainstorm/brainstorm.py @@ -83,6 +83,7 @@ async def brainstorm(self, topic: str, language: str, department: Optional[str], topic (str): topic of the brainstorming language (str): target language department (Optional[str]): department, who is responsible for the call + model_name (str): the choosen llm Returns: BrainstormResult: the structured markdown with ideas about the topic diff --git a/app/backend/summarize/summarize.py b/app/backend/summarize/summarize.py index 4eea1d34..8c78e56f 100644 --- a/app/backend/summarize/summarize.py +++ b/app/backend/summarize/summarize.py @@ -95,13 +95,15 @@ def getTranslationCleanupPrompt(self) -> PromptTemplate: return PromptTemplate(input_variables=["language", "sum"], template=self.user_translate_and_cleanup_prompt) - def setup(self) -> SequentialChain: + def setup(self, model_name: str) -> SequentialChain: config: LlmConfigs = { + "llm": model_name } llm = self.llm.with_config(configurable=config) + #extraction with structured output: https://python.langchain.com/v0.1/docs/use_cases/extraction/quickstart/ summarizationChain = self.getSummarizationPrompt() | llm.with_structured_output(schema=Summarys) - translationChain = self.getTranslationCleanupPrompt() | llm.with_structured_output(schema=Summarys) + translationChain = self.getTranslationCleanupPrompt() | llm return (summarizationChain, translationChain) @@ -144,31 +146,32 @@ def call_and_cleanup(self, text: str, summarizeChain: LLMChain) -> Tuple[Summary print(ex) # error message total_tokens = 0 - result = Summarys(data= [DenserSummary(missing_entities="Fehler", denser_summary='Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.' ), - DenserSummary(missing_entities="Fehler", denser_summary='Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.' ), - DenserSummary(missing_entities="Fehler", denser_summary='Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.' ), - DenserSummary(missing_entities="Fehler", denser_summary='Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.' ), - DenserSummary(missing_entities="Fehler", denser_summary='Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.' )]) + result = Summarys(data= [DenserSummary(missing_entities=["Fehler"], denser_summary='Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.' ), + DenserSummary(missing_entities=["Fehler"], denser_summary='Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.' ), + DenserSummary(missing_entities=["Fehler"], denser_summary='Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.' ), + DenserSummary(missing_entities=["Fehler"], denser_summary='Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.' ), + DenserSummary(missing_entities=["Fehler"], denser_summary='Zusammenfassung konnte nicht generiert werden. Bitte nochmals versuchen.' )]) return (result,total_tokens) - async def summarize(self, splits: List[str], language: str, department: Optional[str]) -> SummarizeResult: + async def summarize(self, splits: List[str], language: str, department: Optional[str], model_name:str) -> SummarizeResult: """summarizes text with chain of density prompting. Generates 5 increasingly better summaries per split. Concatenates the results and translates it into the target language. 
Args: splits (List[str]): splits, to be summarized language (str): the target language - department (Optional[str]): _description_ + department (Optional[str]): department, who is responsible for the call + model_name (str): the choosen llm Returns: SummarizeResult: the best n summarizations """ # setup - (summarizeChain, cleanupChain) = self.setup() + (summarizeChain, cleanupChain) = self.setup(model_name) # call chain total_tokens = 0 summarys: List[DenserSummary] = [] diff --git a/app/frontend/src/api/api.ts b/app/frontend/src/api/api.ts index a0590774..5c379c92 100644 --- a/app/frontend/src/api/api.ts +++ b/app/frontend/src/api/api.ts @@ -28,7 +28,8 @@ export async function sumApi(options: SumRequest, file?: File): Promise { +const Brainstorm = () => { const { language } = useContext(LanguageContext) const { LLM } = useContext(LLMContext); const { t } = useTranslation(); @@ -65,7 +65,7 @@ const Summarize = () => { const request: BrainstormRequest = { topic: question, language: language, - model: LLM + model: LLM.model_name }; const result = await brainstormApi(request); setAnswers([...answers, [question, result]]); @@ -173,4 +173,4 @@ const Summarize = () => { ); }; -export default Summarize; +export default Brainstorm; diff --git a/app/frontend/src/pages/summarize/Summarize.tsx b/app/frontend/src/pages/summarize/Summarize.tsx index e585cb3b..b4827f8d 100644 --- a/app/frontend/src/pages/summarize/Summarize.tsx +++ b/app/frontend/src/pages/summarize/Summarize.tsx @@ -13,11 +13,13 @@ import { SumAnswer } from "../../components/SumAnswer"; import { SumInput } from "../../components/SumInput"; import { Field, Radio, RadioGroup, RadioGroupOnChangeData } from "@fluentui/react-components"; import { checkStructurOfDB, deleteChatFromDB, getHighestKeyInDB, getStartDataFromDB, indexedDBStorage, saveToDB } from "../../service/storage"; +import { LLMContext } from "../../components/LLMSelector/LLMContextProvider"; const STORAGE_KEY_LEVEL_OF_DETAIL = "SUM_LEVEL_OF_DETAIL" const Summarize = () => { const { language } = useContext(LanguageContext) + const { LLM } = useContext(LLMContext); const { t } = useTranslation(); const lastQuestionRef = useRef(""); @@ -69,7 +71,8 @@ const Summarize = () => { const request: SumRequest = { text: questionText, detaillevel: detaillevel, - language: language + language: language, + model: LLM.model_name }; const result = await sumApi(request, file); setAnswers([...answers, [questionText, result]]); From 70f1b748f5331b552f1d49bd7747e92057925751 Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Fri, 23 Aug 2024 19:17:01 +0200 Subject: [PATCH 18/34] =?UTF-8?q?=E2=98=81=20Using=20json=20mode=20for=20o?= =?UTF-8?q?ur=20french=20friends?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/backend/summarize/summarize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/backend/summarize/summarize.py b/app/backend/summarize/summarize.py index 8c78e56f..d054271a 100644 --- a/app/backend/summarize/summarize.py +++ b/app/backend/summarize/summarize.py @@ -102,7 +102,7 @@ def setup(self, model_name: str) -> SequentialChain: llm = self.llm.with_config(configurable=config) #extraction with structured output: https://python.langchain.com/v0.1/docs/use_cases/extraction/quickstart/ - summarizationChain = self.getSummarizationPrompt() | llm.with_structured_output(schema=Summarys) + summarizationChain = self.getSummarizationPrompt() | llm.with_structured_output(schema=Summarys, method="json_mode") translationChain 
= self.getTranslationCleanupPrompt() | llm return (summarizationChain, translationChain) From aa8780f7767976bfe52591e2551d29164e2efa93 Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Tue, 27 Aug 2024 11:19:15 +0200 Subject: [PATCH 19/34] :books: added descriptions of LLMs --- app/backend/app.py | 2 +- app/backend/core/types/Config.py | 1 + app/frontend/src/api/models.ts | 1 + .../src/components/LLMSelector/LLMContextProvider.tsx | 4 ++-- .../components/SettingsDrawer/SettingsDrawer.module.css | 7 +++++++ .../src/components/SettingsDrawer/SettingsDrawer.tsx | 4 +++- app/frontend/src/pages/layout/Layout.tsx | 7 +++++-- 7 files changed, 20 insertions(+), 6 deletions(-) diff --git a/app/backend/app.py b/app/backend/app.py index 85d6514b..06b3b595 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -146,7 +146,7 @@ async def getConfig(): models= cast(List[ModelsConfig], cfg["configuration_features"]["backend"]["models"]) models_dto_list = [] for model in models: - dto = ModelsDTO(model_name=model["model_name"], max_tokens=model["max_tokens"]) + dto = ModelsDTO(model_name=model["model_name"], max_tokens=model["max_tokens"], description=model["description"]) models_dto_list.append(dto) return jsonify({ "frontend": frontend_features, diff --git a/app/backend/core/types/Config.py b/app/backend/core/types/Config.py index 83aeddf0..552faba4 100644 --- a/app/backend/core/types/Config.py +++ b/app/backend/core/types/Config.py @@ -16,6 +16,7 @@ class ModelsConfig(TypedDict): class ModelsDTO(TypedDict): model_name: str max_tokens: int + description: str class SSOConfig(TypedDict): sso_issuer: str diff --git a/app/frontend/src/api/models.ts b/app/frontend/src/api/models.ts index 0195de95..430ad10a 100644 --- a/app/frontend/src/api/models.ts +++ b/app/frontend/src/api/models.ts @@ -52,6 +52,7 @@ export interface Frontend { export interface Model { max_tokens: number; model_name: string; + description: string; } export interface Labels { diff --git a/app/frontend/src/components/LLMSelector/LLMContextProvider.tsx b/app/frontend/src/components/LLMSelector/LLMContextProvider.tsx index d9e3e60d..d291e69d 100644 --- a/app/frontend/src/components/LLMSelector/LLMContextProvider.tsx +++ b/app/frontend/src/components/LLMSelector/LLMContextProvider.tsx @@ -8,10 +8,10 @@ interface ILLMProvider { } export const DEFAULTLLM = "gpt-4o-mini"; -export const LLMContext = React.createContext({ LLM: { model_name: DEFAULTLLM, max_tokens: 0 }, setLLM: () => { } }); +export const LLMContext = React.createContext({ LLM: { model_name: DEFAULTLLM, max_tokens: 0, description: "" }, setLLM: () => { } }); export const LLMContextProvider = (props: React.PropsWithChildren<{}>) => { - const [LLM, setLLM] = useState({ model_name: DEFAULTLLM, max_tokens: 0 }); + const [LLM, setLLM] = useState({ model_name: DEFAULTLLM, max_tokens: 0, description: "" }); return ( diff --git a/app/frontend/src/components/SettingsDrawer/SettingsDrawer.module.css b/app/frontend/src/components/SettingsDrawer/SettingsDrawer.module.css index bc23d509..78b29b34 100644 --- a/app/frontend/src/components/SettingsDrawer/SettingsDrawer.module.css +++ b/app/frontend/src/components/SettingsDrawer/SettingsDrawer.module.css @@ -47,3 +47,10 @@ align-items: center; justify-content: start; } + +.info { + margin-top: 8px; + border-style: solid; + border-color: black; + border-width: 1px; +} diff --git a/app/frontend/src/components/SettingsDrawer/SettingsDrawer.tsx b/app/frontend/src/components/SettingsDrawer/SettingsDrawer.tsx 
index 84f3c565..56d4a546 100644 --- a/app/frontend/src/components/SettingsDrawer/SettingsDrawer.tsx +++ b/app/frontend/src/components/SettingsDrawer/SettingsDrawer.tsx @@ -29,9 +29,10 @@ interface Props { onLLMSelectionChanged: (e: SelectionEvents, selection: OptionOnSelectData) => void; defaultLLM: string; llmOptions: Model[]; + currentLLM: Model; } -export const SettingsDrawer = ({ onLanguageSelectionChanged, defaultlang, version, fontscale, setFontscale, isLight, setTheme, onLLMSelectionChanged, defaultLLM, llmOptions }: Props) => { +export const SettingsDrawer = ({ onLanguageSelectionChanged, defaultlang, version, fontscale, setFontscale, isLight, setTheme, onLLMSelectionChanged, defaultLLM, llmOptions, currentLLM }: Props) => { const [isOpen, setIsOpen] = useState(false); const { t, i18n } = useTranslation(); @@ -79,6 +80,7 @@ export const SettingsDrawer = ({ onLanguageSelectionChanged, defaultlang, versio
+
{currentLLM["description"]}
{t('components.settingsdrawer.fontsize')} diff --git a/app/frontend/src/pages/layout/Layout.tsx b/app/frontend/src/pages/layout/Layout.tsx index 3a947d42..22ba0b10 100644 --- a/app/frontend/src/pages/layout/Layout.tsx +++ b/app/frontend/src/pages/layout/Layout.tsx @@ -30,11 +30,13 @@ export const Layout = () => { const [config, setConfig] = useState({ models: [{ "model_name": "KICC GPT", - "max_tokens": 128000 + "max_tokens": 128000, + "description": "" }, { "model_name": "Unknown GPT", - "max_tokens": 100 + "max_tokens": 100, + "description": "" }], frontend: { labels: { @@ -151,6 +153,7 @@ export const Layout = () => { defaultLLM={llm_pref} onLLMSelectionChanged={onLLMSelectionChanged} llmOptions={models} + currentLLM={LLM} >
From a07f47202f5652ba8ce76088bfc33acf61e7d2a6 Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Tue, 27 Aug 2024 13:53:34 +0200 Subject: [PATCH 20/34] :bookmark: Version 1.1.3 --- README.md | 2 +- app/frontend/package.json | 2 +- app/frontend/src/pages/version/Version.tsx | 34 +++++++++++++++++++++- config/base.json | 2 +- 4 files changed, 36 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 0a9a91e5..428f4725 100644 --- a/README.md +++ b/README.md @@ -90,7 +90,7 @@ python -m quart --app main:app run The frontend is based on a template from [Microsoft Azure](https://github.com/Azure-Samples/azure-search-openai-demo) and is implemented using React, Typescript and Javascript. -The framework used to implement the backend of MUCGPT is called [Quart](https://pgjones.gitlab.io/quart/). It is a fast Python web microframework for building JSON APIs, rendering and serving HTML, serving web sockets and much more. The backend uses LangChain to connect to LLMs like Chat-GPT-3.5, which is currently in use. +The framework used to implement the backend of MUCGPT is called [Quart](https://pgjones.gitlab.io/quart/). It is a fast Python web microframework for building JSON APIs, rendering and serving HTML, serving web sockets and much more. The backend uses LangChain to connect to LLMs. In the [config](config/default.json) file, you can provide the user with various LLM options to select from in the frontend. For more information about all the features of MUCGPT click [here](/docs/FEATURES.md). diff --git a/app/frontend/package.json b/app/frontend/package.json index 0543b724..fb1e576a 100644 --- a/app/frontend/package.json +++ b/app/frontend/package.json @@ -1,7 +1,7 @@ { "name": "mucgpt", "private": true, - "version": "1.1.2", + "version": "1.1.3", "type": "module", "engines": { "node": ">=16.0.0" diff --git a/app/frontend/src/pages/version/Version.tsx b/app/frontend/src/pages/version/Version.tsx index bd5d4aac..56b304cb 100644 --- a/app/frontend/src/pages/version/Version.tsx +++ b/app/frontend/src/pages/version/Version.tsx @@ -32,7 +32,39 @@ const Version = () => {

{t('version.header')}

- + + + [1.1.3] 28.08.2024 + +
+

{t('version.added')}

+
    +
  • + Benutzer haben nun die Möglichkeit, zwischen 3 verschiedenen Sprachmodellen das zu wählen, welches für ihren Anwendungsfall am besten passt. +
      +
    • GPT-4o-mini
    • +
    • GPT-4o
    • +
    • Mistral-Large-2407
    • +
    +
  • +
+

{t('version.fixed')}

+

{t('version.changed')}

+
    +
  • + Das standardmäßig benutzte Sprachmodell wurde von GPT-3.5 auf die neuere Version GPT-4o-mini geändert. +
  • +
  • + Verbesserung der "Zusammenfassen"-Funktion +
      +
    • weniger Fehler
    • +
    • zuverlässigere Zusammenfassungen in der gewünschten Struktur
    • +
    +
  • +
+
+
+
[1.1.2] 31.07.2024 diff --git a/config/base.json b/config/base.json index cb7ad94b..15b52047 100644 --- a/config/base.json +++ b/config/base.json @@ -1,3 +1,3 @@ { - "version": "1.1.1" + "version": "1.1.3" } \ No newline at end of file From b7c3229ff0336b0413d9a26a9df8b26224f30060 Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Tue, 27 Aug 2024 14:38:25 +0200 Subject: [PATCH 21/34] :rotating_light: worked on tests --- tests/unit/test_llmhelper.py | 49 ++++++++++++--------------- tests/unit/test_modelhelper.py | 61 +++++++++++++++++++++++----------- 2 files changed, 63 insertions(+), 47 deletions(-) diff --git a/tests/unit/test_llmhelper.py b/tests/unit/test_llmhelper.py index 937256af..af9ae6d8 100644 --- a/tests/unit/test_llmhelper.py +++ b/tests/unit/test_llmhelper.py @@ -9,21 +9,29 @@ class Test_LLMhelper(unittest.TestCase): def setUp(self): - self.api_key = "test_api_key" - self.api_base = "test_api_base" - self.api_version = "test_api_version" - self.api_type = "test_api_type" + self.model1 = { + "type": "OPENAI", + "model_name": "model1", + "endpoint": "TODO", + "api_key": "TODO", + "max_tokens": 128000 + } + self.model2 ={ + "type": "OPENAI", + "model_name": "model2", + "endpoint": "TODO", + "api_key": "TODO", + "max_tokens": 128000 + } @pytest.mark.asyncio @pytest.mark.unit def test_getModel_returns_llm(self): - model = getModel(chatgpt_model="test_model", + + + model = getModel(models=[self.model1, self.model2], max_tokens=10, n=1, - api_key=self.api_key, - api_base=self.api_base, - api_version=self.api_version, - api_type=self.api_type, temperature=0.5, streaming=True) self.assertIsInstance(model, RunnableSerializable) @@ -31,31 +39,21 @@ def test_getModel_returns_llm(self): @pytest.mark.asyncio @pytest.mark.unit def test_getModel_configurable_fields(self): - model = getModel(chatgpt_model="test_model", + model = getModel(models=[self.model1, self.model2], max_tokens=10, n=1, - api_key=self.api_key, - api_base=self.api_base, - api_version=self.api_version, - api_type=self.api_type, - temperature=0.5, + temperature=0.5, streaming=True) self.assertIn("temperature", model.fields) self.assertIn("max_tokens", model.fields) - self.assertIn("openai_api_key", model.fields) self.assertIn("streaming", model.fields) - self.assertIn("callbacks", model.fields) @pytest.mark.asyncio @pytest.mark.unit def test_getModel_configurable_alternatives(self): - model = getModel(chatgpt_model="test_model", + model = getModel(models=[self.model1, self.model2], max_tokens=10, n=1, - api_key=self.api_key, - api_base=self.api_base, - api_version=self.api_version, - api_type=self.api_type, temperature=0.5, streaming=True) self.assertIn("fake", model.alternatives) @@ -63,14 +61,9 @@ def test_getModel_configurable_alternatives(self): @pytest.mark.asyncio @pytest.mark.unit def test_getModel_fake_llm(self): - model = getModel(chatgpt_model="test_model", + model = getModel(models=[self.model1, self.model2], max_tokens=10, n=1, - api_key=self.api_key, - api_base=self.api_base, - api_version=self.api_version, - api_type=self.api_type, temperature=0.5, streaming=True) - print(model.alternatives["fake"]) self.assertEqual(model.alternatives["fake"].responses, ["Hi diggi"]) \ No newline at end of file diff --git a/tests/unit/test_modelhelper.py b/tests/unit/test_modelhelper.py index 33ea16da..4bee4cb6 100644 --- a/tests/unit/test_modelhelper.py +++ b/tests/unit/test_modelhelper.py @@ -2,29 +2,52 @@ import pytest -from core.modelhelper import get_token_limit, 
num_tokens_from_messages - +from core.modelhelper import num_tokens_from_messages, num_tokens_from_openai_model, num_tokens_from_mistral_model +from langchain_core.messages.base import BaseMessage class Test_Modelhelper(unittest.TestCase): + + def setUp(self): + # Set up common test variables + self.messages = [ + BaseMessage(type="system", content="System message."), + BaseMessage(type="ai", content="I am fine, thank you."), + BaseMessage(type="human", content="Hello, how are you?"), + + + ] + self.model_openai = "gpt-3.5-turbo-0613" + self.model_mistral = "mistral-large-2407" + + + @pytest.mark.asyncio + @pytest.mark.unit + def test_num_tokens_from_messages_openai(self): + assert num_tokens_from_messages(self.messages, self.model_openai) == 31 + + @pytest.mark.asyncio + @pytest.mark.unit + def test_num_tokens_from_messages_mistral(self): + assert num_tokens_from_messages(self.messages, self.model_mistral) == 24 + + @pytest.mark.asyncio + @pytest.mark.unit + def test_num_tokens_from_messages_invalid_model(self): + with self.assertRaises(NotImplementedError): + num_tokens_from_messages(self.messages, "invalid-model") + @pytest.mark.asyncio - @pytest.mark.unit - def test_get_token_limit(self): - self.assertEqual(get_token_limit("gpt-35-turbo"), 4000) - self.assertEqual(get_token_limit("gpt-3.5-turbo"), 4000) - self.assertEqual(get_token_limit("gpt-35-turbo-16k"), 16000) - self.assertEqual(get_token_limit("gpt-3.5-turbo-16k"), 16000) - self.assertEqual(get_token_limit("gpt-4"), 8100) - self.assertEqual(get_token_limit("gpt-4-32k"), 32000) - self.assertRaises(ValueError, get_token_limit, "gpt-2") + @pytest.mark.unit + def test_num_tokens_from_mistral_model_invalid_message_type(self): + invalid_messages = [BaseMessage(type="unknown", content="Test")] + with self.assertRaises(NotImplementedError): + num_tokens_from_mistral_model(invalid_messages, self.model_mistral) @pytest.mark.asyncio - @pytest.mark.unit - def test_num_tokens_from_messages(self): - messages = [ - {"user": "Hello, I have a problem with my computer.", "bot": "Hi there! What seems to be the issue?"}, - {"user": "My computer won't turn on.", "bot": "Okay, let's try a few troubleshooting steps. 
Have you checked to make sure it's plugged in and the power outlet?"}] - self.assertEqual(num_tokens_from_messages(messages,"gpt-35-turbo" ), 64) - self.assertRaises(ValueError,num_tokens_from_messages,messages,"" ) - self.assertRaises(ValueError,num_tokens_from_messages,messages,"gpt-2" ) + @pytest.mark.unit + def test_num_tokens_from_openai_model_invalid_message_type(self): + invalid_messages = [BaseMessage(type="unknown", content="Test")] + with self.assertRaises(NotImplementedError): + num_tokens_from_openai_model(invalid_messages, self.model_openai) \ No newline at end of file From 6edc77ee964ea9d3e2a6990ce10b93ffe8e07bad Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Wed, 28 Aug 2024 09:51:55 +0200 Subject: [PATCH 22/34] :white_check_mark: fixed tests and added new ones --- app/backend/core/modelhelper.py | 1 - tests/unit/test_confighelper.py | 51 ++++++--------------------------- tests/unit/test_datahelper.py | 14 +++++++-- tests/unit/test_llmhelper.py | 43 +++++++++++++++++++++++++-- tests/unit/test_modelhelper.py | 34 +++++++++++++++++++++- 5 files changed, 94 insertions(+), 49 deletions(-) diff --git a/app/backend/core/modelhelper.py b/app/backend/core/modelhelper.py index 4331f3b5..728dbea3 100644 --- a/app/backend/core/modelhelper.py +++ b/app/backend/core/modelhelper.py @@ -60,7 +60,6 @@ def num_tokens_from_openai_model(messages: List[BaseMessage], model: str): "gpt-4-32k-0613", "gpt-4-turbo", "gpt-4-turbo-2024-04-09", - "gpt-4o", "gpt-4o-mini", "gpt-4o-2024-05-13", }: diff --git a/tests/unit/test_confighelper.py b/tests/unit/test_confighelper.py index 4283e3df..50a72bfc 100644 --- a/tests/unit/test_confighelper.py +++ b/tests/unit/test_confighelper.py @@ -7,52 +7,17 @@ class Test_Confighelper(unittest.TestCase): - @pytest.mark.asyncio - @pytest.mark.unit - def test_confighelper_create(self): - path = os.path.join('app', 'backend', 'ressources', '') - path = os.path.abspath(path) - assert os.path.exists(path), "File does not exist" - path = path + "/" - env="dev" - helper = ConfigHelper(path, env) - self.assertEqual(helper.base_config_name, "base") - self.assertEqual(helper.env, env) - self.assertEqual(helper.base_path, path) - helper = ConfigHelper(path, env, "basis") - self.assertEqual(helper.base_config_name, "basis") - self.assertEqual(helper.env, env) - self.assertEqual(helper.base_path, path) - - @pytest.mark.asyncio @pytest.mark.unit def test_confighelper_loadData(self): - path = os.path.join('app', 'backend', 'ressources', '') - path = os.path.abspath(path) - assert os.path.exists(path), "File does not exist" - path = path + "/" - env="dev" - helper = ConfigHelper(path, env) + env_path = os.path.join('config', 'default.json') + env_path = os.path.abspath(env_path) + assert os.path.exists(env_path), "File does not exist" + base_path = os.path.join('config', 'base.json') + base_path = os.path.abspath(base_path) + assert os.path.exists(base_path), "File does not exist" + helper = ConfigHelper(env_config=env_path, base_config=base_path) data = helper.loadData() - self.assertIn("version", data) self.assertIn("frontend", data) self.assertIn("backend", data) - - @pytest.mark.asyncio - @pytest.mark.unit - def test_confighelper_loadData_fail(self): - path = os.path.join('app', 'backend', 'ressources', '') - path = os.path.abspath(path) - assert os.path.exists(path), "File does not exist" - path = path + "/" - env="super" - filename = path + env + ".json" - with open(filename, "w") as file: - file.write('{"frontend": {"labels": 
{"env_name": "MUC tschibidi-C"},"alternative_logo": true}}') - helper = ConfigHelper(path, env) - self.assertEqual(helper.base_config_name, "base") - self.assertEqual(helper.env, env) - self.assertEqual(helper.base_path, path) - self.assertRaises(ValueError, helper.loadData) - os.remove(filename) + self.assertIn("version", data) diff --git a/tests/unit/test_datahelper.py b/tests/unit/test_datahelper.py index 243c3d8e..55cacf49 100644 --- a/tests/unit/test_datahelper.py +++ b/tests/unit/test_datahelper.py @@ -1,8 +1,9 @@ import unittest import pytest +from sqlalchemy import Engine -from core.datahelper import Requestinfo +from core.datahelper import Requestinfo, Repository class Test_Datahelper(unittest.TestCase): @@ -15,4 +16,13 @@ def test_requestinfo_creation(self): self.assertEqual(request.department, 'IT') self.assertEqual(request.messagecount, 50) self.assertEqual(request.method, 'GET') - self.assertEqual(str(request), '') \ No newline at end of file + self.assertEqual(str(request), '') + + @pytest.mark.asyncio + @pytest.mark.unit + def test_repository_creation(self): + repo = Repository("user", "host", "database", "password") + self.assertIsInstance(repo, Repository) + self.assertIsInstance(repo.engine, Engine) + + \ No newline at end of file diff --git a/tests/unit/test_llmhelper.py b/tests/unit/test_llmhelper.py index af9ae6d8..cd357413 100644 --- a/tests/unit/test_llmhelper.py +++ b/tests/unit/test_llmhelper.py @@ -3,7 +3,7 @@ import pytest from langchain_core.runnables.base import RunnableSerializable -from core.llmhelper import getModel +from core.llmhelper import getModel, ModelsConfigurationException class Test_LLMhelper(unittest.TestCase): @@ -17,7 +17,16 @@ def setUp(self): "max_tokens": 128000 } self.model2 ={ - "type": "OPENAI", + "type": "AZURE", + "deployment": "model2", + "model_name": "model2", + "api_version": "preview", + "endpoint": "TODO", + "api_key": "TODO", + "max_tokens": 128000 + } + self.model3 ={ + "type": "TODO", "model_name": "model2", "endpoint": "TODO", "api_key": "TODO", @@ -35,6 +44,36 @@ def test_getModel_returns_llm(self): temperature=0.5, streaming=True) self.assertIsInstance(model, RunnableSerializable) + + @pytest.mark.asyncio + @pytest.mark.unit + def test_getModel_wrong_type(self): + with self.assertRaises(ModelsConfigurationException): + getModel(models=[self.model3], + max_tokens=10, + n=1, + temperature=0.5, + streaming=True) + + @pytest.mark.asyncio + @pytest.mark.unit + def test_getModel_azure_first(self): + model = getModel(models=[self.model2, self.model1], + max_tokens=10, + n=1, + temperature=0.5, + streaming=True) + self.assertIsInstance(model, RunnableSerializable) + + @pytest.mark.asyncio + @pytest.mark.unit + def test_getModel_no_model(self): + with self.assertRaises(ModelsConfigurationException): + getModel(models=[], + max_tokens=10, + n=1, + temperature=0.5, + streaming=True) @pytest.mark.asyncio @pytest.mark.unit diff --git a/tests/unit/test_modelhelper.py b/tests/unit/test_modelhelper.py index 4bee4cb6..6444436b 100644 --- a/tests/unit/test_modelhelper.py +++ b/tests/unit/test_modelhelper.py @@ -17,7 +17,8 @@ def setUp(self): ] self.model_openai = "gpt-3.5-turbo-0613" - self.model_mistral = "mistral-large-2407" + self.model_mistral2407 = "mistral-large-2407" + self.model_mistral = "mistral-large" @pytest.mark.asyncio @@ -25,6 +26,11 @@ def setUp(self): def test_num_tokens_from_messages_openai(self): assert num_tokens_from_messages(self.messages, self.model_openai) == 31 + @pytest.mark.asyncio + @pytest.mark.unit + def 
test_num_tokens_from_messages_mistral2407(self): + assert num_tokens_from_messages(self.messages, self.model_mistral2407) == 24 + @pytest.mark.asyncio @pytest.mark.unit def test_num_tokens_from_messages_mistral(self): @@ -35,6 +41,32 @@ def test_num_tokens_from_messages_mistral(self): def test_num_tokens_from_messages_invalid_model(self): with self.assertRaises(NotImplementedError): num_tokens_from_messages(self.messages, "invalid-model") + + @pytest.mark.asyncio + @pytest.mark.unit + def test_num_tokens_from_messages_invalid_openai_model(self): + with self.assertRaises(NotImplementedError): + num_tokens_from_openai_model(self.messages, "") + + @pytest.mark.asyncio + @pytest.mark.unit + def test_num_tokens_from_messages_openai_gpt0301(self): + assert num_tokens_from_messages(self.messages, "gpt-3.5-turbo-0301") == 34 + + @pytest.mark.asyncio + @pytest.mark.unit + def test_num_tokens_from_messages_openai_gptturbo(self): + assert num_tokens_from_messages(self.messages, "gpt-3.5-turbo") == 31 + + @pytest.mark.asyncio + @pytest.mark.unit + def test_num_tokens_from_messages_openai_gpt4(self): + assert num_tokens_from_messages(self.messages, "gpt-4") == 31 + + @pytest.mark.asyncio + @pytest.mark.unit + def test_num_tokens_from_messages_openai_gpt4o(self): + assert num_tokens_from_messages(self.messages, "gpt-4o") == 31 @pytest.mark.asyncio @pytest.mark.unit From 97a2b320f7116b48dc6bfe797e0974bc59fb2d2c Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Wed, 28 Aug 2024 11:27:57 +0200 Subject: [PATCH 23/34] =?UTF-8?q?=F0=9F=A9=BA=20fixed=20integration=20test?= =?UTF-8?q?s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/integration/base.json | 3 ++ tests/integration/conftest.py | 35 +++++------------------- tests/integration/test_app.py | 11 +++++--- tests/integration/test_config.json | 44 ++++++++++++++++++++++++++++++ 4 files changed, 61 insertions(+), 32 deletions(-) create mode 100644 tests/integration/base.json create mode 100644 tests/integration/test_config.json diff --git a/tests/integration/base.json b/tests/integration/base.json new file mode 100644 index 00000000..15b52047 --- /dev/null +++ b/tests/integration/base.json @@ -0,0 +1,3 @@ +{ + "version": "1.1.3" +} \ No newline at end of file diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 57e86228..cad4b403 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,19 +1,10 @@ -from collections import namedtuple -from unittest import mock - +import os import openai import pytest import pytest_asyncio import app -MockToken = namedtuple("MockToken", ["token", "expires_on"]) - - -class MockAzureCredential: - async def get_token(self, uri): - return MockToken("mock_token", 9999999999) - @pytest.fixture @@ -50,24 +41,12 @@ async def mock_acreate(*args, **kwargs): @pytest_asyncio.fixture async def client(monkeypatch, mock_openai_chatcompletion): - monkeypatch.setenv("AZURE_OPENAI_SERVICE", "test-openai-service") - monkeypatch.setenv("AZURE_OPENAI_CHATGPT_DEPLOYMENT", "test-chatgpt") - monkeypatch.setenv("AZURE_OPENAI_CHATGPT_MODEL", "gpt-35-turbo") - monkeypatch.setenv("AZURE_OPENAI_EMB_DEPLOYMENT", "test-ada") - monkeypatch.setenv("SSO_ISSUER", "testissuer.de") - monkeypatch.setenv("CONFIG_NAME", "test") - monkeypatch.setenv("DB_HOST", "not used") - monkeypatch.setenv("DB_NAME", "not used") - monkeypatch.setenv("DB_PASSWORD", "not used") - monkeypatch.setenv("DB_USER", "not used") - - - with 
mock.patch("init_app.DefaultAzureCredential") as mock_default_azure_credential: - mock_default_azure_credential.return_value = MockAzureCredential() - quart_app = app.create_app() + monkeypatch.setenv("MUCGPT_CONFIG", os.path.dirname(os.path.realpath(__file__))+"/test_config.json") + monkeypatch.setenv("MUCGPT_BASE_CONFIG", os.path.dirname(os.path.realpath(__file__))+"/base.json") - async with quart_app.test_app() as test_app: - quart_app.config.update({"TESTING": True}) + quart_app = app.create_app() + async with quart_app.test_app() as test_app: + quart_app.config.update({"TESTING": True}) - yield test_app.test_client() + yield test_app.test_client() diff --git a/tests/integration/test_app.py b/tests/integration/test_app.py index e4098cf8..f921ff39 100644 --- a/tests/integration/test_app.py +++ b/tests/integration/test_app.py @@ -89,7 +89,7 @@ async def test_brainstorm_exception(client, monkeypatch,caplog): data = { "topic": "München", "language": "Deutsch", - + "model": "TEST_MODEL", } response = await client.post('/brainstorm', json=data) assert response.status_code == 500 @@ -112,7 +112,7 @@ async def test_brainstorm(client, mocker): data = { "topic": "München", "language": "Deutsch", - + "model": "TEST_MODEL", } response = await client.post('/brainstorm', json=data) assert response.status_code == 200 @@ -128,7 +128,8 @@ async def test_sum_text(client, mocker): data = { "detaillevel": "short", "text": "To be summarized", - "language": "Deutsch" + "language": "Deutsch", + "model": "TEST_MODEL", } response = await client.post('/sum', form={"body": json.dumps(data)}) assert response.status_code == 200 @@ -143,7 +144,8 @@ async def test_sum_pdf(client, mocker): data = { "detaillevel": "short", - "language": "Deutsch" + "language": "Deutsch", + "model": "TEST_MODEL" } tmp = BytesIO() @@ -195,6 +197,7 @@ async def test_chatstream(client, mocker): "temperature": 0.1, "max_tokens": 2400, "system_message": "", + "model": "TEST_MODEL", "history": [{"user": "hi"}] } diff --git a/tests/integration/test_config.json b/tests/integration/test_config.json new file mode 100644 index 00000000..f33398e5 --- /dev/null +++ b/tests/integration/test_config.json @@ -0,0 +1,44 @@ +{ + "frontend": { + "labels": { + "env_name": "MUC tschibidi-test" + }, + "alternative_logo": true + }, + "backend": { + "enable_auth": false, + "enable_database": false, + "sso_config": { + "sso_issuer": "str", + "role": "lhm-ab-mucgpt-user" + }, + "db_config": { + "db_host": "NOT USED", + "db_name": "NOT USED", + "db_user": "NOT USED", + "db_passwort": "NOT USED" + }, + "chat": { + "log_tokens": false + }, + "brainstorm": { + "log_tokens": false + }, + "sum": { + "log_tokens": false + }, + + "models": [ + { + "type": "AZURE", + "model_name": "TEST_MODEL", + "deployment": "NOT USED", + "endpoint": "NOT USED", + "api_key":"NOT USED", + "max_tokens": 128000, + "api_version": "NOT USED", + "description": "NOT USED" + } + ] + } +} \ No newline at end of file From cd4485db0c61bfb3f9ba94b6fa9a32e6da2c1a9a Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Wed, 28 Aug 2024 11:28:41 +0200 Subject: [PATCH 24/34] =?UTF-8?q?=F0=9F=92=84=20change=20ariell=20prompt?= =?UTF-8?q?=20to=20work=20with=20mistral?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/components/Example/ExampleList.tsx | 152 +++++++++--------- 1 file changed, 74 insertions(+), 78 deletions(-) diff --git a/app/frontend/src/components/Example/ExampleList.tsx b/app/frontend/src/components/Example/ExampleList.tsx index 
2b0e5795..747ab39c 100644 --- a/app/frontend/src/components/Example/ExampleList.tsx +++ b/app/frontend/src/components/Example/ExampleList.tsx @@ -25,86 +25,82 @@ const EXAMPLES: ExampleModel[] = [ }, { text: "🧜‍♀️ Arielle, die Diagramm-Assistentin [Setzt den System-Prompt innerhalb der Chateinstellungen. In diesem ist das Verhalten von Arielle definiert. Für andere Aufgaben abseits der Diagrammerstellung muss der Systemprompt gelöscht werden.]", - value: "Hallo", + value: "Hallo, wie kannst du mir helfen?", system: `Du bist Arielle🧜‍♀️, ein Assistent für das Erstellen von Mermaid Diagrammen. Du hilfst dem Nutzer dabei syntaktisch korrekte Mermaid Diagramme zu erstellen. - Du unterstützt Flussdiagramme, Sequenzdiagramme, Klassendiagramme, User Journeys, Kuchendiagramme, Mindmaps und Gantt-Diagramme. Lehne andere Diagrammtypen ab. - - Gehe in folgenden Schritten vor, jeder Schritt ist eine eigene Nachricht. - 1. Stelle dich kurz freundlich vor und frag den Nutzer nach dem Thema des Diagramms und der Art des Diagramms? - 2. Frage den Nutzer nach den Daten, die dargestellt werden sollen? - 3. Gib den Mermaid-Code für das entsprechende Mermaid Diagramm zurück: - - Halte unbedingt folgende Regeln bei Schritt 3 ein: - - Antworte dabei ausschließlich in Markdown-Codeblöcken in der Programmiersprache mermaid - - Beschrifte die Knoten der Diagramme passend - - Verwende ausschließlich die Daten aus Schritt 1 und 2 - - Eine Beispielausgabe aus Schritt 3 für ein Kuchendiagramm sieht so aus : - \`\`\`mermaid - pie title Pets adopted by volunteers - "Dogs" : 386 - "Cats" : 85 - "Rats" : 15 - \`\`\` - - Eine Beispielausgabe aus Schritt 3 für eine Mindmap sieht so aus: - \`\`\`mermaid - mindmap - root((mindmap)) - Origins - Long history - ::icon(fa fa-book) - Popularisation - British popular psychology author Tony Buzan - Research - On effectivness
and features - On Automatic creation - Uses - Creative techniques - Strategic planning - Argument mapping - Tools - Pen and paper - Mermaid - \`\`\` - Eine Beispielausgabe aus Schritt 3 für ein Sequenzdiagramm sieht so aus: - \`\`\`mermaid - sequenceDiagram - Alice->>+John: Hello John, how are you? - Alice->>+John: John, can you hear me? - John-->>-Alice: Hi Alice, I can hear you! - John-->>-Alice: I feel great! - \`\`\` - - Eine Beispielausgabe aus Schritt 3 für eine Userjourney sieht so aus: - \`\`\`mermaid - journey - title My working day - section Go to work - Make tea: 5: Me - Go upstairs: 3: Me - Do work: 1: Me, Cat - section Go home - Go downstairs: 5: Me - Sit down: 3: Me - \`\`\` - - Eine Beispielausgabe aus Schritt 3 für ein Gantt-diagramm sieht so aus: - - \`\`\`mermaid - gantt - title A Gantt Diagram - dateFormat YYYY-MM-DD - section Section - A task :a1, 2014-01-01, 30d - Another task :after a1, 20d - section Another - Task in Another :2014-01-12, 12d - another task :24d - \`\`\` - - Starte mit Schritt 1. + Du unterstützt Flussdiagramme, Sequenzdiagramme, Klassendiagramme, User Journeys, Kuchendiagramme, Mindmaps und Gantt-Diagramme. Lehne andere Diagrammtypen ab. + + Halte dich an folgende Regeln: + - Bringe die Daten und den Diagrammtyp in Erfahrung + - Gib den Mermaid-Code für das entsprechende Mermaid Diagramm zurück: + - Antworte dabei ausschließlich in Markdown-Codeblöcken in der Programmiersprache mermaid + - Beschrifte die Knoten der Diagramme passend + - Verwende ausschließlich die Daten aus Schritt 1 und 2 + + Eine Beispielausgabe aus Schritt 3 für ein Kuchendiagramm sieht so aus : + \`\`\`mermaid + pie title Pets adopted by volunteers + "Dogs" : 386 + "Cats" : 85 + "Rats" : 15 + \`\`\` + + Eine Beispielausgabe aus Schritt 3 für eine Mindmap sieht so aus: + \`\`\`mermaid + mindmap + root((mindmap)) + Origins + Long history + ::icon(fa fa-book) + Popularisation + British popular psychology author Tony Buzan + Research + On effectivness
and features + On Automatic creation + Uses + Creative techniques + Strategic planning + Argument mapping + Tools + Pen and paper + Mermaid + \`\`\` + + Eine Beispielausgabe aus Schritt 3 für ein Sequenzdiagramm sieht so aus: + \`\`\`mermaid + sequenceDiagram + Alice->>+John: Hello John, how are you? + Alice->>+John: John, can you hear me? + John-->>-Alice: Hi Alice, I can hear you! + John-->>-Alice: I feel great! + \`\`\` + + Eine Beispielausgabe aus Schritt 3 für eine Userjourney sieht so aus: + \`\`\`mermaid + journey + title My working day + section Go to work + Make tea: 5: Me + Go upstairs: 3: Me + Do work: 1: Me, Cat + section Go home + Go downstairs: 5: Me + Sit down: 3: Me + \`\`\` + + Eine Beispielausgabe aus Schritt 3 für ein Gantt-diagramm sieht so aus: + + \`\`\`mermaid + gantt + title A Gantt Diagram + dateFormat YYYY-MM-DD + section Section + A task :a1, 2014-01-01, 30d + Another task :after a1, 20d + section Another + Task in Another :2014-01-12, 12d + another task :24d + \`\`\` ` } ]; From dc5bab80deddd85d3a0016f9b7a9bc259c44d6e6 Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Wed, 28 Aug 2024 12:02:22 +0200 Subject: [PATCH 25/34] :hammer: added used model to db for statistics --- README.md | 2 +- app/backend/app.py | 12 +++++++----- app/backend/brainstorm/brainstorm.py | 3 ++- app/backend/chat/chat.py | 8 +++++--- app/backend/core/datahelper.py | 16 ++++++++++++++-- app/backend/core/modelhelper.py | 13 +++---------- app/backend/summarize/summarize.py | 3 ++- tests/unit/test_modelhelper.py | 10 ---------- 8 files changed, 34 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index 428f4725..d5ea14d4 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ The documentation project is built with technologies we use in our projects (see ## Table of contents * [Built With](#built-with) * [Roadmap](#roadmap) -* [Set up](#set-up-on-azure) +* [Run](#Run) * [Documentation](#documentation) * [Contributing](#contributing) * [License](#license) diff --git a/app/backend/app.py b/app/backend/app.py index 06b3b595..33351d0c 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -64,7 +64,7 @@ async def sum(): text = request_json["text"] if file is None else None splits = impl.split(detaillevel=detaillevel, file=file, text=text) - r = await impl.summarize(splits = splits, department=department, language=request_json["language"] or "Deutsch", model_name=request_json["model"]) + r = await impl.summarize(splits = splits, department=department, language=request_json["language"] or "Deutsch", model_name=request_json["model"] or "gpt-4o-mini") return jsonify(r) except Exception as e: logging.exception("Exception in /sum") @@ -81,7 +81,7 @@ async def brainstorm(): try: impl = cfg["brainstorm_approaches"] - r = await impl.brainstorm(topic=request_json["topic"],language= request_json["language"] or "Deutsch", department=department, model_name=request_json["model"]) + r = await impl.brainstorm(topic=request_json["topic"],language= request_json["language"] or "Deutsch", department=department, model_name=request_json["model"] or "gpt-4o-mini") return jsonify(r) except Exception as e: logging.exception("Exception in /brainstorm") @@ -101,7 +101,7 @@ async def chat_stream(): temperature=request_json['temperature'] or 0.7 max_tokens=request_json['max_tokens'] or 4096 system_message = request_json['system_message'] or None - model = request_json['model'] + model = request_json['model'] or "gpt-4o-mini" response_generator = 
impl.run_with_streaming(history= request_json["history"], temperature=temperature, max_tokens=max_tokens, @@ -127,13 +127,15 @@ async def chat(): impl = cfg["chat_approaches"] temperature=request_json['temperature'] or 0.7 max_tokens=request_json['max_tokens'] or 4096 + model_name=request_json['model'] or "gpt-4o-mini" system_message = request_json['system_message'] or None history = request_json["history"] chatResult = impl.run_without_streaming(history= history, temperature=temperature, max_tokens=max_tokens, system_message=system_message, - department= department) + department= department, + model_name= model_name) return jsonify(chatResult) except Exception as e: logging.exception("Exception in /chat") @@ -172,7 +174,7 @@ async def counttokens(): request_json = await request.get_json() message=request_json['text'] or "" - model = request_json['model']['model_name'] or "gpt-35-turbo" + model = request_json['model']['model_name'] or "gpt-4o-mini" counted_tokens = num_tokens_from_messages([HumanMessage(message)], model) return jsonify(CountResult(count=counted_tokens)) diff --git a/app/backend/brainstorm/brainstorm.py b/app/backend/brainstorm/brainstorm.py index a71879ab..9f65d6b0 100644 --- a/app/backend/brainstorm/brainstorm.py +++ b/app/backend/brainstorm/brainstorm.py @@ -114,7 +114,8 @@ async def brainstorm(self, topic: str, language: str, department: Optional[str], tokencount = total_tokens, department = department, messagecount= 1, - method = "Brainstorm")) + method = "Brainstorm"), + model = model_name) return BrainstormResult(answer=translation) def cleanup(self, chat_translate_result: str) -> str: diff --git a/app/backend/chat/chat.py b/app/backend/chat/chat.py index a2feba65..140595a3 100644 --- a/app/backend/chat/chat.py +++ b/app/backend/chat/chat.py @@ -69,12 +69,13 @@ async def run_with_streaming(self, history: 'list[dict[str, str]]',max_tokens: i tokencount = num_tokens_from_messages(messages=msgs,model=model), #TODO richtiges Modell und tokenizer auswählen department = department, messagecount= len(history), - method = "Chat")) + method = "Chat"), + model = model) info = ChunkInfo(requesttokens=num_tokens_from_messages([msgs[-1]],model), streamedtokens=num_tokens_from_messages([HumanMessage(result)], model)) yield Chunk(type="I", message=info, order=position) - def run_without_streaming(self, history: "Sequence[dict[str, str]]", max_tokens: int, temperature: float, system_message: Optional[str], department: Optional[str]) -> ChatResult: + def run_without_streaming(self, history: "Sequence[dict[str, str]]", max_tokens: int, temperature: float, system_message: Optional[str], department: Optional[str], model_name:str) -> ChatResult: """calls the llm in blocking mode, returns the full result Args: @@ -104,7 +105,8 @@ def run_without_streaming(self, history: "Sequence[dict[str, str]]", max_tokens: tokencount = total_tokens, department = department, messagecount= 1, - method = "Brainstorm")) + method = "Brainstorm"), + model = model_name) return ChatResult(content=ai_message.content) diff --git a/app/backend/core/datahelper.py b/app/backend/core/datahelper.py index b5538b46..4f456de9 100644 --- a/app/backend/core/datahelper.py +++ b/app/backend/core/datahelper.py @@ -8,6 +8,9 @@ from sqlalchemy.orm import Session, declarative_base Base = declarative_base() +DEPARTMENT_STRING_LENGTH=30 +MODEL_STRING_LENGTH=20 +METHOD_STRING_LENGTH=10 class Requestinfo(Base): """Information about an Request to MUCGPT that is stored in the database. 
@@ -16,9 +19,10 @@ class Requestinfo(Base): id = Column(Integer(), primary_key=True) tokencount = Column(Integer()) - department = Column(String(20), nullable=False) + department = Column(String(DEPARTMENT_STRING_LENGTH), nullable=False) + model = Column(String(MODEL_STRING_LENGTH), nullable=False) messagecount = Column(Integer()) - method = Column(String(10)) + method = Column(String(METHOD_STRING_LENGTH)) created_on = Column(DateTime(), default=datetime.now) updated_on = Column(DateTime(), default=datetime.now, onupdate=datetime.now) @@ -47,6 +51,9 @@ def setup_schema(self, base): base.metadata.create_all(self.engine) def addInfo(self, info: Requestinfo): + info.department = self.truncate_string(info.department, DEPARTMENT_STRING_LENGTH) + info.model = self.truncate_string(info.model, MODEL_STRING_LENGTH) + info.method = self.truncate_string(info.method, METHOD_STRING_LENGTH) with Session(self.engine) as session: session.add(info) session.commit() @@ -95,5 +102,10 @@ def export(self): # Das StringIO-Objekt in ein BytesIO-Objekt umwandeln memfile_bytesio = io.BytesIO(memfile.getvalue().encode()) return memfile_bytesio + + def truncate_string(self, s, length): + if len(s) > length: + return s[:length] + return s diff --git a/app/backend/core/modelhelper.py b/app/backend/core/modelhelper.py index 728dbea3..ef608d5c 100644 --- a/app/backend/core/modelhelper.py +++ b/app/backend/core/modelhelper.py @@ -52,14 +52,17 @@ def num_tokens_from_openai_model(messages: List[BaseMessage], model: str): print("Warning: model not found. Using cl100k_base encoding.") encoding = tiktoken.get_encoding("cl100k_base") if model in { + "gpt-3.5-turbo", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", + "gpt-4", "gpt-4-0314", "gpt-4-32k-0314", "gpt-4-0613", "gpt-4-32k-0613", "gpt-4-turbo", "gpt-4-turbo-2024-04-09", + "gpt-4o", "gpt-4o-mini", "gpt-4o-2024-05-13", }: @@ -68,16 +71,6 @@ def num_tokens_from_openai_model(messages: List[BaseMessage], model: str): elif model == "gpt-3.5-turbo-0301": tokens_per_message = 4 # every message follows <|start|>{role/name}\n{content}<|end|>\n tokens_per_name = -1 # if there's a name, the role is omitted - elif "gpt-3.5-turbo" in model: - print("Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.") - return num_tokens_from_messages(messages, model="gpt-3.5-turbo-0613") - elif "gpt-4o" in model: - print( - "Warning: gpt-4o may update over time. Returning num tokens assuming gpt-4o-2024-05-13.") - return num_tokens_from_messages(messages, model="gpt-4o-2024-05-13") - elif "gpt-4" in model: - print("Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.") - return num_tokens_from_messages(messages, model="gpt-4-0613") else: raise NotImplementedError( f"""num_tokens_from_messages() is not implemented for model {model}. 
See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""" diff --git a/app/backend/summarize/summarize.py b/app/backend/summarize/summarize.py index d054271a..8beb88b4 100644 --- a/app/backend/summarize/summarize.py +++ b/app/backend/summarize/summarize.py @@ -201,7 +201,8 @@ async def summarize(self, splits: List[str], language: str, department: Optiona tokencount = total_tokens, department = department, messagecount= 1, - method = "Sum")) + method = "Sum"), + model = model_name) return SummarizeResult(answer= final_summarys) diff --git a/tests/unit/test_modelhelper.py b/tests/unit/test_modelhelper.py index 6444436b..fa3898d6 100644 --- a/tests/unit/test_modelhelper.py +++ b/tests/unit/test_modelhelper.py @@ -52,16 +52,6 @@ def test_num_tokens_from_messages_invalid_openai_model(self): @pytest.mark.unit def test_num_tokens_from_messages_openai_gpt0301(self): assert num_tokens_from_messages(self.messages, "gpt-3.5-turbo-0301") == 34 - - @pytest.mark.asyncio - @pytest.mark.unit - def test_num_tokens_from_messages_openai_gptturbo(self): - assert num_tokens_from_messages(self.messages, "gpt-3.5-turbo") == 31 - - @pytest.mark.asyncio - @pytest.mark.unit - def test_num_tokens_from_messages_openai_gpt4(self): - assert num_tokens_from_messages(self.messages, "gpt-4") == 31 @pytest.mark.asyncio @pytest.mark.unit From 7c12aa1a4343552b65b9cd110bbe9f0d83cb0daf Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Wed, 28 Aug 2024 13:49:37 +0200 Subject: [PATCH 26/34] :bug: fixed bug save to db --- app/backend/brainstorm/brainstorm.py | 4 ++-- app/backend/chat/chat.py | 8 ++++---- app/backend/summarize/summarize.py | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/app/backend/brainstorm/brainstorm.py b/app/backend/brainstorm/brainstorm.py index 9f65d6b0..2596e3a0 100644 --- a/app/backend/brainstorm/brainstorm.py +++ b/app/backend/brainstorm/brainstorm.py @@ -114,8 +114,8 @@ async def brainstorm(self, topic: str, language: str, department: Optional[str], tokencount = total_tokens, department = department, messagecount= 1, - method = "Brainstorm"), - model = model_name) + method = "Brainstorm", + model = model_name)) return BrainstormResult(answer=translation) def cleanup(self, chat_translate_result: str) -> str: diff --git a/app/backend/chat/chat.py b/app/backend/chat/chat.py index 140595a3..ffff32a0 100644 --- a/app/backend/chat/chat.py +++ b/app/backend/chat/chat.py @@ -69,8 +69,8 @@ async def run_with_streaming(self, history: 'list[dict[str, str]]',max_tokens: i tokencount = num_tokens_from_messages(messages=msgs,model=model), #TODO richtiges Modell und tokenizer auswählen department = department, messagecount= len(history), - method = "Chat"), - model = model) + method = "Chat", + model = model)) info = ChunkInfo(requesttokens=num_tokens_from_messages([msgs[-1]],model), streamedtokens=num_tokens_from_messages([HumanMessage(result)], model)) yield Chunk(type="I", message=info, order=position) @@ -105,8 +105,8 @@ def run_without_streaming(self, history: "Sequence[dict[str, str]]", max_tokens: tokencount = total_tokens, department = department, messagecount= 1, - method = "Brainstorm"), - model = model_name) + method = "Brainstorm", + model = model_name)) return ChatResult(content=ai_message.content) diff --git a/app/backend/summarize/summarize.py b/app/backend/summarize/summarize.py index 8beb88b4..e397335c 100644 --- a/app/backend/summarize/summarize.py +++ 
b/app/backend/summarize/summarize.py @@ -201,8 +201,8 @@ async def summarize(self, splits: List[str], language: str, department: Optiona tokencount = total_tokens, department = department, messagecount= 1, - method = "Sum"), - model = model_name) + method = "Sum", + model = model_name)) return SummarizeResult(answer= final_summarys) From 78e1f77144d1109a9b6cd790b71f78c003336e79 Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Wed, 28 Aug 2024 13:51:49 +0200 Subject: [PATCH 27/34] No LLMChain anymore --- app/backend/summarize/summarize.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/backend/summarize/summarize.py b/app/backend/summarize/summarize.py index 8beb88b4..3bbaa7d0 100644 --- a/app/backend/summarize/summarize.py +++ b/app/backend/summarize/summarize.py @@ -3,7 +3,7 @@ from concurrent.futures import ThreadPoolExecutor from typing import Any, List, Optional, Tuple -from langchain.chains import LLMChain, SequentialChain +from langchain.chains import SequentialChain from langchain.prompts import PromptTemplate from langchain_community.callbacks import get_openai_callback from langchain_core.runnables.base import RunnableSerializable @@ -126,12 +126,12 @@ def run_io_tasks_in_parallel(self, tasks) -> List[Tuple[Summarys, int]]: return results - def call_and_cleanup(self, text: str, summarizeChain: LLMChain) -> Tuple[Summarys, int]: + def call_and_cleanup(self, text: str, summarizeChain: RunnableSerializable) -> Tuple[Summarys, int]: """calls summarization chain and cleans the data Args: text (str): text, to be summarized - summarizeChain (LLMChain): the chain, that summarizes and cleans the data + summarizeChain (RunnableSerializable): the chain, that summarizes and cleans the data Returns: Tuple[List[str], int]: the last n summaries, the number of consumed tokens From 2527c10485421c96c4b8639b448f75a3848be346 Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Wed, 28 Aug 2024 14:26:37 +0200 Subject: [PATCH 28/34] :bug: fixed model for /chat --- app/frontend/src/pages/chat/Chat.tsx | 6 +++--- app/frontend/src/service/storage.ts | 12 +++++++----- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/app/frontend/src/pages/chat/Chat.tsx b/app/frontend/src/pages/chat/Chat.tsx index 52818f91..7b913a71 100644 --- a/app/frontend/src/pages/chat/Chat.tsx +++ b/app/frontend/src/pages/chat/Chat.tsx @@ -119,7 +119,7 @@ const Chat = () => { error && setError(undefined); setIsLoading(true); let askResponse: AskResponse = {} as AskResponse; - saveToDB([question, { ...askResponse, answer: "", tokens: 0 }, 0], storage, startId, idCounter, setCurrentId, setIdCounter, language, temperature, system ? system : "", max_tokens) + saveToDB([question, { ...askResponse, answer: "", tokens: 0 }, 0], storage, startId, idCounter, setCurrentId, setIdCounter, language, temperature, system ? system : "", max_tokens, LLM.model_name) try { const history: ChatTurn[] = answers.map(a => ({ user: a[0], bot: a[1].answer })); const request: ChatRequest = { @@ -166,7 +166,7 @@ const Chat = () => { } } if (startId == currentId) { - saveToDB([question, latestResponse, user_tokens], storage, startId, idCounter, setCurrentId, setIdCounter, language, temperature, system ? system : "", max_tokens) + saveToDB([question, latestResponse, user_tokens], storage, startId, idCounter, setCurrentId, setIdCounter, language, temperature, system ? 
system : "", max_tokens, LLM.model_name) } } else { const parsedResponse: AskResponse = await response.json(); @@ -175,7 +175,7 @@ const Chat = () => { } setAnswers([...answers, [question, parsedResponse, 0]]); if (startId == currentId) { - saveToDB([question, parsedResponse, 0], storage, currentId, idCounter, setCurrentId, setIdCounter, language, temperature, system ? system : "", max_tokens) + saveToDB([question, parsedResponse, 0], storage, currentId, idCounter, setCurrentId, setIdCounter, language, temperature, system ? system : "", max_tokens, LLM.model_name) } } } catch (e) { diff --git a/app/frontend/src/service/storage.ts b/app/frontend/src/service/storage.ts index e494316a..1a87518d 100644 --- a/app/frontend/src/service/storage.ts +++ b/app/frontend/src/service/storage.ts @@ -34,7 +34,8 @@ export async function saveToDB( language?: string, temperature?: number, system_message?: string, - max_tokens?: number + max_tokens?: number, + model?: string ) { let openRequest = indexedDB.open(storage.db_name, storage.db_version); openRequest.onupgradeneeded = () => onUpgrade(openRequest, storage); @@ -66,8 +67,8 @@ export async function saveToDB( // if the chat does not exist in the DB let name: string = ""; let new_idcounter = id_counter; - if (language != undefined && temperature != undefined && system_message != undefined && max_tokens != undefined) { - name = await (await getChatName(a, language, temperature, system_message, max_tokens)).content; + if (language != undefined && temperature != undefined && system_message != undefined && max_tokens != undefined && model != undefined) { + name = await (await getChatName(a, language, temperature, system_message, max_tokens, model)).content; name = name.replaceAll('"', "").replaceAll(".", ""); } if (storage.objectStore_name === "chat") { @@ -99,7 +100,7 @@ export async function saveToDB( }; } -export async function getChatName(answers: any, language: string, temperature: number, system_message: string, max_tokens: number) { +export async function getChatName(answers: any, language: string, temperature: number, system_message: string, max_tokens: number, model: string) { const history: ChatTurn[] = [{ user: answers[0], bot: answers[1].answer }]; const request: ChatRequest = { history: [ @@ -113,7 +114,8 @@ export async function getChatName(answers: any, language: string, temperature: n language: language, temperature: temperature, system_message: system_message, - max_tokens: max_tokens + max_tokens: max_tokens, + model: model }; const response = await chatApi(request); handleRedirect(response); From db0eba9bcb5fdf0451e1b02c667d66bd0ff17207 Mon Sep 17 00:00:00 2001 From: pilitz <102222789+pilitz@users.noreply.github.com> Date: Wed, 28 Aug 2024 15:04:35 +0200 Subject: [PATCH 29/34] :hammer: refactored the deprecated LLMChain --- app/backend/brainstorm/brainstorm.py | 25 ++++++++++++------------- app/backend/summarize/summarize.py | 11 ++++++----- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/app/backend/brainstorm/brainstorm.py b/app/backend/brainstorm/brainstorm.py index 2596e3a0..8588ca25 100644 --- a/app/backend/brainstorm/brainstorm.py +++ b/app/backend/brainstorm/brainstorm.py @@ -1,7 +1,8 @@ +from operator import itemgetter from typing import Optional -from langchain.chains import LLMChain, SequentialChain from langchain.prompts import PromptTemplate +from langchain.schema.output_parser import StrOutputParser from langchain_community.callbacks import get_openai_callback from langchain_core.runnables.base import 
RunnableSerializable @@ -93,21 +94,19 @@ async def brainstorm(self, topic: str, language: str, department: Optional[str], "llm": model_name } llm = self.llm.with_config(configurable=config) + # get prompts + brainstorm_prompt = self.getBrainstormPrompt() + translation_prompt = self.getTranslationPrompt() # construct chains - brainstormChain = LLMChain(llm=llm, prompt=self.getBrainstormPrompt(), output_key="brainstorm") - translationChain = LLMChain(llm=llm, prompt=self.getTranslationPrompt(), output_key="translation") - overall_chain = SequentialChain( - chains=[brainstormChain, translationChain], - input_variables=["language", "topic"], - output_variables=["brainstorm","translation"]) - - + brainstormChain = brainstorm_prompt |llm | StrOutputParser() + translationChain = translation_prompt |llm | StrOutputParser() + # build complete chain + overall_chain = ({"brainstorm": brainstormChain,"language": itemgetter("language") }| translationChain ) + with get_openai_callback() as cb: - result = await overall_chain.acall({"topic": topic, "language": language}) + result = await overall_chain.ainvoke({"topic": topic, "language": language}) total_tokens = cb.total_tokens - - translation = result['translation'] - translation = self.cleanup(str(translation)) + translation = self.cleanup(str(result)) if self.config["log_tokens"]: self.repo.addInfo(Requestinfo( diff --git a/app/backend/summarize/summarize.py b/app/backend/summarize/summarize.py index e397335c..1b95f7e5 100644 --- a/app/backend/summarize/summarize.py +++ b/app/backend/summarize/summarize.py @@ -3,12 +3,12 @@ from concurrent.futures import ThreadPoolExecutor from typing import Any, List, Optional, Tuple -from langchain.chains import LLMChain, SequentialChain +from langchain.chains import SequentialChain from langchain.prompts import PromptTemplate from langchain_community.callbacks import get_openai_callback -from langchain_core.runnables.base import RunnableSerializable -from langchain_core.pydantic_v1 import BaseModel, Field from langchain_core.prompts import PromptTemplate +from langchain_core.pydantic_v1 import BaseModel, Field +from langchain_core.runnables.base import RunnableSequence, RunnableSerializable from core.datahelper import Repository, Requestinfo from core.textsplit import splitPDF, splitText @@ -16,6 +16,7 @@ from core.types.LlmConfigs import LlmConfigs from summarize.summarizeresult import SummarizeResult + class DenserSummary(BaseModel): missing_entities: List[str] = Field(description="An list of missing entitys") denser_summary: str = Field(description="denser summary, covers every entity in detail") @@ -126,12 +127,12 @@ def run_io_tasks_in_parallel(self, tasks) -> List[Tuple[Summarys, int]]: return results - def call_and_cleanup(self, text: str, summarizeChain: LLMChain) -> Tuple[Summarys, int]: + def call_and_cleanup(self, text: str, summarizeChain: RunnableSequence) -> Tuple[Summarys, int]: """calls summarization chain and cleans the data Args: text (str): text, to be summarized - summarizeChain (LLMChain): the chain, that summarizes and cleans the data + summarizeChain (RunnableSequence): the chain, that summarizes and cleans the data Returns: Tuple[List[str], int]: the last n summaries, the number of consumed tokens From a9a64866b0690a160bc2248c56f01b8aa00c4000 Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Wed, 28 Aug 2024 16:22:30 +0200 Subject: [PATCH 30/34] =?UTF-8?q?=E2=9C=92=20describe=20config?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
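For reference, a config file written against the schema added below can be sanity-checked before the backend starts. This is only a sketch: it assumes the third-party `jsonschema` package and the default file locations (`config/default.json` and `config/mucgpt_config.schema.json`); neither the dependency nor such a check is introduced by this patch.

```python
import json

from jsonschema import validate  # assumed extra dependency, not added by this patch

# Load the schema added by this patch and a config written against it.
with open("config/mucgpt_config.schema.json", encoding="utf-8") as schema_file:
    schema = json.load(schema_file)
with open("config/default.json", encoding="utf-8") as config_file:
    config = json.load(config_file)

# Raises jsonschema.exceptions.ValidationError if e.g. backend.models is missing
# or a model entry lacks a required key such as "endpoint" or "api_key".
validate(instance=config, schema=schema)
print("config ok, models:", [m["model_name"] for m in config["backend"]["models"]])
```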
README.md | 2 +- config/mucgpt_config.schema.json | 430 +++++++++++++++++++++++++++++++ 2 files changed, 431 insertions(+), 1 deletion(-) create mode 100644 config/mucgpt_config.schema.json diff --git a/README.md b/README.md index d5ea14d4..d13f7f00 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ The documentation project is built with technologies we use in our projects (see See the [open issues](https://github.com/it-at-m/mucgpt/issues) for a full list of proposed features (and known issues). ## Run - Configure your environment in [config/default.json](config/default.json). Insert Model Endpoint and API Key for your connection to an OpenAI completion endpoint or an Azure OpenAI completions endpoint. + Configure your environment in [config/default.json](config/default.json). The schema of the configuration is [cofnig/mucgpt_config.schema.json](config/mucgpt_config.schema.json) described. Insert Model Endpoint and API Key for your connection to an OpenAI completion endpoint or an Azure OpenAI completions endpoint. ### Run locally ``` cd app\backend diff --git a/config/mucgpt_config.schema.json b/config/mucgpt_config.schema.json new file mode 100644 index 00000000..ff4d1c89 --- /dev/null +++ b/config/mucgpt_config.schema.json @@ -0,0 +1,430 @@ +{ + "$schema": "https://json-schema.org/draft/2019-09/schema", + "$id": "http://example.com/example.json", + "type": "object", + "default": {}, + "title": "MUCGPT config", + "required": [ + "frontend", + "backend" + ], + "properties": { + "frontend": { + "type": "object", + "default": {}, + "title": "The frontend configuration", + "required": [ + "labels", + "alternative_logo" + ], + "properties": { + "labels": { + "type": "object", + "default": {}, + "title": "Labels used in the frontend", + "required": [ + "env_name" + ], + "properties": { + "env_name": { + "type": "string", + "default": "", + "title": "The env_name Schema", + "examples": [ + "MUCGPT" + ] + } + }, + "examples": [{ + "env_name": "MUCGPT" + }] + }, + "alternative_logo": { + "type": "boolean", + "default": false, + "title": "use the alternative logo, more of an easter egg", + "examples": [ + false + ] + } + }, + "examples": [{ + "labels": { + "env_name": "MUCGPT" + }, + "alternative_logo": false + }] + }, + "backend": { + "type": "object", + "default": {}, + "title": "The backend configuration", + "required": [ + "enable_auth", + "enable_database", + "sso_config", + "db_config", + "chat", + "brainstorm", + "sum", + "models" + ], + "properties": { + "enable_auth": { + "type": "boolean", + "default": false, + "title": "Enable authentification over OpenID Connect, currently only works with Azure Easy Auth..", + "examples": [ + false + ] + }, + "enable_database": { + "type": "boolean", + "default": false, + "title": "Use a postgresql database to save usage information", + "examples": [ + false + ] + }, + "sso_config": { + "type": "object", + "default": {}, + "title": "Describes the OpenidConnect Provider", + "required": [ + "sso_issuer", + "role" + ], + "properties": { + "sso_issuer": { + "type": "string", + "default": "", + "title": "The sso_issuer. Something like: https://mysso/auth/realms/myrealm\"", + "examples": [ + "TODO" + ] + }, + "role": { + "type": "string", + "default": "", + "title": "The role, we look if the user has a certain role.", + "examples": [ + "lhm-ab-mucgpt-user" + ] + } + }, + "examples": [{ + "sso_issuer": "TODO", + "role": "lhm-ab-mucgpt-user" + }] + }, + "db_config": { + "type": "object", + "default": {}, + "title": "The database configuration. 
Has to be a postgresql database.", + "required": [ + "db_host", + "db_name", + "db_user", + "db_passwort" + ], + "properties": { + "db_host": { + "type": "string", + "default": "", + "title": "The db_host", + "examples": [ + "TODO" + ] + }, + "db_name": { + "type": "string", + "default": "", + "title": "The db_name ", + "examples": [ + "postgres" + ] + }, + "db_user": { + "type": "string", + "default": "", + "title": "The db_user", + "examples": [ + "TODO" + ] + }, + "db_passwort": { + "type": "string", + "default": "", + "title": "The db_passwort ", + "examples": [ + "TODO" + ] + } + }, + "examples": [{ + "db_host": "TODO", + "db_name": "postgres", + "db_user": "TODO", + "db_passwort": "TODO" + }] + }, + "chat": { + "type": "object", + "default": {}, + "title": "The chat configuration", + "required": [ + "log_tokens" + ], + "properties": { + "log_tokens": { + "type": "boolean", + "default": false, + "title": "Log usage in database?", + "examples": [ + false + ] + } + }, + "examples": [{ + "log_tokens": false + }] + }, + "brainstorm": { + "type": "object", + "default": {}, + "title": "The brainstorm configuration", + "required": [ + "log_tokens" + ], + "properties": { + "log_tokens": { + "type": "boolean", + "default": false, + "title": "Log usage in database?", + "examples": [ + false + ] + } + }, + "examples": [{ + "log_tokens": false + }] + }, + "sum": { + "type": "object", + "default": {}, + "title": "The sumarization configuration", + "required": [ + "log_tokens" + ], + "properties": { + "log_tokens": { + "type": "boolean", + "default": false, + "title": "Log usage in database?", + "examples": [ + false + ] + } + }, + "examples": [{ + "log_tokens": false + }] + }, + "models": { + "type": "array", + "default": [], + "title": "Configuration for models", + "items": { + "type": "object", + "title": "One model configuration", + "required": [ + "type", + "model_name", + "endpoint", + "api_key", + "max_tokens", + "deployment", + "api_version" + ], + "properties": { + "type": { + "type": "string", + "title": "Either AZURE or OPENAI", + "examples": [ + "OPENAI", + "AZURE" + ] + }, + "model_name": { + "type": "string", + "title": "The name of the model", + "examples": [ + "gpt-4o-mini" + ] + }, + "endpoint": { + "type": "string", + "title": "The model endpoint", + "examples": [ + "mymodel.openai.azure.com/" + ] + }, + "api_key": { + "type": "string", + "title": "The api_key", + "examples": [ + "BLABLUBLAUBLAUBLA" + ] + }, + "max_tokens": { + "type": "integer", + "title": "The context length of the LLM", + "examples": [ + 128000, + 0 + ] + }, + "deployment": { + "type": "string", + "default": "", + "title": "The deployment, only needed for AZURE type Models", + "examples": [ + "chat" + ] + }, + "api_version": { + "type": "string", + "default": "", + "title": "The api_version", + "examples": [ + "\"2023-03-15-preview\"" + ] + } + }, + "examples": [{ + "type": "OPENAI", + "model_name": "mucgpt-mini", + "endpoint": "mucgptmini.openai.azure.com", + "api_key": "BALBLBLABUALB", + "max_tokens": 128000 + }, + { + "type": "AZURE", + "model_name": "mucgpt-maxi", + "deployment": "chat", + "endpoint": "mucgpt-maxi.openai.azure.com", + "api_key": "BALBABUALB", + "api_version": "2024-01", + "max_tokens": 1000000 + }] + }, + "examples": [ + [{ + "type": "OPENAI", + "model_name": "mucgpt-mini", + "endpoint": "mucgptmini.openai.azure.com", + "api_key": "BALBLBLABUALB", + "max_tokens": 128000 + }, + { + "type": "AZURE", + "model_name": "mucgpt-maxi", + "deployment": "chat", + "endpoint": 
"mucgpt-maxi.openai.azure.com", + "api_key": "BALBABUALB", + "api_version": "2024-01", + "max_tokens": 1000000 + }] + ] + } + }, + "examples": [{ + "enable_auth": false, + "enable_database": false, + "sso_config": { + "sso_issuer": "TODO", + "role": "lhm-ab-mucgpt-user" + }, + "db_config": { + "db_host": "TODO", + "db_name": "postgres", + "db_user": "TODO", + "db_passwort": "TODO" + }, + "chat": { + "log_tokens": false + }, + "brainstorm": { + "log_tokens": false + }, + "sum": { + "log_tokens": false + }, + "models": [{ + "type": "OPENAI", + "model_name": "mucgpt-mini", + "endpoint": "mucgptmini.openai.azure.com", + "api_key": "BALBLBLABUALB", + "max_tokens": 128000 + }, + { + "type": "AZURE", + "model_name": "mucgpt-maxi", + "deployment": "chat", + "endpoint": "mucgpt-maxi.openai.azure.com", + "api_key": "BALBABUALB", + "api_version": "2024-01", + "max_tokens": 1000000 + }] + }] + } + }, + "examples": [{ + "frontend": { + "labels": { + "env_name": "MUCGPT" + }, + "alternative_logo": false + }, + "backend": { + "enable_auth": false, + "enable_database": false, + "sso_config": { + "sso_issuer": "TODO", + "role": "lhm-ab-mucgpt-user" + }, + "db_config": { + "db_host": "TODO", + "db_name": "postgres", + "db_user": "TODO", + "db_passwort": "TODO" + }, + "chat": { + "log_tokens": false + }, + "brainstorm": { + "log_tokens": false + }, + "sum": { + "log_tokens": false + }, + "models": [{ + "type": "OPENAI", + "model_name": "mucgpt-mini", + "endpoint": "mucgptmini.openai.azure.com", + "api_key": "BALBLBLABUALB", + "max_tokens": 128000 + }, + { + "type": "AZURE", + "model_name": "mucgpt-maxi", + "deployment": "chat", + "endpoint": "mucgpt-maxi.openai.azure.com", + "api_key": "BALBABUALB", + "api_version": "2024-01", + "max_tokens": 1000000 + }] + } + }] +} \ No newline at end of file From d772801b327d3d80927ae11fa5a0be001bcf4e4e Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Wed, 28 Aug 2024 16:25:01 +0200 Subject: [PATCH 31/34] no more biceps files ot check --- .github/workflows/azure-dev-validation.yaml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/azure-dev-validation.yaml b/.github/workflows/azure-dev-validation.yaml index b0c2c257..dd6b757c 100644 --- a/.github/workflows/azure-dev-validation.yaml +++ b/.github/workflows/azure-dev-validation.yaml @@ -12,11 +12,6 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 - - - name: Build Bicep for linting - uses: azure/CLI@v1 - with: - inlineScript: az config set bicep.use_binary_from_path=false && az bicep build -f infra/main.bicep --stdout - name: Run Microsoft Security DevOps Analysis uses: microsoft/security-devops-action@preview From d93947cfd52183efaa467119576f17bc78827a41 Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Wed, 28 Aug 2024 16:33:55 +0200 Subject: [PATCH 32/34] =?UTF-8?q?=F0=9F=A4=99=20Make=20ruff=20happy?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/backend/app.py | 10 ++++++---- app/backend/chat/chat.py | 3 +-- app/backend/core/datahelper.py | 2 +- app/backend/core/llmhelper.py | 5 ++++- app/backend/core/modelhelper.py | 17 ++++++++--------- app/backend/core/types/AppConfig.py | 1 + app/backend/init_app.py | 3 +-- app/backend/summarize/summarize.py | 5 +---- tests/integration/conftest.py | 2 +- tests/unit/test_datahelper.py | 2 +- tests/unit/test_llmhelper.py | 2 +- tests/unit/test_modelhelper.py | 9 +++++++-- 12 files changed, 33 insertions(+), 28 deletions(-) diff --git a/app/backend/app.py b/app/backend/app.py index 
33351d0c..db6b7789 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -2,6 +2,8 @@ import logging import os from typing import List, cast + +from langchain_core.messages.human import HumanMessage from opentelemetry.instrumentation.asgi import OpenTelemetryMiddleware from quart import ( Blueprint, @@ -14,12 +16,12 @@ send_file, send_from_directory, ) -from langchain_core.messages.human import HumanMessage -from core.modelhelper import num_tokens_from_messages -from core.types.Config import ModelsConfig, ModelsDTO + from core.authentification import AuthentificationHelper, AuthError from core.helper import format_as_ndjson +from core.modelhelper import num_tokens_from_messages from core.types.AppConfig import AppConfig +from core.types.Config import ModelsConfig, ModelsDTO from core.types.countresult import CountResult from init_app import initApp @@ -168,7 +170,7 @@ async def getStatistics(): @bp.route("/counttokens", methods=["POST"]) async def counttokens(): - cfg = get_config_and_authentificate() + get_config_and_authentificate() if not request.is_json: return jsonify({"error": "request must be json"}), 415 diff --git a/app/backend/chat/chat.py b/app/backend/chat/chat.py index ffff32a0..25a43c64 100644 --- a/app/backend/chat/chat.py +++ b/app/backend/chat/chat.py @@ -1,9 +1,8 @@ from typing import AsyncGenerator, Optional, Sequence from langchain_community.callbacks import get_openai_callback -from langchain_core.messages import AIMessage +from langchain_core.messages import AIMessage, HumanMessage, SystemMessage from langchain_core.runnables.base import RunnableSerializable -from langchain_core.messages import HumanMessage, SystemMessage from chat.chatresult import ChatResult from core.datahelper import Repository, Requestinfo diff --git a/app/backend/core/datahelper.py b/app/backend/core/datahelper.py index 4f456de9..c6e7a8d5 100644 --- a/app/backend/core/datahelper.py +++ b/app/backend/core/datahelper.py @@ -27,7 +27,7 @@ class Requestinfo(Base): updated_on = Column(DateTime(), default=datetime.now, onupdate=datetime.now) def __repr__(self): - return f'' + return f'' diff --git a/app/backend/core/llmhelper.py b/app/backend/core/llmhelper.py index e935f994..65dffb05 100644 --- a/app/backend/core/llmhelper.py +++ b/app/backend/core/llmhelper.py @@ -1,10 +1,13 @@ +from typing import List + from langchain_community.llms.fake import FakeListLLM from langchain_core.runnables import ConfigurableField from langchain_core.runnables.base import RunnableSerializable from langchain_openai import AzureChatOpenAI, ChatOpenAI -from typing import List + from core.types.Config import ModelsConfig + class ModelsConfigurationException(Exception): pass diff --git a/app/backend/core/modelhelper.py b/app/backend/core/modelhelper.py index ef608d5c..17eb8859 100644 --- a/app/backend/core/modelhelper.py +++ b/app/backend/core/modelhelper.py @@ -1,15 +1,17 @@ from __future__ import annotations -from typing import List import tiktoken from langchain_core.messages.base import BaseMessage from mistral_common.protocol.instruct.messages import ( - UserMessage, SystemMessage, AssistantMessage + AssistantMessage, + SystemMessage, + UserMessage, ) from mistral_common.protocol.instruct.request import ChatCompletionRequest from mistral_common.tokens.tokenizers.mistral import MistralTokenizer -def num_tokens_from_messages(messages: List[BaseMessage], model: str): + +def num_tokens_from_messages(messages: list[BaseMessage], model: str): """Return the number of tokens used by a list of messages.""" if("gpt-" in 
model): return num_tokens_from_openai_model(messages=messages, model=model) @@ -17,11 +19,10 @@ def num_tokens_from_messages(messages: List[BaseMessage], model: str): return num_tokens_from_mistral_model(messages=messages, model=model) else: raise NotImplementedError( - f"""No tokenizer for model found. currently only openai and mistral are supported.""" + """No tokenizer for model found. currently only openai and mistral are supported.""" ) -def num_tokens_from_mistral_model(messages: List[BaseMessage], model: str): +def num_tokens_from_mistral_model(messages: list[BaseMessage], model: str): """Return the number of tokens used by a list of messages for a given mistral model.""" - num_tokens = 0 # see which tokenizer for which model is needed, https://github.com/mistralai/mistral-common/blob/main/README.md if(model == "mistral-large-2407" ): tokenizer = MistralTokenizer.v3() @@ -44,7 +45,7 @@ def num_tokens_from_mistral_model(messages: List[BaseMessage], model: str): ChatCompletionRequest(messages=mistral_messages)) return len(tokenized.tokens) -def num_tokens_from_openai_model(messages: List[BaseMessage], model: str): +def num_tokens_from_openai_model(messages: list[BaseMessage], model: str): """Return the number of tokens used by a list of messages for a given openai model.""" try: encoding = tiktoken.encoding_for_model(model) @@ -67,10 +68,8 @@ def num_tokens_from_openai_model(messages: List[BaseMessage], model: str): "gpt-4o-2024-05-13", }: tokens_per_message = 3 - tokens_per_name = 1 elif model == "gpt-3.5-turbo-0301": tokens_per_message = 4 # every message follows <|start|>{role/name}\n{content}<|end|>\n - tokens_per_name = -1 # if there's a name, the role is omitted else: raise NotImplementedError( f"""num_tokens_from_messages() is not implemented for model {model}. 
See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""" diff --git a/app/backend/core/types/AppConfig.py b/app/backend/core/types/AppConfig.py index 44deae5f..d7939976 100644 --- a/app/backend/core/types/AppConfig.py +++ b/app/backend/core/types/AppConfig.py @@ -1,4 +1,5 @@ from typing import List, TypedDict + from brainstorm.brainstorm import Brainstorm from chat.chat import Chat from core.authentification import AuthentificationHelper diff --git a/app/backend/init_app.py b/app/backend/init_app.py index 30d10e8b..90ebed16 100644 --- a/app/backend/init_app.py +++ b/app/backend/init_app.py @@ -1,5 +1,6 @@ import os from typing import Tuple + from brainstorm.brainstorm import Brainstorm from chat.chat import Chat from core.authentification import AuthentificationHelper @@ -11,8 +12,6 @@ from summarize.summarize import Summarize - - def initApproaches(cfg: BackendConfig, repoHelper: Repository) -> Tuple[Chat, Brainstorm, Summarize]: """init different approaches diff --git a/app/backend/summarize/summarize.py b/app/backend/summarize/summarize.py index bae4a4de..17e7c440 100644 --- a/app/backend/summarize/summarize.py +++ b/app/backend/summarize/summarize.py @@ -1,10 +1,7 @@ -import json -import re from concurrent.futures import ThreadPoolExecutor -from typing import Any, List, Optional, Tuple +from typing import List, Optional, Tuple from langchain.chains import SequentialChain -from langchain.prompts import PromptTemplate from langchain_community.callbacks import get_openai_callback from langchain_core.prompts import PromptTemplate from langchain_core.pydantic_v1 import BaseModel, Field diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index cad4b403..e6689620 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,4 +1,5 @@ import os + import openai import pytest import pytest_asyncio @@ -6,7 +7,6 @@ import app - @pytest.fixture def mock_openai_chatcompletion(monkeypatch): class AsyncChatCompletionIterator: diff --git a/tests/unit/test_datahelper.py b/tests/unit/test_datahelper.py index 55cacf49..ace4beaa 100644 --- a/tests/unit/test_datahelper.py +++ b/tests/unit/test_datahelper.py @@ -3,7 +3,7 @@ import pytest from sqlalchemy import Engine -from core.datahelper import Requestinfo, Repository +from core.datahelper import Repository, Requestinfo class Test_Datahelper(unittest.TestCase): diff --git a/tests/unit/test_llmhelper.py b/tests/unit/test_llmhelper.py index cd357413..7ec597ec 100644 --- a/tests/unit/test_llmhelper.py +++ b/tests/unit/test_llmhelper.py @@ -3,7 +3,7 @@ import pytest from langchain_core.runnables.base import RunnableSerializable -from core.llmhelper import getModel, ModelsConfigurationException +from core.llmhelper import ModelsConfigurationException, getModel class Test_LLMhelper(unittest.TestCase): diff --git a/tests/unit/test_modelhelper.py b/tests/unit/test_modelhelper.py index fa3898d6..d7e6efcd 100644 --- a/tests/unit/test_modelhelper.py +++ b/tests/unit/test_modelhelper.py @@ -1,10 +1,15 @@ import unittest import pytest - -from core.modelhelper import num_tokens_from_messages, num_tokens_from_openai_model, num_tokens_from_mistral_model from langchain_core.messages.base import BaseMessage +from core.modelhelper import ( + num_tokens_from_messages, + num_tokens_from_mistral_model, + num_tokens_from_openai_model, +) + + class Test_Modelhelper(unittest.TestCase): def setUp(self): From 4dbd78b24d496ad77e4c72923684471da36617b1 Mon Sep 17 00:00:00 
2001 From: "michael.jaumann" Date: Wed, 28 Aug 2024 16:37:32 +0200 Subject: [PATCH 33/34] Support only for newer python versions --- .github/workflows/python-test.yaml | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-test.yaml b/.github/workflows/python-test.yaml index fd32eb8e..ab904469 100644 --- a/.github/workflows/python-test.yaml +++ b/.github/workflows/python-test.yaml @@ -14,7 +14,7 @@ jobs: fail-fast: false matrix: os: ["ubuntu-20.04"] - python_version: ["3.9", "3.10", "3.11"] + python_version: ["3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v4 - name: Setup python diff --git a/README.md b/README.md index d13f7f00..ea81ebd0 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ Why should you use MUCGPT? See for yourself: The documentation project is built with technologies we use in our projects (see [requirements-dev.txt](/requirements-dev.txt)): ### Backend: -* [Python 3.9, 3.10 or 3.11](https://www.python.org/downloads/) +* [Python 3.10, 3.11 or 3.12](https://www.python.org/downloads/) * [Quart](https://pgjones.gitlab.io/quart/) * [LangChain](https://www.langchain.com/) From dc362830fd1adca0f0d75d82b568ba89cd031a75 Mon Sep 17 00:00:00 2001 From: "michael.jaumann" Date: Wed, 28 Aug 2024 16:45:31 +0200 Subject: [PATCH 34/34] =?UTF-8?q?=F0=9F=9B=A0=20fix=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/backend/core/datahelper.py | 2 +- tests/unit/test_datahelper.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/app/backend/core/datahelper.py b/app/backend/core/datahelper.py index c6e7a8d5..0f9b9027 100644 --- a/app/backend/core/datahelper.py +++ b/app/backend/core/datahelper.py @@ -27,7 +27,7 @@ class Requestinfo(Base): updated_on = Column(DateTime(), default=datetime.now, onupdate=datetime.now) def __repr__(self): - return f'' + return f'' diff --git a/tests/unit/test_datahelper.py b/tests/unit/test_datahelper.py index ace4beaa..c754c2c9 100644 --- a/tests/unit/test_datahelper.py +++ b/tests/unit/test_datahelper.py @@ -10,13 +10,14 @@ class Test_Datahelper(unittest.TestCase): @pytest.mark.asyncio @pytest.mark.unit def test_requestinfo_creation(self): - request = Requestinfo(tokencount=100, department='IT', messagecount=50, method='GET') + request = Requestinfo(tokencount=100, department='IT', messagecount=50, method='GET', model="MUCGPT") self.assertIsInstance(request, Requestinfo) self.assertEqual(request.tokencount, 100) self.assertEqual(request.department, 'IT') self.assertEqual(request.messagecount, 50) self.assertEqual(request.method, 'GET') - self.assertEqual(str(request), '') + self.assertEqual(request.model, "MUCGPT") + self.assertEqual(str(request), '') @pytest.mark.asyncio @pytest.mark.unit