Merge pull request #61 from it-at-m/dockerify
Docker instead of Azure, multiple language models
Meteord authored Aug 29, 2024
2 parents 60c2620 + dc36283 commit 28d613b
Showing 73 changed files with 1,474 additions and 1,843 deletions.
8 changes: 8 additions & 0 deletions .dockerignore
@@ -0,0 +1,8 @@
.git*
**/*.pyc
.venv/
/tests
/notebooks
/infra
*.ipynb
**/node_modules
5 changes: 0 additions & 5 deletions .github/workflows/azure-dev-validation.yaml
@@ -12,11 +12,6 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v4

- name: Build Bicep for linting
uses: azure/CLI@v1
with:
inlineScript: az config set bicep.use_binary_from_path=false && az bicep build -f infra/main.bicep --stdout

- name: Run Microsoft Security DevOps Analysis
uses: microsoft/security-devops-action@preview
2 changes: 1 addition & 1 deletion .github/workflows/python-test.yaml
@@ -14,7 +14,7 @@ jobs:
fail-fast: false
matrix:
os: ["ubuntu-20.04"]
python_version: ["3.9", "3.10", "3.11"]
python_version: ["3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v4
- name: Setup python
24 changes: 24 additions & 0 deletions Dockerfile
@@ -0,0 +1,24 @@
# syntax=docker/dockerfile:1

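# Stage 1: build the React frontend with Node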
FROM node:19-alpine AS builder

ENV GENERATE_SOURCEMAP=false
ENV NODE_OPTIONS=--max_old_space_size=4096
WORKDIR /build
COPY app/ .
WORKDIR /build/frontend
RUN npm install
RUN npm run build

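# Stage 2: Python runtime for the Quart backend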
FROM python:3.12
WORKDIR /code
COPY --from=builder /build/backend .

ARG fromconfig="./config/default.json"
COPY $fromconfig /code/config.json
COPY "./config/base.json" /code/base.json

RUN pip install --no-cache-dir --upgrade -r requirements.txt

EXPOSE 8000
CMD ["gunicorn","main:app"]
71 changes: 22 additions & 49 deletions README.md
@@ -31,9 +31,8 @@ Why should you use MUCGPT? See for yourself:

The documentation project is built with technologies we use in our projects (see [requirements-dev.txt](/requirements-dev.txt)):
### Backend:
* [Python 3.9, 3.10 or 3.11](https://www.python.org/downloads/)
* [Python 3.10, 3.11 or 3.12](https://www.python.org/downloads/)
* [Quart](https://pgjones.gitlab.io/quart/)
* [Azure OpenAI](https://azure.microsoft.com/de-de/products/ai-services/openai-service)
* [LangChain](https://www.langchain.com/)

### Frontend:
@@ -43,15 +42,15 @@ The documentation project is built with technologies we use in our projects (see
* [JavaScript](https://wiki.selfhtml.org/wiki/JavaScript)

### Deployment:
* [Azure Developer CLI](https://learn.microsoft.com/en-us/azure/developer/azure-developer-cli/install-azd?tabs=winget-windows%2Cbrew-mac%2Cscript-linux&pivots=os-windows)
* [Node.js 14+](https://nodejs.org/en/download/package-manager)
* [Git](https://git-scm.com/downloads)
* [Powershell 7+ (pwsh)](https://github.com/powershell/powershell)
* Python 3.12
* Docker

## Table of contents
* [Built With](#built-with)
* [Roadmap](#roadmap)
* [Set up](#set-up-on-azure)
* [Run](#run)
* [Documentation](#documentation)
* [Contributing](#contributing)
* [License](#license)
@@ -64,60 +63,34 @@ The documentation project is built with technologies we use in our projects (see

See the [open issues](https://github.com/it-at-m/mucgpt/issues) for a full list of proposed features (and known issues).

## Run
Configure your environment in [config/default.json](config/default.json). The schema of the configuration is described in [config/mucgpt_config.schema.json](config/mucgpt_config.schema.json). Insert the model endpoint and API key for your connection to an OpenAI completions endpoint or an Azure OpenAI completions endpoint.
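For illustration, a configuration could look roughly like the sketch below. Only the `models` entries (`model_name`, `max_tokens`, `description`) are visible in this commit's `ModelsConfig`/`ModelsDTO` types; the `frontend` section and the `endpoint`/`api_key` field names are assumptions, so treat the schema file as authoritative:
```
import json

# Hypothetical sketch of a MUCGPT configuration -- field names other than
# those in ModelsConfig/ModelsDTO are assumptions, not taken from the schema.
config = {
    "frontend": {},  # frontend feature flags (see the schema)
    "backend": {
        "models": [
            {
                "model_name": "gpt-4o-mini",
                "max_tokens": 4096,
                "description": "Fast, inexpensive default model",
                "endpoint": "https://api.openai.com/v1",  # assumed field name
                "api_key": "<your-api-key>",              # assumed field name
            }
        ],
    },
}

with open("config/default.json", "w") as f:
    json.dump(config, f, indent=2)
```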
### Run locally
```
cd app\backend
pip install --no-cache-dir --upgrade -r requirements.txt
cd ..\frontend
npm install
npm run build
cd ..\backend
$env:MUCGPT_CONFIG="path to default.json"
$env:MUCGPT_BASE_CONFIG="path to base.json"
python -m quart --app main:app run
```

## Set up on Azure
As this project is based on a template from Microsoft Azure, see also [here](https://github.com/Azure-Samples/azure-search-openai-demo?tab=readme-ov-file#azure-deployment) for the deployment documentation.
### You need the following requirements to set up MUCGPT on Azure:
* Azure account
* Azure subscription with access enabled for the Azure OpenAI service
* Account Permissions:
* `Microsoft.Authorization/roleAssignments/write`
* Role Based Access Control Administrator, User Access Administrator, or Owner
* subscription-level permissions
* `Microsoft.Resources/deployments/write` on the subscription level

### Run with docker
1. Build an image: `docker build --tag mucgpt-local . --build-arg fromconfig="./config/default.json"`
2. Run the image: `docker run --detach --publish 8080:8000 mucgpt-local`. A quick smoke test for the running container is sketched below.
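A minimal sketch for such a smoke test, assuming the chat route from `app/backend/app.py` is served at the container root and guessing the shape of `history` (both are assumptions; the remaining field names match the `/chat` handler in this commit):
```
import requests

# Hypothetical smoke test for the container started above.
# The "/chat" path and the history item shape are assumptions;
# temperature, max_tokens, system_message and model match the
# /chat handler in app/backend/app.py.
payload = {
    "history": [{"user": "Hello MUCGPT"}],
    "temperature": 0.7,
    "max_tokens": 4096,
    "system_message": None,
    "model": "gpt-4o-mini",
}
response = requests.post("http://localhost:8080/chat", json=payload, timeout=60)
response.raise_for_status()
print(response.json())
```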

### Cost estimation:
Pricing varies per region and usage, so it isn't possible to predict exact costs for your usage. However, you can try the [Azure pricing calculator](https://azure.microsoft.com/en-us/pricing/calculator/) for the resources below.
* Azure App Service
* Azure OpenAI
* Azure Database for PostgreSQL Flexible Server
* App Service Plan

### Deploying
1. Install the [required tools](#built-with)
2. Clone the repository with `git clone https://github.com/it-at-m/mucgpt` and change into the cloned folder in your terminal
3. Login to your Azure account: `azd auth login`
4. Create a new azd environment with `azd env new`. Enter a name that will be used for the resource group. This will create a new folder in the `.azure` folder and set it as the active environment for any calls to `azd` going forward.
5. (Optional) This is the point where you can customize the deployment by setting environment variables, in order to use existing resources, enable optional features (such as auth or vision), or deploy to free tiers.
6. Run `azd up` - This will provision Azure resources and deploy this sample to those resources.
7. After the application has been successfully deployed, you will see a URL printed to the console. Click that URL to interact with the application in your browser. It will look like the following:
![](/docs/endpoint.png)
> **_NOTE:_** It may take 5-10 minutes after you see 'SUCCESS' for the application to be fully deployed. If you see a "Python Developer" welcome screen or an error page, then wait a bit and refresh the page.

### Deploying again
If you've only changed the backend/frontend code in the `app` folder, then you don't need to re-provision the Azure resources. You can just run:

`azd deploy`

If you've changed the infrastructure files (`infra` folder or `azure.yaml`), then you'll need to re-provision the Azure resources. You can do that by running:

`azd up`

### Running locally
You can only run locally after having successfully run the `azd up` command. If you haven't yet, follow the steps in [Deploying](#deploying) above.

1. Run `azd auth login`
2. Change directory to `app`
3. Run `./start.ps1` or `./start.sh` to start the app

## Documentation
![Architecture](docs/appcomponents_en.png)
The architecture of MUCGPT is divided into two parts, the frontend and the backend. MUCGPT is deployed on Microsoft Azure as an AppService with a PostgreSQL database and an Azure OpenAI resource.

The frontend is based on a template from [Microsoft Azure](https://github.com/Azure-Samples/azure-search-openai-demo) and is implemented using React, TypeScript and JavaScript.

The framework used to implement the backend of MUCGPT is called [Quart](https://pgjones.gitlab.io/quart/). It is a fast Python web microframework for building JSON APIs, rendering and serving HTML, serving web sockets and much more. The backend uses LangChain to connect to LLMs like Chat-GPT-3.5, which is currently in use.
The framework used to implement the backend of MUCGPT is called [Quart](https://pgjones.gitlab.io/quart/). It is a fast Python web microframework for building JSON APIs, rendering and serving HTML, serving web sockets and much more. The backend uses LangChain to connect to LLMs. In the [config](config/default.json) file, you can provide the user with various LLM options to select from in the frontend.
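The per-request model switch introduced in this commit (`llm.with_config(configurable={"llm": model_name})`) follows LangChain's configurable-alternatives pattern. A minimal sketch, assuming `langchain-openai` chat models and ignoring the real wiring in `init_app.py`:
```
from langchain_core.runnables import ConfigurableField
from langchain_openai import ChatOpenAI

# One runnable that can be swapped per request via the "llm" config key.
llm = ChatOpenAI(model="gpt-4o-mini").configurable_alternatives(
    ConfigurableField(id="llm"),
    default_key="gpt-4o-mini",
    **{"gpt-4o": ChatOpenAI(model="gpt-4o")},  # each key is a selectable model
)

# Per request, as in brainstorm.py:
configured = llm.with_config(configurable={"llm": "gpt-4o"})
```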


For more information about all the features of MUCGPT click [here](/docs/FEATURES.md).
53 changes: 25 additions & 28 deletions app/backend/app.py
@@ -1,11 +1,9 @@
import json
import logging
import os
import time
from typing import cast
from typing import List, cast

from azure.monitor.opentelemetry import configure_azure_monitor
from opentelemetry.instrumentation.aiohttp_client import AioHttpClientInstrumentor
from langchain_core.messages.human import HumanMessage
from opentelemetry.instrumentation.asgi import OpenTelemetryMiddleware
from quart import (
Blueprint,
@@ -21,8 +19,9 @@

from core.authentification import AuthentificationHelper, AuthError
from core.helper import format_as_ndjson
from core.modelhelper import num_tokens_from_message
from core.modelhelper import num_tokens_from_messages
from core.types.AppConfig import AppConfig
from core.types.Config import ModelsConfig, ModelsDTO
from core.types.countresult import CountResult
from init_app import initApp

@@ -67,7 +66,7 @@ async def sum():
text = request_json["text"] if file is None else None
splits = impl.split(detaillevel=detaillevel, file=file, text=text)

r = await impl.summarize(splits = splits, department=department, language=request_json["language"] or "Deutsch")
r = await impl.summarize(splits = splits, department=department, language=request_json["language"] or "Deutsch", model_name=request_json["model"] or "gpt-4o-mini")
return jsonify(r)
except Exception as e:
logging.exception("Exception in /sum")
@@ -84,7 +83,7 @@ async def brainstorm():

try:
impl = cfg["brainstorm_approaches"]
r = await impl.brainstorm(topic=request_json["topic"],language= request_json["language"] or "Deutsch", department=department)
r = await impl.brainstorm(topic=request_json["topic"],language= request_json["language"] or "Deutsch", department=department, model_name=request_json["model"] or "gpt-4o-mini")
return jsonify(r)
except Exception as e:
logging.exception("Exception in /brainstorm")
@@ -104,10 +103,12 @@ async def chat_stream():
temperature=request_json['temperature'] or 0.7
max_tokens=request_json['max_tokens'] or 4096
system_message = request_json['system_message'] or None
model = request_json['model'] or "gpt-4o-mini"
response_generator = impl.run_with_streaming(history= request_json["history"],
temperature=temperature,
max_tokens=max_tokens,
system_message=system_message,
model=model,
department= department)
response = await make_response(format_as_ndjson(response_generator))
response.timeout = None # type: ignore
@@ -128,13 +129,15 @@ async def chat():
impl = cfg["chat_approaches"]
temperature=request_json['temperature'] or 0.7
max_tokens=request_json['max_tokens'] or 4096
model_name=request_json['model'] or "gpt-4o-mini"
system_message = request_json['system_message'] or None
history = request_json["history"]
chatResult = impl.run_without_streaming(history= history,
temperature=temperature,
max_tokens=max_tokens,
system_message=system_message,
department= department)
department= department,
model_name= model_name)
return jsonify(chatResult)
except Exception as e:
logging.exception("Exception in /chat")
@@ -143,7 +146,16 @@ async def chat():
@bp.route("/config", methods=["GET"])
async def getConfig():
cfg = get_config_and_authentificate()
return jsonify(cfg["configuration_features"])
frontend_features = cfg["configuration_features"]["frontend"]
models= cast(List[ModelsConfig], cfg["configuration_features"]["backend"]["models"])
models_dto_list = []
for model in models:
dto = ModelsDTO(model_name=model["model_name"], max_tokens=model["max_tokens"], description=model["description"])
models_dto_list.append(dto)
return jsonify({
"frontend": frontend_features,
"models": models_dto_list
})

@bp.route("/statistics", methods=["GET"])
async def getStatistics():
@@ -158,14 +170,14 @@ async def getStatistics():

@bp.route("/counttokens", methods=["POST"])
async def counttokens():
cfg = get_config_and_authentificate()
get_config_and_authentificate()
if not request.is_json:
return jsonify({"error": "request must be json"}), 415

model = cfg["model_info"]["model"]
request_json = await request.get_json()
message=request_json['text'] or ""
counted_tokens = num_tokens_from_message(message,model)
model = request_json['model']['model_name'] or "gpt-4o-mini"
counted_tokens = num_tokens_from_messages([HumanMessage(message)], model)
return jsonify(CountResult(count=counted_tokens))

@bp.route("/statistics/export", methods=["GET"])
@@ -207,29 +219,14 @@ def get_department(request: Request):
else:
return None



@bp.before_request
async def ensure_openai_token():
cfg = get_config()
openai_token = cfg["model_info"]["openai_token"]
if openai_token.expires_on < time.time() + 60:
openai_token = await cfg["azure_credential"].get_token("https://cognitiveservices.azure.com/.default")
# updates tokens, the approaches should get the newest version of the token via reference
cfg["model_info"]["openai_token"] = openai_token
cfg["model_info"]["openai_api_key"] = openai_token.token

@bp.before_app_serving
async def setup_clients():
current_app.config[APPCONFIG_KEY] = await initApp()

def create_app():
if os.getenv("APPLICATIONINSIGHTS_CONNECTION_STRING"):
configure_azure_monitor()
AioHttpClientInstrumentor().instrument()
app = Quart(__name__)
app.register_blueprint(bp)
app.asgi_app = OpenTelemetryMiddleware(app.asgi_app)
app.asgi_app = OpenTelemetryMiddleware(app = app.asgi_app)
# Level should be one of https://docs.python.org/3/library/logging.html#logging-levels
logging.basicConfig(level=os.getenv("APP_LOG_LEVEL", "ERROR"))
return app
38 changes: 18 additions & 20 deletions app/backend/brainstorm/brainstorm.py
@@ -1,13 +1,13 @@
from operator import itemgetter
from typing import Optional

from langchain.chains import LLMChain, SequentialChain
from langchain.prompts import PromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain_community.callbacks import get_openai_callback
from langchain_core.runnables.base import RunnableSerializable

from brainstorm.brainstormresult import BrainstormResult
from core.datahelper import Repository, Requestinfo
from core.types.AzureChatGPTConfig import AzureChatGPTConfig
from core.types.Config import ApproachConfig
from core.types.LlmConfigs import LlmConfigs

@@ -56,10 +56,9 @@ class Brainstorm:
Text:
{brainstorm}"""

def __init__(self, llm: RunnableSerializable, config: ApproachConfig, model_info: AzureChatGPTConfig, repo: Repository):
def __init__(self, llm: RunnableSerializable, config: ApproachConfig, repo: Repository):
self.llm = llm
self.config = config
self.model_info = model_info
self.repo = repo

def getBrainstormPrompt(self) -> PromptTemplate:
@@ -78,45 +77,44 @@ def getTranslationPrompt(self) -> PromptTemplate:
return PromptTemplate(input_variables=["language", "brainstorm"], template=self.user_translate_prompt)


async def brainstorm(self, topic: str, language: str, department: Optional[str]) -> BrainstormResult:
async def brainstorm(self, topic: str, language: str, department: Optional[str], model_name:str) -> BrainstormResult:
"""Generates ideas for a given topic structured in markdown, translates the result into the target language
Args:
topic (str): topic of the brainstorming
language (str): target language
department (Optional[str]): department responsible for the call
model_name (str): the chosen LLM
Returns:
BrainstormResult: the structured markdown with ideas about the topic
"""
# configure
config: LlmConfigs = {
"llm_api_key": self.model_info["openai_api_key"]
"llm": model_name
}
llm = self.llm.with_config(configurable=config)

# get prompts
brainstorm_prompt = self.getBrainstormPrompt()
translation_prompt = self.getTranslationPrompt()
# construct chains
brainstormChain = LLMChain(llm=llm, prompt=self.getBrainstormPrompt(), output_key="brainstorm")
translationChain = LLMChain(llm=llm, prompt=self.getTranslationPrompt(), output_key="translation")
overall_chain = SequentialChain(
chains=[brainstormChain, translationChain],
input_variables=["language", "topic"],
output_variables=["brainstorm","translation"])


brainstormChain = brainstorm_prompt |llm | StrOutputParser()
translationChain = translation_prompt |llm | StrOutputParser()
# build complete chain
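# piping a dict into a runnable coerces it into a RunnableParallel:
# "brainstorm" runs the idea chain while "language" is passed through,
# and the combined output feeds translationChain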
overall_chain = ({"brainstorm": brainstormChain,"language": itemgetter("language") }| translationChain )

with get_openai_callback() as cb:
result = await overall_chain.acall({"topic": topic, "language": language})
result = await overall_chain.ainvoke({"topic": topic, "language": language})
total_tokens = cb.total_tokens

translation = result['translation']
translation = self.cleanup(str(translation))
translation = self.cleanup(str(result))

if self.config["log_tokens"]:
self.repo.addInfo(Requestinfo(
tokencount = total_tokens,
department = department,
messagecount= 1,
method = "Brainstorm"))
method = "Brainstorm",
model = model_name))
return BrainstormResult(answer=translation)

def cleanup(self, chat_translate_result: str) -> str:

