Add LLM observability tool Phoenix to QA in a box stack
- Includes a doc to run Phoenix locally
- Includes a doc to run Phoenix in K8s
selvik committed Jan 6, 2025
1 parent f198b47 commit 11be6f1
Showing 13 changed files with 449 additions and 37 deletions.
4 changes: 2 additions & 2 deletions demo/llm.rag.service/chat-serveragllmpluslb.yaml
@@ -15,11 +15,11 @@ spec:
model: serveragllm
elotl-luna: "true"
annotations:
node.elotl.co/instance-type-regexp: "^(t3\.xlarge|n2-standard-4)$"
node.elotl.co/instance-type-regexp: "^(t3.xlarge|n2-standard-4)$"
spec:
containers:
- name: serveragllm
image: elotl/serveragllm:v1.2.1
image: elotl/serveragllm:v1.3
imagePullPolicy: Always
ports:
- containerPort: 8000
2 changes: 1 addition & 1 deletion demo/llm.vdb.service/createvdb.yaml
@@ -6,7 +6,7 @@ metadata:
app: modeldataingest
elotl-luna: "true"
annotations:
node.elotl.co/instance-type-regexp: "^(t3\.xlarge|n2-standard-4)$"
node.elotl.co/instance-type-regexp: "^(t3.xlarge|n2-standard-4)$"
spec:
ttlSecondsAfterFinished: 120
template:
76 changes: 76 additions & 0 deletions dockers/llm.phoenix.service/phoenix.yaml
@@ -0,0 +1,76 @@
# phoenix.yaml
# Source: https://docs.arize.com/phoenix/deployment/kubernetes
apiVersion: v1
kind: Namespace
metadata:
  labels:
    name: phoenix
  name: phoenix
---
apiVersion: v1
kind: Service
metadata:
  labels:
    app: phoenix
  name: phoenix
  namespace: phoenix
spec:
  ports:
  - port: 6006
    protocol: TCP
    targetPort: 6006
  selector:
    app: phoenix
  type: ClusterIP
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: phoenix
  namespace: phoenix
spec:
  replicas: 1
  selector:
    matchLabels:
      app: phoenix
  template:
    metadata:
      # Assume k8s pod service discovery for prometheus
      annotations:
        prometheus.io/path: /metrics
        prometheus.io/port: "9090"
        prometheus.io/scrape: "true"
      labels:
        app: phoenix
    spec:
      containers:
      - name: phoenix
        args:
        - -m
        - phoenix.server.main
        - serve
        command:
        - python
        env:
        - name: PHOENIX_WORKING_DIR
          value: /mnt/data
        - name: PHOENIX_PORT
          value: "6006"
        # The version of phoenix you want should be used here
        image: docker.io/arizephoenix/phoenix:latest
        ports:
        - containerPort: 6006
        - containerPort: 4317
        - containerPort: 9090
        volumeMounts:
        - mountPath: /mnt/data
          name: phoenix
  volumeClaimTemplates:
  - metadata:
      name: phoenix
    spec:
      accessModes:
      - ReadWriteOnce
      resources:
        requests:
          storage: 8Gi
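
Note: the manifest above exposes Phoenix inside the cluster on a ClusterIP Service, port 6006 (UI and OTLP/HTTP trace collector), with the pod also listening on 4317 (OTLP/gRPC) and 9090 (Prometheus metrics). The sketch below shows how another pod would point its tracer at that Service; the cluster-internal hostname is an assumption derived from the Service name and namespace in this manifest, and the register/instrument calls mirror the ones used by the RAG service later in this commit.

```python
# Minimal sketch: register an OpenTelemetry tracer against the in-cluster Phoenix Service.
# The hostname is assumed from the manifest above (Service "phoenix" in namespace "phoenix").
from openinference.instrumentation.langchain import LangChainInstrumentor
from phoenix.otel import register

PHOENIX_TRACES_ENDPOINT = "http://phoenix.phoenix.svc.cluster.local:6006/v1/traces"

tracer_provider = register(
    project_name="default",
    endpoint=PHOENIX_TRACES_ENDPOINT,
)

# Instrument LangChain so retriever and LLM calls show up as spans in Phoenix.
LangChainInstrumentor(tracer_provider=tracer_provider).instrument(skip_dep_check=True)
```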
80 changes: 66 additions & 14 deletions dockers/llm.rag.service/Dockerfile
@@ -1,32 +1,84 @@
# syntax=docker/dockerfile-upstream:master
# Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile
FROM python:3.11-slim as base-container
FROM python:3.9-slim AS base-container

# Automatically set by buildx
ARG TARGETPLATFORM

ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1
ENV PIP_NO_CACHE_DIR=1

# Install system dependencies
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
build-essential \
ca-certificates \
ccache \
curl \
libssl-dev ca-certificates make \
git python3-pip && \
rm -rf /var/lib/apt/lists/*
build-essential \
ca-certificates \
ccache \
curl \
libssl-dev \
make \
git \
python3-pip \
python3-dev \
cmake \
pkg-config \
&& rm -rf /var/lib/apt/lists/*

WORKDIR /serveragllm

# Upgrade pip
RUN pip3 install --no-cache-dir --upgrade pip setuptools wheel

# Install dependencies in separate layers
RUN pip3 install --no-cache-dir \
"openai" \
"fastapi" \
"uvicorn[standard]" \
"boto3"

RUN pip3 install --no-cache-dir \
"langchain" \
"langchain_community" \
"langchain_huggingface"

RUN pip3 install --no-cache-dir \
"sentence-transformers" \
"unstructured" \
"faiss-cpu"

RUN pip3 install --no-cache-dir \
"arize-phoenix" \
"openinference-instrumentation-langchain"

RUN pip3 install --no-cache-dir \
"arize-phoenix[evals]" \
"tiktoken" \
"nest-asyncio"

RUN pip3 install --no-cache-dir \
"httpx<0.28"

RUN pip3 install --no-cache-dir \
"langchain-openai"

RUN pip3 install --no-cache-dir \
"langchain-huggingface" \
"opentelemetry-api" \
"opentelemetry-instrumentation" \
"opentelemetry-semantic-conventions" \
"opentelemetry-exporter-otlp-proto-http" \
"opentelemetry-sdk" \
"opentelemetry-exporter-otlp" \
"openai>=1"

# Copy application files
COPY pyproject.toml .
COPY __init__.py .
COPY common.py .
COPY serveragllm.py .
COPY serverragllm_jira_cvs_local.py .
COPY pyproject.toml .

RUN --mount=type=cache,target=/root/.cache/pip \
pip3 install -v --no-cache-dir \
"openai" "langchain" "langchain_community" "langchain_huggingface" "unstructured" "sentence-transformers" "faiss-cpu" "uvicorn[standard]" "fastapi" "boto3" && \
pip3 install --no-cache-dir -e .
# Install the local package
RUN pip3 install -e .

EXPOSE 8000

19 changes: 19 additions & 0 deletions dockers/llm.rag.service/pyproject.toml
@@ -3,3 +3,22 @@ name = "ragllm" # Required
version = "1.0.0" # Required
description = "Web service question/answer RAG-enhanced LLM model"
requires-python = ">=3.8"
dependencies = [
    "openai",
    "langchain",
    "langchain_community",
    "langchain_huggingface",
    "unstructured",
    "sentence-transformers",
    "faiss-cpu",
    "uvicorn[standard]",
    "fastapi",
    "boto3",
    "arize-phoenix",
    "openinference-instrumentation-langchain",
]
[tool.setuptools]
py-modules = [
    "serveragllm",
    "serverragllm_jira_cvs_local"
]
20 changes: 20 additions & 0 deletions dockers/llm.rag.service/requirements.txt
@@ -0,0 +1,20 @@
faiss-cpu
fastapi
langchain-huggingface
uvicorn
opentelemetry-api
opentelemetry-instrumentation
opentelemetry-semantic-conventions
opentelemetry-exporter-otlp-proto-http
arize-phoenix
arize-phoenix[evals]
tiktoken
openinference-instrumentation-langchain
nest-asyncio
langchain
langchain_community
opentelemetry-sdk
opentelemetry-exporter-otlp
openai>=1
langchain-openai
httpx<0.28
20 changes: 19 additions & 1 deletion dockers/llm.rag.service/serveragllm.py
@@ -7,9 +7,13 @@
from botocore.exceptions import ClientError, NoCredentialsError
from fastapi import FastAPI
from openai import OpenAI

from common import get_answer_with_settings

import phoenix as px
from phoenix.otel import register
from phoenix.session.evaluation import get_qa_with_reference, get_retrieved_documents
from openinference.instrumentation.langchain import LangChainInstrumentor

########
# Setup model name and query template parameters
MICROSOFT_MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"
@@ -194,6 +198,20 @@ def get_answer(question: Union[str, None]):
retriever = vectorstore.as_retriever(search_kwargs={"k": relevant_docs})
print("Created Vector DB retriever successfully. \n")

# Setup Phoenix
phoenix_svc_url = "http://phoenix-svc.phoenix.svc.cluster.local:6006"

print("Setting up Phoenix (LLM ops tool) tracer \n")
tracer_provider = register(
project_name="default",
endpoint="http://localhost:6006/v1/traces",
)
LangChainInstrumentor(tracer_provider=tracer_provider).instrument(skip_dep_check=True)

print("Setting up Phoenix's configuration: \n")
queries_df = get_qa_with_reference(px.Client(endpoint=phoenix_svc_url))
retrieved_documents_df = get_retrieved_documents(px.Client(endpoint=phoenix_svc_url))

# Uncomment to run a local test
# print("Testing with a sample question:")
# get_answer("who are you?")
45 changes: 33 additions & 12 deletions dockers/llm.rag.service/serverragllm_jira_cvs_local.py
@@ -10,23 +10,29 @@
# ]
# ///

import json
import os
import pickle
import sys
import uvicorn
import click

from fastapi import FastAPI
from functools import partial
from typing import Union

import click
from fastapi import FastAPI
from openai import OpenAI
#from langchain_openai import OpenAI

from openinference.instrumentation.langchain import LangChainInstrumentor
from common import get_answer_with_settings

import phoenix as px
from phoenix.otel import register
from phoenix.session.evaluation import get_qa_with_reference, get_retrieved_documents

def setup(
file_path: str,
vectore_store_path: str,
relevant_docs: int,
llm_server_url:str,
model_id: str,
@@ -36,7 +42,7 @@ def setup(
app = FastAPI()

# Load the object from the pickle file
with open(file_path, "rb") as file:
with open(vectore_store_path, "rb") as file:
print("Loading Vector DB...\n")
vectorstore = pickle.load(file)

@@ -61,6 +67,18 @@ def setup(
model_temperature=model_temperature,
)

print("Setting up Phoenix (LLM ops tool) tracer \n")
tracer_provider = register(
project_name="default",
endpoint="http://localhost:6006/v1/traces",
)

LangChainInstrumentor(tracer_provider=tracer_provider).instrument(skip_dep_check=True)

print("Setting up Phoenix's configuration: \n")
queries_df = get_qa_with_reference(px.Client())
retrieved_documents_df = get_retrieved_documents(px.Client())

@app.get("/answer/{question}")
def read_item(question: Union[str, None] = None):
print(f"Received question: {question}")
@@ -69,26 +87,30 @@ def read_item(question: Union[str, None] = None):

return app


print("Setting up configuration for RAG LLM")
MICROSOFT_MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"
MOSAICML_MODEL_ID = "mosaicml/mpt-7b-chat"
RELEVANT_DOCS_DEFAULT = 2
MAX_TOKENS_DEFAULT = 64
MODEL_TEMPERATURE_DEFAULT = 0.01

vectore_store_path = os.getenv("VECTOR_STORE_PATH")
if not vectore_store_path:
print("Please provide the pickled vector store path via env var, VECTORE_STORE_PATH")

file_path = os.getenv("FILE_PATH")
if not file_path:
print("Please provide the pickeled vector store path")

relevant_docs = os.getenv("RELEVANT_DOCS", RELEVANT_DOCS_DEFAULT)
relevant_docs = int(os.getenv("RELEVANT_DOCS", RELEVANT_DOCS_DEFAULT))
llm_server_url = os.getenv("LLM_SERVER_URL", "http://localhost:11434/v1")
model_id = os.getenv("MODEL_ID", "llama2")
max_tokens = int(os.getenv("MAX_TOKENS", MAX_TOKENS_DEFAULT))
model_temperature = float(os.getenv("MODEL_TEMPERATURE", MODEL_TEMPERATURE_DEFAULT))

app = setup(file_path, relevant_docs, llm_server_url, model_id, max_tokens, model_temperature)
# Uncomment the following 2 lines if you would like to bring
# up a local Phoenix app
#print("Starting LLM Ops tool, Phoenix locally")
#session = px.launch_app()

print("Setting up Fast API app \n")
app = setup(vectore_store_path, relevant_docs, llm_server_url, model_id, max_tokens, model_temperature)

@click.command()
@click.option("--host", default="127.0.0.1", help="Host for the FastAPI server (default: 127.0.0.1)")
@@ -97,6 +119,5 @@ def run(host, port):
# Serve the app using Uvicorn
uvicorn.run("serverragllm_jira_cvs_local:app", host=host, port=port, reload=True)


if __name__ == "__main__":
run()
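
The setup above pulls queries_df and retrieved_documents_df from Phoenix but does not score them yet. Below is a hedged sketch of how those dataframes are typically run through the arize-phoenix[evals] helpers and logged back so the scores appear next to the traces; the evaluator classes and judge model name are assumptions that may differ across Phoenix releases, and an OpenAI-compatible API key is required.

```python
# Sketch (assumes arize-phoenix[evals] and OPENAI_API_KEY): score captured traces and log the results.
import phoenix as px
from phoenix.evals import (
    HallucinationEvaluator,
    OpenAIModel,
    QAEvaluator,
    RelevanceEvaluator,
    run_evals,
)
from phoenix.session.evaluation import get_qa_with_reference, get_retrieved_documents
from phoenix.trace import DocumentEvaluations, SpanEvaluations

client = px.Client()  # defaults to the local Phoenix instance
queries_df = get_qa_with_reference(client)
retrieved_documents_df = get_retrieved_documents(client)

eval_model = OpenAIModel(model="gpt-4o-mini")  # assumed judge model

# One results dataframe per evaluator, in the same order as the evaluators list.
hallucination_df, qa_df = run_evals(
    dataframe=queries_df,
    evaluators=[HallucinationEvaluator(eval_model), QAEvaluator(eval_model)],
    provide_explanation=True,
)
relevance_df = run_evals(
    dataframe=retrieved_documents_df,
    evaluators=[RelevanceEvaluator(eval_model)],
    provide_explanation=True,
)[0]

# Attach the scores to the traces so they show up alongside spans in the Phoenix UI.
client.log_evaluations(
    SpanEvaluations(eval_name="Hallucination", dataframe=hallucination_df),
    SpanEvaluations(eval_name="QA Correctness", dataframe=qa_df),
    DocumentEvaluations(eval_name="Relevance", dataframe=relevance_df),
)
```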