Add LLM observability tool Phoenix to QA in a box stack
- Includes a doc to run Phoenix locally
- Includes a doc to run Phoenix in K8s
selvik committed Jan 6, 2025
1 parent f198b47 commit 11be6f1
Showing 13 changed files with 449 additions and 37 deletions.
4 changes: 2 additions & 2 deletions demo/llm.rag.service/chat-serveragllmpluslb.yaml
@@ -15,11 +15,11 @@ spec:
model: serveragllm
elotl-luna: "true"
annotations:
node.elotl.co/instance-type-regexp: "^(t3\.xlarge|n2-standard-4)$"
node.elotl.co/instance-type-regexp: "^(t3.xlarge|n2-standard-4)$"
spec:
containers:
- name: serveragllm
image: elotl/serveragllm:v1.2.1
image: elotl/serveragllm:v1.3
imagePullPolicy: Always
ports:
- containerPort: 8000
2 changes: 1 addition & 1 deletion demo/llm.vdb.service/createvdb.yaml
@@ -6,7 +6,7 @@ metadata:
app: modeldataingest
elotl-luna: "true"
annotations:
node.elotl.co/instance-type-regexp: "^(t3\.xlarge|n2-standard-4)$"
node.elotl.co/instance-type-regexp: "^(t3.xlarge|n2-standard-4)$"
spec:
ttlSecondsAfterFinished: 120
template:
76 changes: 76 additions & 0 deletions dockers/llm.phoenix.service/phoenix.yaml
@@ -0,0 +1,76 @@
# phoenix.yaml
# Source: https://docs.arize.com/phoenix/deployment/kubernetes
apiVersion: v1
kind: Namespace
metadata:
  labels:
    name: phoenix
  name: phoenix
---
apiVersion: v1
kind: Service
metadata:
  labels:
    app: phoenix
  name: phoenix
  namespace: phoenix
spec:
  ports:
  - port: 6006
    protocol: TCP
    targetPort: 6006
  selector:
    app: phoenix
  type: ClusterIP
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: phoenix
  namespace: phoenix
spec:
  replicas: 1
  selector:
    matchLabels:
      app: phoenix
  template:
    metadata:
      # Assume k8s pod service discovery for prometheus
      annotations:
        prometheus.io/path: /metrics
        prometheus.io/port: "9090"
        prometheus.io/scrape: "true"
      labels:
        app: phoenix
    spec:
      containers:
      - name: phoenix
        args:
        - -m
        - phoenix.server.main
        - serve
        command:
        - python
        env:
        - name: PHOENIX_WORKING_DIR
          value: /mnt/data
        - name: PHOENIX_PORT
          value: "6006"
        # The version of phoenix you want should be used here
        image: docker.io/arizephoenix/phoenix:latest
        ports:
        - containerPort: 6006
        - containerPort: 4317
        - containerPort: 9090
        volumeMounts:
        - mountPath: /mnt/data
          name: phoenix
  volumeClaimTemplates:
  - metadata:
      name: phoenix
    spec:
      accessModes:
      - ReadWriteOnce
      resources:
        requests:
          storage: 8Gi
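
Note: the manifest above exposes Phoenix inside the cluster on a ClusterIP Service, port 6006 (UI and OTLP/HTTP trace collector), with the pod also listening on 4317 (OTLP/gRPC) and 9090 (Prometheus metrics). The sketch below shows how another pod would point its tracer at that Service; the cluster-internal hostname is an assumption derived from the Service name and namespace in this manifest, and the register/instrument calls mirror the ones used by the RAG service later in this commit.

```python
# Minimal sketch: register an OpenTelemetry tracer against the in-cluster Phoenix Service.
# The hostname is assumed from the manifest above (Service "phoenix" in namespace "phoenix").
from openinference.instrumentation.langchain import LangChainInstrumentor
from phoenix.otel import register

PHOENIX_TRACES_ENDPOINT = "http://phoenix.phoenix.svc.cluster.local:6006/v1/traces"

tracer_provider = register(
    project_name="default",
    endpoint=PHOENIX_TRACES_ENDPOINT,
)

# Instrument LangChain so retriever and LLM calls show up as spans in Phoenix.
LangChainInstrumentor(tracer_provider=tracer_provider).instrument(skip_dep_check=True)
```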
80 changes: 66 additions & 14 deletions dockers/llm.rag.service/Dockerfile
@@ -1,32 +1,84 @@
# syntax=docker/dockerfile-upstream:master
# Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile
FROM python:3.11-slim as base-container
FROM python:3.9-slim AS base-container

# Automatically set by buildx
ARG TARGETPLATFORM

ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1
ENV PIP_NO_CACHE_DIR=1

# Install system dependencies
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
build-essential \
ca-certificates \
ccache \
curl \
libssl-dev ca-certificates make \
git python3-pip && \
rm -rf /var/lib/apt/lists/*
build-essential \
ca-certificates \
ccache \
curl \
libssl-dev \
make \
git \
python3-pip \
python3-dev \
cmake \
pkg-config \
&& rm -rf /var/lib/apt/lists/*

WORKDIR /serveragllm

# Upgrade pip
RUN pip3 install --no-cache-dir --upgrade pip setuptools wheel

# Install dependencies in separate layers
RUN pip3 install --no-cache-dir \
"openai" \
"fastapi" \
"uvicorn[standard]" \
"boto3"

RUN pip3 install --no-cache-dir \
"langchain" \
"langchain_community" \
"langchain_huggingface"

RUN pip3 install --no-cache-dir \
"sentence-transformers" \
"unstructured" \
"faiss-cpu"

RUN pip3 install --no-cache-dir \
"arize-phoenix" \
"openinference-instrumentation-langchain"

RUN pip3 install --no-cache-dir \
"arize-phoenix[evals]" \
"tiktoken" \
"nest-asyncio"

RUN pip3 install --no-cache-dir \
"httpx<0.28"

RUN pip3 install --no-cache-dir \
"langchain-openai"

RUN pip3 install --no-cache-dir \
"langchain-huggingface" \
"opentelemetry-api" \
"opentelemetry-instrumentation" \
"opentelemetry-semantic-conventions" \
"opentelemetry-exporter-otlp-proto-http" \
"opentelemetry-sdk" \
"opentelemetry-exporter-otlp" \
"openai>=1"

# Copy application files
COPY pyproject.toml .
COPY __init__.py .
COPY common.py .
COPY serveragllm.py .
COPY serverragllm_jira_cvs_local.py .
COPY pyproject.toml .

RUN --mount=type=cache,target=/root/.cache/pip \
pip3 install -v --no-cache-dir \
"openai" "langchain" "langchain_community" "langchain_huggingface" "unstructured" "sentence-transformers" "faiss-cpu" "uvicorn[standard]" "fastapi" "boto3" && \
pip3 install --no-cache-dir -e .
# Install the local package
RUN pip3 install -e .

EXPOSE 8000

19 changes: 19 additions & 0 deletions dockers/llm.rag.service/pyproject.toml
@@ -3,3 +3,22 @@ name = "ragllm" # Required
version = "1.0.0" # Required
description = "Web service question/answer RAG-enhanced LLM model"
requires-python = ">=3.8"
dependencies = [
    "openai",
    "langchain",
    "langchain_community",
    "langchain_huggingface",
    "unstructured",
    "sentence-transformers",
    "faiss-cpu",
    "uvicorn[standard]",
    "fastapi",
    "boto3",
    "arize-phoenix",
    "openinference-instrumentation-langchain",
]
[tool.setuptools]
py-modules = [
    "serveragllm",
    "serverragllm_jira_cvs_local"
]
20 changes: 20 additions & 0 deletions dockers/llm.rag.service/requirements.txt
@@ -0,0 +1,20 @@
faiss-cpu
fastapi
langchain-huggingface
uvicorn
opentelemetry-api
opentelemetry-instrumentation
opentelemetry-semantic-conventions
opentelemetry-exporter-otlp-proto-http
arize-phoenix
arize-phoenix[evals]
tiktoken
openinference-instrumentation-langchain
nest-asyncio
langchain
langchain_community
opentelemetry-sdk
opentelemetry-exporter-otlp
openai>=1
langchain-openai
httpx<0.28
20 changes: 19 additions & 1 deletion dockers/llm.rag.service/serveragllm.py
@@ -7,9 +7,13 @@
from botocore.exceptions import ClientError, NoCredentialsError
from fastapi import FastAPI
from openai import OpenAI

from common import get_answer_with_settings

import phoenix as px
from phoenix.otel import register
from phoenix.session.evaluation import get_qa_with_reference, get_retrieved_documents
from openinference.instrumentation.langchain import LangChainInstrumentor

########
# Setup model name and query template parameters
MICROSOFT_MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"
@@ -194,6 +198,20 @@ def get_answer(question: Union[str, None]):
retriever = vectorstore.as_retriever(search_kwargs={"k": relevant_docs})
print("Created Vector DB retriever successfully. \n")

# Setup Phoenix
phoenix_svc_url = "http://phoenix-svc.phoenix.svc.cluster.local:6006"

print("Setting up Phoenix (LLM ops tool) tracer \n")
tracer_provider = register(
project_name="default",
endpoint="http://localhost:6006/v1/traces",
)
LangChainInstrumentor(tracer_provider=tracer_provider).instrument(skip_dep_check=True)

print("Setting up Phoenix's configuration: \n")
queries_df = get_qa_with_reference(px.Client(endpoint=phoenix_svc_url))
retrieved_documents_df = get_retrieved_documents(px.Client(endpoint=phoenix_svc_url))

# Uncomment to run a local test
# print("Testing with a sample question:")
# get_answer("who are you?")
45 changes: 33 additions & 12 deletions dockers/llm.rag.service/serverragllm_jira_cvs_local.py
@@ -10,23 +10,29 @@
# ]
# ///

import json
import os
import pickle
import sys
import uvicorn
import click

from fastapi import FastAPI
from functools import partial
from typing import Union

import click
from fastapi import FastAPI
from openai import OpenAI
#from langchain_openai import OpenAI

from openinference.instrumentation.langchain import LangChainInstrumentor
from common import get_answer_with_settings

import phoenix as px
from phoenix.otel import register
from phoenix.session.evaluation import get_qa_with_reference, get_retrieved_documents

def setup(
file_path: str,
vectore_store_path: str,
relevant_docs: int,
llm_server_url:str,
model_id: str,
@@ -36,7 +42,7 @@ def setup(
app = FastAPI()

# Load the object from the pickle file
with open(file_path, "rb") as file:
with open(vectore_store_path, "rb") as file:
print("Loading Vector DB...\n")
vectorstore = pickle.load(file)

@@ -61,6 +67,18 @@ def setup(
model_temperature=model_temperature,
)

print("Setting up Phoenix (LLM ops tool) tracer \n")
tracer_provider = register(
project_name="default",
endpoint="http://localhost:6006/v1/traces",
)

LangChainInstrumentor(tracer_provider=tracer_provider).instrument(skip_dep_check=True)

print("Setting up Phoenix's configuration: \n")
queries_df = get_qa_with_reference(px.Client())
retrieved_documents_df = get_retrieved_documents(px.Client())

@app.get("/answer/{question}")
def read_item(question: Union[str, None] = None):
print(f"Received question: {question}")
@@ -69,26 +87,30 @@ def read_item(question: Union[str, None] = None):

return app


print("Setting up configuration for RAG LLM")
MICROSOFT_MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"
MOSAICML_MODEL_ID = "mosaicml/mpt-7b-chat"
RELEVANT_DOCS_DEFAULT = 2
MAX_TOKENS_DEFAULT = 64
MODEL_TEMPERATURE_DEFAULT = 0.01

vectore_store_path = os.getenv("VECTOR_STORE_PATH")
if not vectore_store_path:
print("Please provide the pickled vector store path via env var, VECTORE_STORE_PATH")

file_path = os.getenv("FILE_PATH")
if not file_path:
print("Please provide the pickeled vector store path")

relevant_docs = os.getenv("RELEVANT_DOCS", RELEVANT_DOCS_DEFAULT)
relevant_docs = int(os.getenv("RELEVANT_DOCS", RELEVANT_DOCS_DEFAULT))
llm_server_url = os.getenv("LLM_SERVER_URL", "http://localhost:11434/v1")
model_id = os.getenv("MODEL_ID", "llama2")
max_tokens = int(os.getenv("MAX_TOKENS", MAX_TOKENS_DEFAULT))
model_temperature = float(os.getenv("MODEL_TEMPERATURE", MODEL_TEMPERATURE_DEFAULT))

app = setup(file_path, relevant_docs, llm_server_url, model_id, max_tokens, model_temperature)
# Uncomment the following 2 lines if you would like to bring
# up a local Phoenix app
#print("Starting LLM Ops tool, Phoenix locally")
#session = px.launch_app()

print("Setting up Fast API app \n")
app = setup(vectore_store_path, relevant_docs, llm_server_url, model_id, max_tokens, model_temperature)

@click.command()
@click.option("--host", default="127.0.0.1", help="Host for the FastAPI server (default: 127.0.0.1)")
@@ -97,6 +119,5 @@ def run(host, port):
# Serve the app using Uvicorn
uvicorn.run("serverragllm_jira_cvs_local:app", host=host, port=port, reload=True)


if __name__ == "__main__":
run()
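
The setup above pulls queries_df and retrieved_documents_df from Phoenix but does not score them yet. Below is a hedged sketch of how those dataframes are typically run through the arize-phoenix[evals] helpers and logged back so the scores appear next to the traces; the evaluator classes and judge model name are assumptions that may differ across Phoenix releases, and an OpenAI-compatible API key is required.

```python
# Sketch (assumes arize-phoenix[evals] and OPENAI_API_KEY): score captured traces and log the results.
import phoenix as px
from phoenix.evals import (
    HallucinationEvaluator,
    OpenAIModel,
    QAEvaluator,
    RelevanceEvaluator,
    run_evals,
)
from phoenix.session.evaluation import get_qa_with_reference, get_retrieved_documents
from phoenix.trace import DocumentEvaluations, SpanEvaluations

client = px.Client()  # defaults to the local Phoenix instance
queries_df = get_qa_with_reference(client)
retrieved_documents_df = get_retrieved_documents(client)

eval_model = OpenAIModel(model="gpt-4o-mini")  # assumed judge model

# One results dataframe per evaluator, in the same order as the evaluators list.
hallucination_df, qa_df = run_evals(
    dataframe=queries_df,
    evaluators=[HallucinationEvaluator(eval_model), QAEvaluator(eval_model)],
    provide_explanation=True,
)
relevance_df = run_evals(
    dataframe=retrieved_documents_df,
    evaluators=[RelevanceEvaluator(eval_model)],
    provide_explanation=True,
)[0]

# Attach the scores to the traces so they show up alongside spans in the Phoenix UI.
client.log_evaluations(
    SpanEvaluations(eval_name="Hallucination", dataframe=hallucination_df),
    SpanEvaluations(eval_name="QA Correctness", dataframe=qa_df),
    DocumentEvaluations(eval_name="Relevance", dataframe=relevance_df),
)
```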