main.py
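"""Indexify + DSPy RAG chatbot.

A Streamlit app that lets users upload PDFs, ingests their pages into an
Indexify extraction graph ("testdb"), and answers questions over the resulting
embedding index with a DSPy chain-of-thought RAG module backed by a local
Ollama llama3 model.

Run with `streamlit run main.py` (assumes Indexify and Ollama are running
locally and that the "testdb" extraction graph already exists).
"""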
import os
import tempfile

import streamlit as st
import dspy
from indexify import IndexifyClient, ExtractionGraph
from indexify_dspy import retriever
from langchain.document_loaders import PyPDFLoader

# Connect to the local Indexify server and to a local Ollama llama3 model.
client = IndexifyClient()
lm = dspy.OllamaLocal(model="llama3", timeout_s=180)

# Embedding index produced by the "testdb" extraction graph.
index_name = "testdb.embeddings.embedding"
indexify_retriever_model = retriever.IndexifyRM(index_name, client)

# Register the language model and retriever with DSPy.
dspy.settings.configure(lm=lm, rm=indexify_retriever_model)
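# Note (setup sketch, not part of the original app): the index above is assumed
# to come from an extraction graph named "testdb" with an embedding policy named
# "embeddings", created ahead of time along these lines (the extractor name is
# only an example):
#
#   extraction_graph_spec = """
#   name: 'testdb'
#   extraction_policies:
#     - extractor: 'tensorlake/minilm-l6'
#       name: 'embeddings'
#   """
#   client.create_extraction_graph(ExtractionGraph.from_yaml(extraction_graph_spec))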
def get_context(question):
    """Retrieve the top-k passages for a question from the Indexify index."""
    retrieve = retriever.IndexifyRM(client)
    context = retrieve(question, index_name, k=3).passages
    return context
class GenerateAnswer(dspy.Signature):
    """Answer questions with factoid answers."""

    context = dspy.InputField(desc="may contain relevant facts")
    question = dspy.InputField()
    answer = dspy.OutputField(desc="an explained answer")
class RAG(dspy.Module):
    def __init__(self, num_passages=3):
        super().__init__()
        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)

    def forward(self, question):
        context = get_context(question)
        prediction = self.generate_answer(context=context, question=question)
        return dspy.Prediction(context=context, answer=prediction.answer)

uncompiled_rag = RAG()
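# Optional (sketch, not from the original code): the module above is used
# uncompiled. Given a small trainset of dspy.Example(question=..., answer=...)
# items, it could be compiled with a DSPy teleprompter, e.g.:
#
#   from dspy.teleprompt import BootstrapFewShot
#   teleprompter = BootstrapFewShot(metric=dspy.evaluate.answer_exact_match)
#   compiled_rag = teleprompter.compile(RAG(), trainset=trainset)
#
# `trainset` is hypothetical here; the app below keeps using `uncompiled_rag`.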
# Persist uploaded PDFs to temp files so PyPDFLoader can read them from disk
# (Streamlit uploads are in-memory objects, not file paths).
def extract_file_names(uploaded_files):
    file_names = []
    for file in uploaded_files:
        tmp_path = os.path.join(tempfile.gettempdir(), file.name)
        with open(tmp_path, "wb") as f:
            f.write(file.getbuffer())
        file_names.append(tmp_path)
    return file_names
# Load each PDF, push its pages into the "testdb" extraction graph, and wait
# for extraction to finish so the new content becomes queryable.
def upload_data(doc_list):
    for doc_name in doc_list:
        pdf_loader = PyPDFLoader(file_path=doc_name)
        documents = pdf_loader.load()
        for doc in documents:
            content_id = client.add_documents("testdb", doc.page_content)
            client.wait_for_extraction(content_id)
# Placeholder echo response; unused, since answers come from the RAG module.
def get_bot_response(user_input):
    return f"Bot: {user_input}"
# Streamlit app layout
st.set_page_config(layout="wide")

# Left sidebar for the PDF uploader
with st.sidebar:
    st.title("Upload PDFs")
    uploaded_files = st.file_uploader("Choose PDFs", accept_multiple_files=True, type=["pdf"])

    # Upload button
    if st.button("Upload"):
        if uploaded_files:
            file_names = extract_file_names(uploaded_files)
            upload_data(file_names)
            st.success("PDFs uploaded successfully!")
            for uploaded_file in uploaded_files:
                st.write(uploaded_file.name)
        else:
            st.error("Please select PDFs to upload.")
# Main chat interface
st.title("Indexify Chatbot")

# User input
user_input = st.text_input("You: ", "")

# If the user submits a message
if user_input and st.button("Submit"):
    # Answer the question with the RAG pipeline
    bot_response = uncompiled_rag(user_input).answer
    # Display the bot response below the input box
    st.text_area("Bot response:", bot_response, height=100)