#-------------------------------------------------------------------------------
# LLM APIs settings
#-------------------------------------------------------------------------------
# NOTE:
# - WARC-GPT can use both OpenAI and Ollama at the same time, but needs at least one of the two.
# - Ollama is one of the simplest ways to get started running models locally: https://ollama.ai/
OLLAMA_API_URL="http://localhost:11434"
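# Example (hypothetical host): if Ollama runs in a container or on another
# machine, point OLLAMA_API_URL at that host instead, e.g.:
# OLLAMA_API_URL="http://host.docker.internal:11434"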
#OPENAI_API_KEY=""
#OPENAI_ORG_ID=""
# NOTE:
# OPENAI_BASE_URL can be used to interact with OpenAI-compatible providers.
# For example:
# - https://huggingface.co/blog/tgi-messages-api
# - https://docs.vllm.ai/en/latest/getting_started/quickstart.html#using-openai-completions-api-with-vllm
#
# Important:
# - Make sure to specify both OPENAI_BASE_URL and OPENAI_COMPATIBLE_MODEL when doing so.
# - These two values do not need to be set when using OpenAI as a provider.
#OPENAI_BASE_URL=""
#OPENAI_COMPATIBLE_MODEL=""
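# Example (hypothetical values): pointing WARC-GPT at a local vLLM server
# exposing an OpenAI-compatible endpoint might look like this:
# OPENAI_BASE_URL="http://localhost:8000/v1"
# OPENAI_COMPATIBLE_MODEL="mistralai/Mistral-7B-Instruct-v0.2"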
#-------------------------------------------------------------------------------
# Text Completion Prompts
#-------------------------------------------------------------------------------
# NOTE: {history}, {rag} and {request} are reserved keywords.
TEXT_COMPLETION_BASE_PROMPT="
{history}
You are a helpful assistant.
{rag}
Request: {request}
Helpful response (plain text, no markdown):
"
# NOTE: Injected into BASE prompt when relevant.
# Inspired by LangChain's default RAG prompt.
# {context} is a reserved keyword.
TEXT_COMPLETION_RAG_PROMPT="
Here is context to help you fulfill the user's request:
{context}
----------------
Context comes from web pages that were captured as part of a web archives collection.
When possible, use context to answer the question asked by the user.
If you don't know the answer, just say that you don't know; don't try to make up an answer.
Ignore context if it is empty or irrelevant.
Cite and quote your sources whenever possible. Use their number (for example: [1]) and/or URL to reference them.
"
# NOTE: Injected into BASE prompt when relevant.
# NOTE: {history} is a reserved keyword.
TEXT_COMPLETION_HISTORY_PROMPT="
Here is a summary of the conversation thus far:
{history}
----------------
"
#-------------------------------------------------------------------------------
# Paths
#-------------------------------------------------------------------------------
WARC_FOLDER_PATH="./warc"
VISUALIZATIONS_FOLDER_PATH="./visualizations"
VECTOR_SEARCH_PATH="./chromadb"
#-------------------------------------------------------------------------------
# Vector Store / Search settings
#-------------------------------------------------------------------------------
VECTOR_SEARCH_COLLECTION_NAME="collection"
VECTOR_SEARCH_SENTENCE_TRANSFORMER_MODEL="intfloat/e5-large-v2" # Can be any Sentence-Transformers-compatible model available on Hugging Face.
VECTOR_SEARCH_SENTENCE_TRANSFORMER_DEVICE="cpu" # "mps" or "cuda" might help with performance, depending on available hardware. See: https://www.sbert.net/examples/applications/computing-embeddings/README.html#sentence_transformers.SentenceTransformer
VECTOR_SEARCH_DISTANCE_FUNCTION="cosine" # Accepted values: "cosine", "l2", "ip". See: https://docs.trychroma.com/usage-guide#changing-the-distance-function
VECTOR_SEARCH_NORMALIZE_EMBEDDINGS="true"
VECTOR_SEARCH_CHUNK_PREFIX="passage: " # Prefix added to text chunks before they are embedded and stored. E5-family models expect this exact prefix.
VECTOR_SEARCH_QUERY_PREFIX="query: " # Prefix added to queries before they are embedded for semantic search. E5-family models expect this exact prefix.
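# Example (assumption, depends on model choice): prefix-free embedding models
# such as sentence-transformers/all-MiniLM-L6-v2 need no prefixes. In that
# case, leave both values empty:
# VECTOR_SEARCH_CHUNK_PREFIX=""
# VECTOR_SEARCH_QUERY_PREFIX=""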
VECTOR_SEARCH_TEXT_SPLITTER_CHUNK_OVERLAP=25 # Determines, for a given chunk of text, how many tokens must overlap with adjacent chunks.
VECTOR_SEARCH_SEARCH_N_RESULTS=4 # How many entries should the vector search return?
#-------------------------------------------------------------------------------
# Basic Rate Limiting
#-------------------------------------------------------------------------------
# NOTE:
# - These variables apply rate limiting to individual API routes.
# - See https://flask-limiter.readthedocs.io/en/stable/ for details and syntax.
RATE_LIMIT_STORAGE_URI="memory://"
API_MODELS_RATE_LIMIT="1/second"
API_SEARCH_RATE_LIMIT="120 per 1 hour"
API_COMPLETE_RATE_LIMIT="60 per 1 hour"
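# Example (hypothetical values): flask-limiter accepts both "N/period" and
# "N per period" notations. For multi-process deployments, a shared storage
# backend such as Redis can replace the in-memory store:
# RATE_LIMIT_STORAGE_URI="redis://localhost:6379"
# API_COMPLETE_RATE_LIMIT="10 per minute"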
#-------------------------------------------------------------------------------
# Hugging Face's tokenizer settings
#-------------------------------------------------------------------------------
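# NOTE: "false" prevents Hugging Face's tokenizers library from emitting
# parallelism warnings after the process is forked, which is common when the
# embedding model runs inside a multi-process web server.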
TOKENIZERS_PARALLELISM="false"