diff --git a/Dockerfile.fireworks b/Dockerfile.fireworks new file mode 100644 index 000000000..9be6e0a44 --- /dev/null +++ b/Dockerfile.fireworks @@ -0,0 +1,54 @@ +FROM python:3.11.6-slim-bookworm as base + +# Install poetry +RUN pip install pipx +RUN python3 -m pipx ensurepath +RUN pipx install poetry==1.8.3 +ENV PATH="/root/.local/bin:$PATH" +ENV PATH=".venv/bin/:$PATH" + +RUN apt update && apt install -y \ + build-essential + +# https://python-poetry.org/docs/configuration/#virtualenvsin-project +ENV POETRY_VIRTUALENVS_IN_PROJECT=true + +FROM base as dependencies +WORKDIR /home/worker/app +COPY pyproject.toml poetry.lock ./ + +ARG POETRY_EXTRAS="ui llms-fireworks embeddings-fireworks vector-stores-qdrant embeddings-openai" +RUN poetry install --no-root --extras "${POETRY_EXTRAS}" + +FROM base as app +ENV PYTHONUNBUFFERED=1 +ENV PORT=8080 +ENV APP_ENV=prod +ENV PYTHONPATH="$PYTHONPATH:/home/worker/app/private_gpt/" +EXPOSE 8080 + +# Prepare a non-root user +# More info about how to configure UIDs and GIDs in Docker: +# https://github.com/systemd/systemd/blob/main/docs/UIDS-GIDS.md + +# Define the User ID (UID) for the non-root user +# UID 100 is chosen to avoid conflicts with existing system users +ARG UID=100 + +# Define the Group ID (GID) for the non-root user +# GID 65534 is often used for the 'nogroup' or 'nobody' group +ARG GID=65534 + +RUN adduser --system --gid ${GID} --uid ${UID} --home /home/worker worker +WORKDIR /home/worker/app + +RUN chown worker /home/worker/app +RUN mkdir local_data && chown worker local_data +RUN mkdir models && chown worker models +COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv +COPY --chown=worker private_gpt/ private_gpt +COPY --chown=worker *.yaml . +COPY --chown=worker scripts/ scripts + +USER worker +ENTRYPOINT python -m private_gpt diff --git a/docker-compose.yaml b/docker-compose.yaml index d81286d84..0dd79b886 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,5 +1,4 @@ services: - #----------------------------------- #---- Private-GPT services --------- #----------------------------------- @@ -7,7 +6,7 @@ services: # Private-GPT service for the Ollama CPU and GPU modes # This service builds from an external Dockerfile and runs the Ollama mode. private-gpt-ollama: - image: ${PGPT_IMAGE:-zylonai/private-gpt}:${PGPT_TAG:-0.6.2}-ollama # x-release-please-version + image: ${PGPT_IMAGE:-zylonai/private-gpt}:${PGPT_TAG:-0.6.2}-ollama # x-release-please-version user: root build: context: . @@ -93,7 +92,7 @@ services: ports: - "11434:11434" volumes: - - ./models:/root/.ollama + - ./local_data:/root/.ollama profiles: - "" - ollama-cpu @@ -114,3 +113,21 @@ services: capabilities: [gpu] profiles: - ollama-cuda + + # fireworks service + private-gpt-fireworks: + build: + context: . + dockerfile: Dockerfile.fireworks + volumes: + - ./local_data/:/home/worker/app/local_data + ports: + - "3001:8080" + environment: + PORT: 8080 + PGPT_PROFILES: fireworks + FIREWORKS_API_KEY: ${FIREWORKS_API_KEY} + env_file: + - .env + profiles: + - fireworks diff --git a/fern/docs/pages/installation/installation.mdx b/fern/docs/pages/installation/installation.mdx index e7f80c87d..59fcd81ba 100644 --- a/fern/docs/pages/installation/installation.mdx +++ b/fern/docs/pages/installation/installation.mdx @@ -3,45 +3,63 @@ It is important that you review the [Main Concepts](../concepts) section to unde ## Base requirements to run PrivateGPT ### 1. 
Clone the PrivateGPT Repository + Clone the repository and navigate to it: + ```bash git clone https://github.com/zylon-ai/private-gpt cd private-gpt ``` ### 2. Install Python 3.11 + If you do not have Python 3.11 installed, install it using a Python version manager like `pyenv`. Earlier Python versions are not supported. + #### macOS/Linux + Install and set Python 3.11 using [pyenv](https://github.com/pyenv/pyenv): + ```bash pyenv install 3.11 pyenv local 3.11 ``` + #### Windows + Install and set Python 3.11 using [pyenv-win](https://github.com/pyenv-win/pyenv-win): + ```bash pyenv install 3.11 pyenv local 3.11 ``` ### 3. Install `Poetry` + Install [Poetry](https://python-poetry.org/docs/#installing-with-the-official-installer) for dependency management: Follow the instructions on the official Poetry website to install it. -A bug exists in Poetry versions 1.7.0 and earlier. We strongly recommend upgrading to a tested version. -To upgrade Poetry to latest tested version, run `poetry self update 1.8.3` after installing it. + A bug exists in Poetry versions 1.7.0 and earlier. We strongly recommend + upgrading to a tested version. To upgrade Poetry to the latest tested version, run + `poetry self update 1.8.3` after installing it. ### 4. Optional: Install `make` + To run various scripts, you need to install `make`. Follow the instructions for your operating system: + #### macOS + (Using Homebrew): + ```bash brew install make ``` + #### Windows + (Using Chocolatey): + ```bash choco install make ``` @@ -53,6 +71,7 @@ PrivateGPT allows customization of the setup, from fully local to cloud-based, b ```bash poetry install --extras "<extra1> <extra2>..." ``` + Where `<extra>` can be any of the following options described below. ### Available Modules @@ -61,46 +80,49 @@ You need to choose one option per category (LLM, Embeddings, Vector Stores, UI).
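For example, combining one option from each category, the fully local Ollama setup described later in this document is installed with:

```bash
# One extra per category: UI + Ollama LLM + Ollama embeddings + Qdrant vector store
poetry install --extras "ui llms-ollama embeddings-ollama vector-stores-qdrant"
```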
#### LLM -| **Option** | **Description** | **Extra** | -|--------------|------------------------------------------------------------------------|---------------------| -| **ollama** | Adds support for Ollama LLM, requires Ollama running locally | llms-ollama | -| llama-cpp | Adds support for local LLM using LlamaCPP | llms-llama-cpp | -| sagemaker | Adds support for Amazon Sagemaker LLM, requires Sagemaker endpoints | llms-sagemaker | -| openai | Adds support for OpenAI LLM, requires OpenAI API key | llms-openai | -| openailike | Adds support for 3rd party LLM providers compatible with OpenAI's API | llms-openai-like | -| azopenai | Adds support for Azure OpenAI LLM, requires Azure endpoints | llms-azopenai | -| gemini | Adds support for Gemini LLM, requires Gemini API key | llms-gemini | +| **Option** | **Description** | **Extra** | +| ---------- | --------------------------------------------------------------------- | ---------------- | +| **ollama** | Adds support for Ollama LLM, requires Ollama running locally | llms-ollama | +| llama-cpp | Adds support for local LLM using LlamaCPP | llms-llama-cpp | +| sagemaker | Adds support for Amazon Sagemaker LLM, requires Sagemaker endpoints | llms-sagemaker | +| openai | Adds support for OpenAI LLM, requires OpenAI API key | llms-openai | +| openailike | Adds support for 3rd party LLM providers compatible with OpenAI's API | llms-openai-like | +| azopenai | Adds support for Azure OpenAI LLM, requires Azure endpoints | llms-azopenai | +| gemini | Adds support for Gemini LLM, requires Gemini API key | llms-gemini | #### Embeddings -| **Option** | **Description** | **Extra** | -|------------------|--------------------------------------------------------------------------------|-------------------------| -| **ollama** | Adds support for Ollama Embeddings, requires Ollama running locally | embeddings-ollama | -| huggingface | Adds support for local Embeddings using HuggingFace | embeddings-huggingface | -| openai | Adds support for OpenAI Embeddings, requires OpenAI API key | embeddings-openai | -| sagemaker | Adds support for Amazon Sagemaker Embeddings, requires Sagemaker endpoints | embeddings-sagemaker | -| azopenai | Adds support for Azure OpenAI Embeddings, requires Azure endpoints | embeddings-azopenai | -| gemini | Adds support for Gemini Embeddings, requires Gemini API key | embeddings-gemini | +| **Option** | **Description** | **Extra** | +| ----------- | -------------------------------------------------------------------------- | ---------------------- | +| **ollama** | Adds support for Ollama Embeddings, requires Ollama running locally | embeddings-ollama | +| huggingface | Adds support for local Embeddings using HuggingFace | embeddings-huggingface | +| openai | Adds support for OpenAI Embeddings, requires OpenAI API key | embeddings-openai | +| sagemaker | Adds support for Amazon Sagemaker Embeddings, requires Sagemaker endpoints | embeddings-sagemaker | +| azopenai | Adds support for Azure OpenAI Embeddings, requires Azure endpoints | embeddings-azopenai | +| gemini | Adds support for Gemini Embeddings, requires Gemini API key | embeddings-gemini | #### Vector Stores -| **Option** | **Description** | **Extra** | -|------------------|-----------------------------------------|-------------------------| -| **qdrant** | Adds support for Qdrant vector store | vector-stores-qdrant | -| milvus | Adds support for Milvus vector store | vector-stores-milvus | -| chroma | Adds support for Chroma DB vector store | vector-stores-chroma | -| 
postgres | Adds support for Postgres vector store | vector-stores-postgres | -| clickhouse | Adds support for Clickhouse vector store| vector-stores-clickhouse| +| **Option** | **Description** | **Extra** | +| ---------- | ---------------------------------------- | ------------------------ | +| **qdrant** | Adds support for Qdrant vector store | vector-stores-qdrant | +| milvus | Adds support for Milvus vector store | vector-stores-milvus | +| chroma | Adds support for Chroma DB vector store | vector-stores-chroma | +| postgres | Adds support for Postgres vector store | vector-stores-postgres | +| clickhouse | Adds support for Clickhouse vector store | vector-stores-clickhouse | #### UI -| **Option** | **Description** | **Extra** | -|--------------|------------------------------------------|-----------| -| Gradio | Adds support for UI using Gradio | ui | +| **Option** | **Description** | **Extra** | +| ---------- | -------------------------------- | --------- | +| Gradio | Adds support for UI using Gradio | ui | - -A working **Gradio UI client** is provided to test the API, together with a set of useful tools such as bulk -model download script, ingestion script, documents folder watch, etc. Please refer to the [UI alternatives](/manual/user-interface/alternatives) page for more UI alternatives. + + A working **Gradio UI client** is provided to test the API, together with a + set of useful tools such as bulk model download script, ingestion script, + documents folder watch, etc. Please refer to the [UI + alternatives](/manual/user-interface/alternatives) page for more UI + alternatives. ## Recommended Setups @@ -109,7 +131,7 @@ There are just some examples of recommended setups. You can mix and match the di You'll find more information in the Manual section of the documentation. > **Important for Windows**: In the examples below or how to run PrivateGPT with `make run`, `PGPT_PROFILES` env var is being set inline following Unix command line syntax (works on MacOS and Linux). -If you are using Windows, you'll need to set the env var in a different way, for example: +> If you are using Windows, you'll need to set the env var in a different way, for example: ```powershell # Powershell @@ -136,6 +158,7 @@ Go to [ollama.ai](https://ollama.ai/) and follow the instructions to install Oll After the installation, make sure the Ollama desktop app is closed. Now, start Ollama service (it will start a local inference server, serving both the LLM and the Embeddings): + ```bash ollama serve ``` @@ -152,6 +175,7 @@ ollama pull nomic-embed-text ``` Once done, on a different terminal, you can install PrivateGPT with the following command: + ```bash poetry install --extras "ui llms-ollama embeddings-ollama vector-stores-qdrant" ``` @@ -175,6 +199,7 @@ You need to have access to sagemaker inference endpoints for the LLM and / or th Edit the `settings-sagemaker.yaml` file to include the correct Sagemaker endpoints. Then, install PrivateGPT with the following command: + ```bash poetry install --extras "ui llms-sagemaker embeddings-sagemaker vector-stores-qdrant" ``` @@ -198,6 +223,7 @@ You need an OPENAI API key to run this setup. Edit the `settings-openai.yaml` file to include the correct API KEY. Never commit it! It's a secret! As an alternative to editing `settings-openai.yaml`, you can just set the env var OPENAI_API_KEY. 
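For example, on macOS or Linux you can export the key for the current shell session before starting PrivateGPT (the value below is a placeholder, not a real key):

```bash
# Make the OpenAI API key available to PrivateGPT for this shell session only
export OPENAI_API_KEY=<your OpenAI API key>
```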
Then, install PrivateGPT with the following command: + ```bash poetry install --extras "ui llms-openai embeddings-openai vector-stores-qdrant" ``` @@ -221,6 +247,7 @@ You need to have access to Azure OpenAI inference endpoints for the LLM and / or Edit the `settings-azopenai.yaml` file to include the correct Azure OpenAI endpoints. Then, install PrivateGPT with the following command: + ```bash poetry install --extras "ui llms-azopenai embeddings-azopenai vector-stores-qdrant" ``` @@ -235,6 +262,30 @@ PrivateGPT will use the already existing `settings-azopenai.yaml` settings file, The UI will be available at http://localhost:8001 +### Non-Private, Fireworks-powered test setup + +If you want to test PrivateGPT with Fireworks AI's LLM and Embeddings -taking into account your data is going to Fireworks AI!- you can run the following command: + +You need a Fireworks API key to run this setup. + +Edit the `settings-fireworks.yaml` file to include the correct API KEY. Never commit it! It's a secret! As an alternative to editing `settings-fireworks.yaml`, you can just set the env var FIREWORKS_API_KEY. + +Then, install PrivateGPT with the following command: + +```bash +poetry install --extras "ui llms-fireworks embeddings-fireworks vector-stores-qdrant embeddings-openai" +``` + +Once installed, you can run PrivateGPT. + +```bash +PGPT_PROFILES=fireworks make run +``` + +PrivateGPT will use the already existing `settings-fireworks.yaml` settings file, which is already configured to use Fireworks AI LLM and Embeddings endpoints, and Qdrant. + +The UI will be available at http://localhost:8001 + ### Local, Llama-CPP powered setup If you want to run PrivateGPT fully locally without relying on Ollama, you can run the following command: @@ -244,6 +295,7 @@ poetry install --extras "ui llms-llama-cpp embeddings-huggingface vector-stores- ``` In order for local LLM and embeddings to work, you need to download the models to the `models` folder. You can do so by running the `setup` script: + ```bash poetry run python scripts/setup ``` @@ -277,6 +329,7 @@ To do that, you need to install `llama.cpp` python's binding `llama-cpp-python` that activate `METAL`: you have to pass `-DLLAMA_METAL=on` to the CMake command that `pip` runs for you (see below). In other words, one should simply run: + ```bash CMAKE_ARGS="-DLLAMA_METAL=on" pip install --force-reinstall --no-cache-dir llama-cpp-python ``` @@ -285,9 +338,10 @@ The above command will force the re-installation of `llama-cpp-python` with `MET `llama.cpp` locally with your `METAL` libraries (shipped by default with your macOS). More information is available in the documentation of the libraries themselves: -* [llama-cpp-python](https://github.com/abetlen/llama-cpp-python#installation-with-hardware-acceleration) -* [llama-cpp-python's documentation](https://llama-cpp-python.readthedocs.io/en/latest/#installation-with-hardware-acceleration) -* [llama.cpp](https://github.com/ggerganov/llama.cpp#build) + +- [llama-cpp-python](https://github.com/abetlen/llama-cpp-python#installation-with-hardware-acceleration) +- [llama-cpp-python's documentation](https://llama-cpp-python.readthedocs.io/en/latest/#installation-with-hardware-acceleration) +- [llama.cpp](https://github.com/ggerganov/llama.cpp#build) ##### Llama-CPP Windows NVIDIA GPU support @@ -297,11 +351,11 @@ dependencies. 
Some tips to get it working with an NVIDIA card and CUDA (Tested on Windows 10 with CUDA 11.5 RTX 3070): -* Install latest VS2022 (and build tools) https://visualstudio.microsoft.com/vs/community/ -* Install CUDA toolkit https://developer.nvidia.com/cuda-downloads -* Verify your installation is correct by running `nvcc --version` and `nvidia-smi`, ensure your CUDA version is up to +- Install latest VS2022 (and build tools) https://visualstudio.microsoft.com/vs/community/ +- Install CUDA toolkit https://developer.nvidia.com/cuda-downloads +- Verify your installation is correct by running `nvcc --version` and `nvidia-smi`, ensure your CUDA version is up to date and your GPU is detected. -* [Optional] Install CMake to troubleshoot building issues by compiling llama.cpp directly https://cmake.org/download/ +- [Optional] Install CMake to troubleshoot building issues by compiling llama.cpp directly https://cmake.org/download/ If you have all required dependencies properly configured running the following powershell command should succeed. @@ -332,9 +386,9 @@ dependencies. Some tips: -* Make sure you have an up-to-date C++ compiler -* Install CUDA toolkit https://developer.nvidia.com/cuda-downloads -* Verify your installation is correct by running `nvcc --version` and `nvidia-smi`, ensure your CUDA version is up to +- Make sure you have an up-to-date C++ compiler +- Install CUDA toolkit https://developer.nvidia.com/cuda-downloads +- Verify your installation is correct by running `nvcc --version` and `nvidia-smi`, ensure your CUDA version is up to date and your GPU is detected. After that running the following command in the repository will install llama.cpp with GPU support: @@ -356,13 +410,17 @@ AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | Linux GPU support is done through ROCm. Some tips: -* Install ROCm from [quick-start install guide](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/tutorial/quick-start.html) -* [Install PyTorch for ROCm](https://rocm.docs.amd.com/projects/radeon/en/latest/docs/install/install-pytorch.html) + +- Install ROCm from [quick-start install guide](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/tutorial/quick-start.html) +- [Install PyTorch for ROCm](https://rocm.docs.amd.com/projects/radeon/en/latest/docs/install/install-pytorch.html) + ```bash wget https://repo.radeon.com/rocm/manylinux/rocm-rel-6.0/torch-2.1.1%2Brocm6.0-cp311-cp311-linux_x86_64.whl poetry run pip install --force-reinstall --no-cache-dir torch-2.1.1+rocm6.0-cp311-cp311-linux_x86_64.whl ``` -* Install bitsandbytes for ROCm + +- Install bitsandbytes for ROCm + ```bash PYTORCH_ROCM_ARCH=gfx900,gfx906,gfx908,gfx90a,gfx1030,gfx1100,gfx1101,gfx940,gfx941,gfx942 BITSANDBYTES_VERSION=62353b0200b8557026c176e74ac48b84b953a854 @@ -374,6 +432,7 @@ pip install . --extra-index-url https://download.pytorch.org/whl/nightly ``` After that running the following command in the repository will install llama.cpp with GPU support: + ```bash LLAMA_CPP_PYTHON_VERSION=0.2.56 DAMDGPU_TARGETS=gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx940;gfx941;gfx942 @@ -391,15 +450,15 @@ AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = Execution of LLMs locally still has a lot of sharp edges, specially when running on non Linux platforms. You might encounter several issues: -* Performance: RAM or VRAM usage is very high, your computer might experience slowdowns or even crashes. 
-* GPU Virtualization on Windows and OSX: Simply not possible with docker desktop, you have to run the server directly on +- Performance: RAM or VRAM usage is very high, your computer might experience slowdowns or even crashes. +- GPU Virtualization on Windows and OSX: Simply not possible with docker desktop, you have to run the server directly on the host. -* Building errors: Some of PrivateGPT dependencies need to build native code, and they might fail on some platforms. +- Building errors: Some of PrivateGPT dependencies need to build native code, and they might fail on some platforms. Most likely you are missing some dev tools in your machine (updated C++ compiler, CUDA is not on PATH, etc.). If you encounter any of these issues, please open an issue and we'll try to help. One of the first reflex to adopt is: get more information. -If, during your installation, something does not go as planned, retry in *verbose* mode, and see what goes wrong. +If, during your installation, something does not go as planned, retry in _verbose_ mode, and see what goes wrong. For example, when installing packages with `pip install`, you can add the option `-vvv` to show the details of the installation. @@ -414,8 +473,8 @@ To install a C++ compiler on Windows 10/11, follow these steps: 1. Install Visual Studio 2022. 2. Make sure the following components are selected: - * Universal Windows Platform development - * C++ CMake tools for Windows + - Universal Windows Platform development + - C++ CMake tools for Windows 3. Download the MinGW installer from the [MinGW website](https://sourceforge.net/projects/mingw/). 4. Run the installer and select the `gcc` component. diff --git a/poetry.lock b/poetry.lock index c595edcb1..6dfdf3e86 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2685,6 +2685,21 @@ llama-index-core = ">=0.11.0,<0.12.0" llama-index-embeddings-openai = ">=0.2.3,<0.3.0" llama-index-llms-azure-openai = ">=0.2.0,<0.3.0" +[[package]] +name = "llama-index-embeddings-fireworks" +version = "0.2.0" +description = "llama-index embeddings fireworks integration" +optional = true +python-versions = "<3.12,>=3.8.1" +files = [ + {file = "llama_index_embeddings_fireworks-0.2.0-py3-none-any.whl", hash = "sha256:44958479691f55005bd3bbf773316c556e5b1428c6ec174a4f443016e79e48ea"}, + {file = "llama_index_embeddings_fireworks-0.2.0.tar.gz", hash = "sha256:0085a8fd5b4d4f71f797cfef11a85c4c3fbe763a3680edeae8f410184fa2d266"}, +] + +[package.dependencies] +llama-index-core = ">=0.11.0,<0.12.0" +llama-index-llms-openai = ">=0.2.0,<0.3.0" + [[package]] name = "llama-index-embeddings-gemini" version = "0.2.0" @@ -2778,6 +2793,21 @@ httpx = "*" llama-index-core = ">=0.11.0,<0.12.0" llama-index-llms-openai = ">=0.2.1,<0.3.0" +[[package]] +name = "llama-index-llms-fireworks" +version = "0.2.0" +description = "llama-index llms fireworks integration" +optional = true +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_llms_fireworks-0.2.0-py3-none-any.whl", hash = "sha256:65a604f8cf622f7ce695c458d375cd7dac6e27f4596ba90e5464b2594b0688a0"}, + {file = "llama_index_llms_fireworks-0.2.0.tar.gz", hash = "sha256:cfdd07b6bc01890e55a4dfc3af2e62fe82e5a08b362d52314d024728ebcf7c5b"}, +] + +[package.dependencies] +llama-index-core = ">=0.11.0,<0.12.0" +llama-index-llms-openai = ">=0.2.0,<0.3.0" + [[package]] name = "llama-index-llms-gemini" version = "0.3.5" @@ -6242,11 +6272,6 @@ files = [ {file = "triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:34e509deb77f1c067d8640725ef00c5cbfcb2052a1a3cb6a6d343841f92624eb"}, {file = "triton-3.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bcbf3b1c48af6a28011a5c40a5b3b9b5330530c3827716b5fbf6d7adcc1e53e9"}, {file = "triton-3.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6e5727202f7078c56f91ff13ad0c1abab14a0e7f2c87e91b12b6f64f3e8ae609"}, - {file = "triton-3.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39b052da883351fdf6be3d93cedae6db3b8e3988d3b09ed221bccecfa9612230"}, - {file = "triton-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd34f19a8582af96e6291d4afce25dac08cb2a5d218c599163761e8e0827208e"}, - {file = "triton-3.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d5e10de8c011adeb7c878c6ce0dd6073b14367749e34467f1cff2bde1b78253"}, - {file = "triton-3.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8903767951bf86ec960b4fe4e21bc970055afc65e9d57e916d79ae3c93665e3"}, - {file = "triton-3.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41004fb1ae9a53fcb3e970745feb87f0e3c94c6ce1ba86e95fa3b8537894bef7"}, ] [package.dependencies] @@ -7082,6 +7107,7 @@ cffi = ["cffi (>=1.11)"] [extras] embeddings-azopenai = ["llama-index-embeddings-azure-openai"] +embeddings-fireworks = ["llama-index-embeddings-fireworks"] embeddings-gemini = ["llama-index-embeddings-gemini"] embeddings-huggingface = ["einops", "llama-index-embeddings-huggingface"] embeddings-mistral = ["llama-index-embeddings-mistralai"] @@ -7089,6 +7115,7 @@ embeddings-ollama = ["llama-index-embeddings-ollama"] embeddings-openai = ["llama-index-embeddings-openai"] embeddings-sagemaker = ["boto3"] llms-azopenai = ["llama-index-llms-azure-openai"] +llms-fireworks = ["llama-index-llms-fireworks"] llms-gemini = ["llama-index-llms-gemini"] llms-llama-cpp = ["llama-index-llms-llama-cpp"] llms-ollama = ["llama-index-llms-ollama"] @@ -7107,4 +7134,4 @@ vector-stores-qdrant = ["llama-index-vector-stores-qdrant"] [metadata] lock-version = "2.0" python-versions = ">=3.11,<3.12" -content-hash = "16e3be4521aa64c936ee8fb841655f15090b71cf8faaeed7e73a4bcdf3fbdea2" +content-hash = "f41ee2165df33fd6815114a9d6b01508e1e8726dd7a8baf99825514586f250f0" diff --git a/private_gpt/components/embedding/embedding_component.py b/private_gpt/components/embedding/embedding_component.py index b55cef873..2ef3afed0 100644 --- a/private_gpt/components/embedding/embedding_component.py +++ b/private_gpt/components/embedding/embedding_component.py @@ -67,6 +67,24 @@ def __init__(self, settings: Settings) -> None: api_key=api_key, model=model, ) + case "fireworks": + try: + from llama_index.embeddings.fireworks import ( # type: ignore + FireworksEmbedding, + ) + except ImportError as e: + raise ImportError( + "FireworksEmbedding dependencies not found, install with `poetry install --extras embeddings-fireworks`" + ) from e + + api_key = ( + settings.fireworks.embedding_api_key or settings.fireworks.api_key + ) + model = settings.openai.embedding_model + self.embedding_model = FireworksEmbedding( + api_key=api_key, + model=model, + ) case "ollama": try: from llama_index.embeddings.ollama import ( # type: ignore diff --git a/private_gpt/components/llm/llm_component.py b/private_gpt/components/llm/llm_component.py index eb752e547..492ec6845 100644 --- a/private_gpt/components/llm/llm_component.py +++ b/private_gpt/components/llm/llm_component.py @@ -102,6 
+102,19 @@ def __init__(self, settings: Settings) -> None: api_key=openai_settings.api_key, model=openai_settings.model, ) + case "fireworks": + try: + from llama_index.llms.fireworks import Fireworks # type: ignore + except ImportError as e: + raise ImportError( + "fireworks dependencies not found, install with `poetry install --extras llms-fireworks`" + ) from e + + fireworks_settings = settings.fireworks + self.llm = Fireworks( + model=fireworks_settings.model, + api_key=fireworks_settings.api_key, + ) case "openailike": try: from llama_index.llms.openai_like import OpenAILike # type: ignore diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py index 0589f13a3..03be58aad 100644 --- a/private_gpt/settings/settings.py +++ b/private_gpt/settings/settings.py @@ -115,6 +115,7 @@ class LLMSettings(BaseModel): "mock", "ollama", "gemini", + "fireworks", ] max_new_tokens: int = Field( 256, @@ -205,6 +206,7 @@ class EmbeddingSettings(BaseModel): "mock", "gemini", "mistralai", + "fireworks", ] ingest_mode: Literal["simple", "batch", "parallel", "pipeline"] = Field( "simple", @@ -268,6 +270,23 @@ class OpenAISettings(BaseModel): ) +class FireWorksSettings(BaseModel): + api_key: str + model: str = Field( + "accounts/fireworks/models/llama-v3p1-70b-instruct", + description="FireWorks Model to use. Example: 'accounts/fireworks/models/llama-v3p1-70b-instruct'.", + ) + embedding_api_base: str = Field( + None, + description="Base URL of FIREWORKS API. Example: 'https://api.fireworks.ai/inference/v1'.", + ) + embedding_api_key: str + embedding_model: str = Field( + "nomic-ai/nomic-embed-text-v1.5", + description="FIREWORKS embedding Model to use. Example: 'nomic-ai/nomic-embed-text-v1.5'.", + ) + + class GeminiSettings(BaseModel): api_key: str model: str = Field( @@ -597,6 +616,7 @@ class Settings(BaseModel): huggingface: HuggingFaceSettings sagemaker: SagemakerSettings openai: OpenAISettings + fireworks: FireWorksSettings gemini: GeminiSettings ollama: OllamaSettings azopenai: AzureOpenAISettings diff --git a/private_gpt/ui/ui.py b/private_gpt/ui/ui.py index abdfb0c6d..2b3f793b0 100644 --- a/private_gpt/ui/ui.py +++ b/private_gpt/ui/ui.py @@ -381,7 +381,7 @@ def _build_ui_blocks(self) -> gr.Blocks: ".contain { display: flex !important; flex-direction: column !important; }" "#component-0, #component-3, #component-10, #component-8 { height: 100% !important; }" "#chatbot { flex-grow: 1 !important; overflow: auto !important;}" - "#col { height: calc(100vh - 112px - 16px) !important; }" + "#col { min-height: calc(100vh - 112px - 16px) !important; }" "hr { margin-top: 1em; margin-bottom: 1em; border: 0; border-top: 1px solid #FFF; }" ".avatar-image { background-color: antiquewhite; border-radius: 2px; }" ".footer { text-align: center; margin-top: 20px; font-size: 14px; display: flex; align-items: center; justify-content: center; }" @@ -522,6 +522,7 @@ def get_model_label() -> str | None: model_mapping = { "llamacpp": config_settings.llamacpp.llm_hf_model_file, "openai": config_settings.openai.model, + "fireworks": config_settings.fireworks.model, "openailike": config_settings.openai.model, "azopenai": config_settings.azopenai.llm_model, "sagemaker": config_settings.sagemaker.llm_endpoint_name, diff --git a/pyproject.toml b/pyproject.toml index ca355c16b..c0818e261 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,8 @@ llama-index-vector-stores-postgres = {version ="*", optional = true} llama-index-vector-stores-clickhouse = {version ="*", optional = true} 
llama-index-storage-docstore-postgres = {version ="*", optional = true} llama-index-storage-index-store-postgres = {version ="*", optional = true} +llama-index-llms-fireworks = {version = "*", optional = true} +llama-index-embeddings-fireworks = {version = "*", optional = true} # Postgres psycopg2-binary = {version ="^2.9.9", optional = true} asyncpg = {version="^0.29.0", optional = true} @@ -83,6 +85,8 @@ vector-stores-postgres = ["llama-index-vector-stores-postgres"] vector-stores-milvus = ["llama-index-vector-stores-milvus"] storage-nodestore-postgres = ["llama-index-storage-docstore-postgres","llama-index-storage-index-store-postgres","psycopg2-binary","asyncpg"] rerank-sentence-transformers = ["torch", "sentence-transformers"] +llms-fireworks = ["llama-index-llms-fireworks"] +embeddings-fireworks = ["llama-index-embeddings-fireworks"] [tool.poetry.group.dev.dependencies] black = "^24" @@ -194,4 +198,4 @@ asyncio_mode = "auto" testpaths = ["tests"] addopts = [ "--import-mode=importlib", -] +] \ No newline at end of file diff --git a/settings-fireworks.yaml b/settings-fireworks.yaml new file mode 100644 index 000000000..e789f2cf3 --- /dev/null +++ b/settings-fireworks.yaml @@ -0,0 +1,13 @@ +server: + env_name: ${APP_ENV:fireworks} + +llm: + mode: fireworks + +embedding: + mode: fireworks + +fireworks: + api_key: ${FIREWORKS_API_KEY:} + model: "accounts/fireworks/models/llama-v3p1-70b-instruct" + #poetry install --extras "ui llms-fireworks embeddings-fireworks vector-stores-qdrant embeddings-openai" diff --git a/settings.yaml b/settings.yaml index eda1af860..4dc5ab3b2 100644 --- a/settings.yaml +++ b/settings.yaml @@ -54,7 +54,7 @@ llm: context_window: 3900 # Select your tokenizer. Llama-index tokenizer is the default. # tokenizer: meta-llama/Meta-Llama-3.1-8B-Instruct - temperature: 0.1 # The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1) + temperature: 0.1 # The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1) rag: similarity_top_k: 2 @@ -70,19 +70,19 @@ summarize: use_async: true clickhouse: - host: localhost - port: 8443 - username: admin - password: clickhouse - database: embeddings + host: localhost + port: 8443 + username: admin + password: clickhouse + database: embeddings llamacpp: llm_hf_repo_id: lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF llm_hf_model_file: Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf - tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting - top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40) - top_p: 1.0 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9) - repeat_penalty: 1.1 # Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1) + tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. 
A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting + top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40) + top_p: 1.0 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9) + repeat_penalty: 1.1 # Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1) embedding: # Should be matching the value above in most cases @@ -128,11 +128,16 @@ openai: model: gpt-3.5-turbo embedding_api_key: ${OPENAI_API_KEY:} +fireworks: + api_key: ${FIREWORKS_API_KEY:} + model: "accounts/fireworks/models/llama-v3p1-70b-instruct" + embedding_api_key: ${FIREWORKS_API_KEY:} + ollama: llm_model: llama3.1 embedding_model: nomic-embed-text api_base: http://localhost:11434 - embedding_api_base: http://localhost:11434 # change if your embedding model runs on another ollama + embedding_api_base: http://localhost:11434 # change if your embedding model runs on another ollama keep_alive: 5m request_timeout: 120.0 autopull_models: true
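As a usage sketch for the `private-gpt-fireworks` Docker Compose service added above: assuming Docker Compose v2 and a `.env` file in the repository root that defines `FIREWORKS_API_KEY`, the profile can be built and started with:

```bash
# Build the image from Dockerfile.fireworks and start only the "fireworks" profile;
# the compose file maps the container's port 8080 to http://localhost:3001
docker compose --profile fireworks up --build
```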