Add support for OctoAI LLM and embeddings (#301)
* "fixed typo in dense.py docstring"

* adding octoAI embeddings

* added octoai system test

* increased batch size

* added information for OctoAI env vars

* updated record_encoder batch size

* support for OctoAI LLM adaptor

* changed prefix after code review

* added OctoAI to llm unit tests

* fixed linting

* fixing typos on function call

* Added OctoAI keys, removed trailing white space
ptorru authored Mar 26, 2024
1 parent 7db7737 commit 73338bb
Showing 9 changed files with 266 additions and 4 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/PR.yml
@@ -70,14 +70,15 @@ jobs:
INDEX_NAME: system-${{ steps.gen_suffix.outputs.INDEX_NAME_SUFFIX }}
PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
OCTOAI_API_KEY: ${{ secrets.OCTOAI_API_KEY }}
ANYSCALE_API_KEY: ${{ secrets.ANYSCALE_API_KEY }}
CO_API_KEY: ${{ secrets.CO_API_KEY }}
AZURE_DEPLOYMENT_NAME: ${{ secrets.AZURE_DEPLOYMENT_NAME }}
AZURE_EMBEDDING_DEPLOYMENT_NAME: ${{ secrets.AZURE_EMBEDDING_DEPLOYMENT_NAME }}
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
run: |
  run_id=$(uuidgen | tr -d '-' | tr '[:upper:]' '[:lower:]')
  echo "run_id=${run_id}" >> $GITHUB_OUTPUT
  echo "Test Run ID: ${run_id}"
  poetry run pytest -n 3 --dist loadscope --testrunuid $run_id --html=report_system.html --self-contained-html tests/system
@@ -88,6 +89,7 @@ jobs:
INDEX_NAME: e2e-${{ steps.gen_suffix.outputs.INDEX_NAME_SUFFIX }}
PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
OCTOAI_API_KEY: ${{ secrets.OCTOAI_API_KEY }}
ANYSCALE_API_KEY: ${{ secrets.ANYSCALE_API_KEY }}
CO_API_KEY: ${{ secrets.CO_API_KEY }}
CE_LOG_FILENAME: e2e.log
2 changes: 1 addition & 1 deletion README.md
@@ -109,6 +109,7 @@ These optional environment variables are used to authenticate to other supported
| `JINA_API_KEY` | API key for Jina AI. Used to authenticate to Jina AI's services for embedding and chat API | You can find your Jina AI API key on the Jina AI website. You might need to log in or register for Jina AI's services |
| `AZURE_OPENAI_ENDPOINT`| The URL of the Azure OpenAI endpoint you deployed. | You can find this in the Azure OpenAI portal under _Keys and Endpoints_ |
| `AZURE_OPENAI_API_KEY` | The API key to use for your Azure OpenAI models. | You can find this in the Azure OpenAI portal under _Keys and Endpoints_ |
| `OCTOAI_API_KEY` | API key for OctoAI. Used to authenticate to OctoAI's service for open-source LLMs and embeddings | You can sign up for OctoAI and find your API key [here](https://octo.ai/) |

</details>
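
Below is a minimal sketch (not part of this commit) of how the key is resolved by the new `OctoAILLM` class: an explicit `api_key` argument takes precedence, otherwise the `OCTOAI_API_KEY` environment variable is read, and a `ValueError` is raised when neither is set. The placeholder token is hypothetical.

```python
import os

from canopy.llm import OctoAILLM

os.environ.pop("OCTOAI_API_KEY", None)  # simulate a missing key
try:
    OctoAILLM()
except ValueError as err:
    print(err)  # explains that an argument or OCTOAI_API_KEY is required

os.environ["OCTOAI_API_KEY"] = "<your-octoai-token>"  # hypothetical placeholder
llm = OctoAILLM()  # the key is now picked up from the environment
```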

@@ -280,4 +281,3 @@ gunicorn canopy_server.app:app --worker-class uvicorn.workers.UvicornWorker --bi
> The server interacts with services like Pinecone and OpenAI using your own authentication credentials.
When deploying the server on a public web hosting provider, it is recommended to enable an authentication mechanism
so that your server only accepts requests from authenticated users.

50 changes: 50 additions & 0 deletions src/canopy/config_templates/octoai.yaml
@@ -0,0 +1,50 @@
# ===========================================================
# Configuration file for Canopy Server
# ===========================================================
tokenizer:
  # -------------------------------------------------------------------------------------------
  # Tokenizer configuration
  # Use LlamaTokenizer from Hugging Face with the relevant OSS model (e.g. Llama 2)
  # -------------------------------------------------------------------------------------------
  type: LlamaTokenizer  # Options: [OpenAITokenizer, LlamaTokenizer]
  params:
    model_name: hf-internal-testing/llama-tokenizer

chat_engine:
  # -------------------------------------------------------------------------------------------
  # Chat engine configuration
  # Use OctoAI as the open source LLM provider
  # You can find the list of supported LLMs at https://octo.ai/docs/text-gen-solution/rest-api
  # -------------------------------------------------------------------------------------------
  params:
    max_prompt_tokens: 2048  # The maximum number of tokens to use for the input prompt to the LLM.
  llm: &llm
    type: OctoAILLM
    params:
      model_name: mistral-7b-instruct-fp16  # The name of the model to use.

  # query_builder:
  #   type: FunctionCallingQueryGenerator  # Options: [FunctionCallingQueryGenerator, LastMessageQueryGenerator, InstructionQueryGenerator]
  #   llm:
  #     type: OctoAILLM
  #     params:
  #       model_name: mistral-7b-instruct-fp16

context_engine:
  # -------------------------------------------------------------------------------------------------------------
  # ContextEngine configuration
  # -------------------------------------------------------------------------------------------------------------
  knowledge_base:
    # -----------------------------------------------------------------------------------------------------------
    # KnowledgeBase configuration
    # -----------------------------------------------------------------------------------------------------------
    record_encoder:
      # --------------------------------------------------------------------------
      # Configuration for the RecordEncoder subcomponent of the knowledge base.
      # Use OctoAI's Embedding endpoint for dense encoding.
      # --------------------------------------------------------------------------
      type: OctoAIRecordEncoder
      params:
        model_name: thenlper/gte-large  # The name of the model to use for encoding
        batch_size: 2048  # The number of document chunks to encode in each call to the encoding model
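
As a quick sanity check, here is a minimal sketch (not part of this commit; it assumes PyYAML is installed, the template path matches your checkout, and `OCTOAI_API_KEY` is set) showing how the `record_encoder` params above map onto the `OctoAIRecordEncoder` constructor added by this commit:

```python
import yaml

from canopy.knowledge_base.record_encoder import OctoAIRecordEncoder

with open("src/canopy/config_templates/octoai.yaml") as f:
    config = yaml.safe_load(f)

# The params keys mirror the constructor's keyword-only arguments.
params = config["context_engine"]["knowledge_base"]["record_encoder"]["params"]
encoder = OctoAIRecordEncoder(**params)  # model_name + batch_size from the YAML
```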
1 change: 1 addition & 0 deletions src/canopy/knowledge_base/record_encoder/__init__.py
@@ -7,3 +7,4 @@
from .jina import JinaRecordEncoder
from .sentence_transformers import SentenceTransformerRecordEncoder
from .hybrid import HybridRecordEncoder
from .octoai import OctoAIRecordEncoder
68 changes: 68 additions & 0 deletions src/canopy/knowledge_base/record_encoder/octoai.py
@@ -0,0 +1,68 @@
import os
from typing import List

from pinecone_text.dense.openai_encoder import OpenAIEncoder

from canopy.knowledge_base.models import KBDocChunk, KBEncodedDocChunk, KBQuery
from canopy.knowledge_base.record_encoder.dense import DenseRecordEncoder
from canopy.models.data_models import Query

OCTOAI_BASE_URL = "https://text.octoai.run/v1"


class OctoAIRecordEncoder(DenseRecordEncoder):
    """
    OctoAIRecordEncoder is a type of DenseRecordEncoder that uses OctoAI's OpenAI-compatible `embeddings` API.
    The implementation uses the `OpenAIEncoder` class from the `pinecone-text` library.
    For more information, see: https://github.com/pinecone-io/pinecone-text
    """  # noqa: E501

    def __init__(self,
                 *,
                 api_key: str = "",
                 base_url: str = OCTOAI_BASE_URL,
                 model_name: str = "thenlper/gte-large",
                 batch_size: int = 1024,
                 **kwargs):
        """
        Initialize the OctoAIRecordEncoder.

        Args:
            api_key: The OctoAI endpoint API key.
            base_url: The base URL for the OctoAI endpoint.
            model_name: The name of the OctoAI embeddings model to use for encoding. See https://octo.ai/docs/text-gen-solution/getting-started
            batch_size: The number of documents or queries to encode at once.
                        Defaults to 1024.
            **kwargs: Additional arguments to pass to the underlying `pinecone-text` `OpenAIEncoder`.
        """  # noqa: E501
        octoai_api_key = api_key or os.environ.get("OCTOAI_API_KEY")
        if not octoai_api_key:
            raise ValueError(
                "An OctoAI API token is required to use OctoAI. "
                "Please provide it as an argument "
                "or set the OCTOAI_API_KEY environment variable."
            )
        encoder = OpenAIEncoder(model_name,
                                base_url=base_url,
                                api_key=octoai_api_key,
                                **kwargs)
        super().__init__(dense_encoder=encoder, batch_size=batch_size)

    def encode_documents(self, documents: List[KBDocChunk]) -> List[KBEncodedDocChunk]:
        """
        Encode a list of documents: takes a list of KBDocChunk and returns a list of KBEncodedDocChunk.

        Args:
            documents: A list of KBDocChunk to encode.

        Returns:
            encoded chunks: A list of KBEncodedDocChunk, with the `values` field populated by the generated embeddings vector.
        """  # noqa: E501
        return super().encode_documents(documents)

    async def _aencode_documents_batch(self,
                                       documents: List[KBDocChunk]
                                       ) -> List[KBEncodedDocChunk]:
        raise NotImplementedError

    async def _aencode_queries_batch(self, queries: List[Query]) -> List[KBQuery]:
        raise NotImplementedError
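
A minimal usage sketch for the new encoder (not part of this commit; it assumes a valid `OCTOAI_API_KEY` and makes real calls to OctoAI's embeddings endpoint), mirroring the document chunks used in the system test added by this commit:

```python
from canopy.knowledge_base.models import KBDocChunk
from canopy.knowledge_base.record_encoder import OctoAIRecordEncoder

encoder = OctoAIRecordEncoder(batch_size=2)  # defaults to thenlper/gte-large

chunks = [KBDocChunk(id=f"doc_1_{i}",
                     text=f"Sample document {i}",
                     document_id=f"doc_{i}",
                     metadata={"test": i},
                     source="doc_1")
          for i in range(4)]

encoded = encoder.encode_documents(chunks)
print(len(encoded), len(encoded[0].values))  # 4 chunks, 1024-dim vectors per the test
```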
1 change: 1 addition & 0 deletions src/canopy/llm/__init__.py
@@ -3,3 +3,4 @@
from .anyscale import AnyscaleLLM
from .azure_openai_llm import AzureOpenAILLM
from .cohere import CohereLLM
from .octoai import OctoAILLM
61 changes: 61 additions & 0 deletions src/canopy/llm/octoai.py
@@ -0,0 +1,61 @@
import os
from typing import Any, Optional

from canopy.llm import OpenAILLM
from canopy.llm.models import Function
from canopy.models.data_models import Messages

OCTOAI_BASE_URL = "https://text.octoai.run/v1"


class OctoAILLM(OpenAILLM):
    """
    OctoAI LLM wrapper built on top of the OpenAI Python client.
    Note: OctoAI requires a valid API key to use this class.
    You can set the "OCTOAI_API_KEY" environment variable.
    """

    def __init__(
        self,
        model_name: str = "mistral-7b-instruct-fp16",
        *,
        base_url: Optional[str] = OCTOAI_BASE_URL,
        api_key: Optional[str] = None,
        **kwargs: Any,
    ):
        octoai_api_key = api_key or os.environ.get("OCTOAI_API_KEY")
        if not octoai_api_key:
            raise ValueError(
                "OctoAI API key is required to use OctoAI. "
                "If you haven't already, please sign up at https://octo.ai \n"
                "The key can be provided as an argument or "
                "via the OCTOAI_API_KEY environment variable."
            )
        super().__init__(
            model_name,
            api_key=octoai_api_key,
            base_url=base_url,
            **kwargs
        )

    def enforced_function_call(
        self,
        system_prompt: str,
        chat_history: Messages,
        function: Function,
        *,
        max_tokens: Optional[int] = None,
        model_params: Optional[dict] = None,
    ) -> dict:
        raise NotImplementedError("OctoAI doesn't support function calling.")

    async def aenforced_function_call(self,
                                      system_prompt: str,
                                      chat_history: Messages,
                                      function: Function,
                                      *,
                                      max_tokens: Optional[int] = None,
                                      model_params: Optional[dict] = None
                                      ):
        raise NotImplementedError("OctoAI doesn't support function calling.")
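
And a minimal chat sketch for the new wrapper (not part of this commit; it assumes `OCTOAI_API_KEY` is set and that the response follows the OpenAI-style `ChatResponse` model Canopy uses elsewhere):

```python
from canopy.llm import OctoAILLM
from canopy.models.data_models import MessageBase, Role

llm = OctoAILLM()  # defaults to mistral-7b-instruct-fp16

response = llm.chat_completion(
    system_prompt="You are a helpful assistant.",
    chat_history=[MessageBase(role=Role.USER, content="What is an LLM?")],
)
print(response.choices[0].message.content)
```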
30 changes: 28 additions & 2 deletions tests/system/llm/test_openai.py
@@ -4,7 +4,7 @@
import jsonschema
import pytest

from canopy.llm import AzureOpenAILLM, AnyscaleLLM
from canopy.llm import AzureOpenAILLM, AnyscaleLLM, OctoAILLM
from canopy.models.data_models import Role, MessageBase, Context, StringContextContent # noqa
from canopy.models.api_models import ChatResponse, StreamingChatChunk # noqa
from canopy.llm.openai import OpenAILLM # noqa
@@ -60,7 +60,7 @@ def model_params_low_temperature():
return {"temperature": 0.2, "top_p": 0.5, "n": 1}


@pytest.fixture(params=[OpenAILLM, AzureOpenAILLM, AnyscaleLLM])
@pytest.fixture(params=[OpenAILLM, AzureOpenAILLM, AnyscaleLLM, OctoAILLM])
def openai_llm(request):
    llm_class = request.param
    if llm_class == AzureOpenAILLM:
@@ -73,6 +73,10 @@ def openai_llm(request):
        if os.getenv("ANYSCALE_API_KEY") is None:
            pytest.skip("Couldn't find Anyscale API key. Skipping Anyscale tests.")
        model_name = "mistralai/Mistral-7B-Instruct-v0.1"
    elif llm_class == OctoAILLM:
        if os.getenv("OCTOAI_API_KEY") is None:
            pytest.skip("Couldn't find OctoAI API key. Skipping OctoAI tests.")
        model_name = "mistral-7b-instruct"
    else:
        model_name = "gpt-3.5-turbo-0613"

@@ -121,6 +125,8 @@ def test_chat_completion_with_context(openai_llm, messages):
def test_enforced_function_call(openai_llm,
                                messages,
                                function_query_knowledgebase):
    if isinstance(openai_llm, OctoAILLM):
        pytest.skip("OctoAI doesn't support function calling at the moment")
    result = openai_llm.enforced_function_call(
        system_prompt=SYSTEM_PROMPT,
        chat_history=messages,
@@ -134,11 +140,15 @@ def test_chat_completion_high_temperature(openai_llm,
    if isinstance(openai_llm, AnyscaleLLM):
        pytest.skip("Anyscale doesn't support n>1 at the moment.")

    if isinstance(openai_llm, OctoAILLM):
        pytest.skip("OctoAI doesn't support n>1 at the moment.")

    response = openai_llm.chat_completion(
        system_prompt=SYSTEM_PROMPT,
        chat_history=messages,
        model_params=model_params_high_temperature
    )

    assert_chat_completion(response,
                           num_choices=model_params_high_temperature["n"])

@@ -160,6 +170,9 @@ def test_enforced_function_call_high_temperature(openai_llm,
    if isinstance(openai_llm, AnyscaleLLM):
        pytest.skip("Anyscale doesn't support n>1 at the moment.")

    if isinstance(openai_llm, OctoAILLM):
        pytest.skip("OctoAI doesn't support function calling at the moment")

    result = openai_llm.enforced_function_call(
        system_prompt=SYSTEM_PROMPT,
        chat_history=messages,
@@ -177,6 +190,9 @@ def test_enforced_function_call_low_temperature(openai_llm,
    if isinstance(openai_llm, AnyscaleLLM):
        model_params["top_p"] = 1.0

    if isinstance(openai_llm, OctoAILLM):
        pytest.skip("OctoAI doesn't support function calling at the moment")

    result = openai_llm.enforced_function_call(
        system_prompt=SYSTEM_PROMPT,
        chat_history=messages,
@@ -191,6 +207,8 @@ def test_chat_completion_with_model_name(openai_llm, messages):
pytest.skip("In Azure the model name has to be a valid deployment")
elif isinstance(openai_llm, AnyscaleLLM):
new_model_name = "meta-llama/Llama-2-7b-chat-hf"
elif isinstance(openai_llm, OctoAILLM):
new_model_name = "codellama-7b-instruct"
else:
new_model_name = "gpt-3.5-turbo-1106"

@@ -248,6 +266,9 @@ def test_chat_complete_api_failure_populates(openai_llm,
def test_enforce_function_api_failure_populates(openai_llm,
                                                messages,
                                                function_query_knowledgebase):
    if isinstance(openai_llm, OctoAILLM):
        pytest.skip("OctoAI doesn't support function calling at the moment")

    openai_llm._client = MagicMock()
    openai_llm._client.chat.completions.create.side_effect = Exception(
        "API call failed")
@@ -261,6 +282,9 @@ def test_enforce_function_wrong_output_schema(openai_llm,
def test_enforce_function_wrong_output_schema(openai_llm,
                                              messages,
                                              function_query_knowledgebase):
    if isinstance(openai_llm, OctoAILLM):
        pytest.skip("OctoAI doesn't support function calling at the moment")

    openai_llm._client = MagicMock()
    openai_llm._client.chat.completions.create.return_value = MagicMock(
        choices=[MagicMock(
@@ -302,6 +326,8 @@ def test_enforce_function_unsupported_model(openai_llm,
def test_available_models(openai_llm):
    if isinstance(openai_llm, AzureOpenAILLM):
        pytest.skip("Azure does not support listing models")
    if isinstance(openai_llm, OctoAILLM):
        pytest.skip("OctoAI does not support listing models")
    models = openai_llm.available_models
    assert isinstance(models, list)
    assert len(models) > 0
53 changes: 53 additions & 0 deletions tests/system/record_encoder/test_octoai_record_encoder.py
@@ -0,0 +1,53 @@
import pytest

from canopy.knowledge_base.models import KBDocChunk
from canopy.knowledge_base.record_encoder.octoai import OctoAIRecordEncoder
from canopy.models.data_models import Query


documents = [KBDocChunk(id=f"doc_1_{i}",
                        text=f"Sample document {i}",
                        document_id=f"doc_{i}",
                        metadata={"test": i},
                        source="doc_1")
             for i in range(4)]

queries = [Query(text="Sample query 1"),
           Query(text="Sample query 2"),
           Query(text="Sample query 3"),
           Query(text="Sample query 4")]


@pytest.fixture
def encoder():
    return OctoAIRecordEncoder(batch_size=2)


def test_dimension(encoder):
    assert encoder.dimension == 1024


@pytest.mark.parametrize("items,function",
                         [(documents, "encode_documents"),
                          (queries, "encode_queries"),
                          ([], "encode_documents"),
                          ([], "encode_queries")])
def test_encode_documents(encoder, items, function):
    encoded_documents = getattr(encoder, function)(items)

    assert len(encoded_documents) == len(items)
    assert all(len(encoded.values) == encoder.dimension
               for encoded in encoded_documents)


@pytest.mark.asyncio
@pytest.mark.parametrize("items,function",
                         [(documents, "aencode_documents"),
                          (queries, "aencode_queries")])
async def test_aencode_not_implemented(encoder, function, items):
    with pytest.raises(NotImplementedError):
        await getattr(encoder, function)(items)
