From cbab5fb8f8349f3d2a4267896498b722eb04f482 Mon Sep 17 00:00:00 2001
From: Lukas Kreussel <65088241+LLukas22@users.noreply.github.com>
Date: Wed, 19 Jul 2023 16:15:22 +0200
Subject: [PATCH] Better HuggingFace integration

---
 .vscode/launch.json           | 16 +++++++
 examples/haystack_example.py  | 30 ++++++++-----
 examples/langchain_example.py | 11 ++++-
 llm_rs/auto.py                | 80 +++++++++++++++++++++++++----------
 llm_rs/haystack/haystack.py   |  2 +-
 5 files changed, 103 insertions(+), 36 deletions(-)
 create mode 100644 .vscode/launch.json

diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 0000000..2b2502c
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,16 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Python: Current File",
+            "type": "python",
+            "request": "launch",
+            "program": "${file}",
+            "console": "integratedTerminal",
+            "justMyCode": false
+        }
+    ]
+}
\ No newline at end of file
diff --git a/examples/haystack_example.py b/examples/haystack_example.py
index 445b2cd..9dffbf6 100644
--- a/examples/haystack_example.py
+++ b/examples/haystack_example.py
@@ -1,15 +1,23 @@
-from haystack.nodes import PromptNode, PromptModel
+from haystack.nodes import PromptModel
 from llm_rs.haystack import RustformersInvocationLayer
+from llm_rs import KnownModels,SessionConfig
 
-model = PromptModel("rustformers/open-llama-ggml",
-                    max_length=1024,
-                    invocation_layer_class=RustformersInvocationLayer,
-                    model_kwargs={"model_file":"open_llama_3b-q5_1-ggjt.bin"})
-
-pn = PromptNode(
-    model,
-    max_length=1024
-)
-pn("Life is",stream=True)
+#Enable `use_gpu` to use GPU acceleration
+session_config = SessionConfig(use_gpu=False)
+
+model = PromptModel("TheBloke/Llama-2-7B-Chat-GGML",
+                    max_length=4096,
+                    invocation_layer_class=RustformersInvocationLayer,
+                    model_kwargs={
+                        "model_file":"llama-2-7b-chat.ggmlv3.q4_K_S.bin",
+                        "session_config":session_config,
+                        "verbose":True,
+                    })
+prompt= """
+System: You are a helpful, respectful and honest assistant.
+User: Tell me a Story about a Lama riding the crab named Ferris in about 1000 words.
+Assistant:
+"""
+model.invoke(prompt=prompt,stream=True)
\ No newline at end of file
diff --git a/examples/langchain_example.py b/examples/langchain_example.py
index cd08eff..c7ecf4c 100644
--- a/examples/langchain_example.py
+++ b/examples/langchain_example.py
@@ -1,4 +1,5 @@
 from llm_rs.langchain import RustformersLLM
+from llm_rs import KnownModels, SessionConfig
 from langchain import PromptTemplate
 from langchain.chains import LLMChain
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
@@ -11,8 +12,14 @@
 
 prompt = PromptTemplate(input_variables=["instruction"],template=template,)
 
-llm = RustformersLLM(model_path_or_repo_id="rustformers/mpt-7b-ggml",model_file="mpt-7b-instruct-q5_1-ggjt.bin",callbacks=[StreamingStdOutCallbackHandler()])
+llm = RustformersLLM(model_path_or_repo_id="TheBloke/Nous-Hermes-13B-GGML",
+                     model_file="nous-hermes-13b.ggmlv3.q4_K_S.bin",
+                     verbose=True,
+                     model_type=KnownModels.Llama,
+                     session_config=SessionConfig(use_gpu=True),
+                     callbacks=[StreamingStdOutCallbackHandler()]
+)
 
 chain = LLMChain(llm=llm, prompt=prompt)
-chain.run("Write a short post congratulating rustformers on their new release of their langchain integration.")
\ No newline at end of file
+chain.run("Write me some Cypher Query language examples for Neo4j. Try to use the example movie dataset. Give me 5 examples of how to create nodes and relationships and how to query them.")
\ No newline at end of file
diff --git a/llm_rs/auto.py b/llm_rs/auto.py
index d1ca3f7..cb80a80 100644
--- a/llm_rs/auto.py
+++ b/llm_rs/auto.py
@@ -51,6 +51,18 @@ class KnownModels(Enum):
     KnownModels.Llama: Llama
 }
 
+
+_STRING_TO_KNOWN_MODEL_MAP = {
+    "gpt2": KnownModels.Gpt2,
+    "starcoder": KnownModels.Gpt2,
+    "gpt_neox": KnownModels.GptNeoX,
+    "dolly-v2": KnownModels.GptNeoX,
+    "llama": KnownModels.Llama,
+    "mpt": KnownModels.Mpt,
+    "gptj": KnownModels.GptJ,
+    "bloom": KnownModels.Bloom,
+}
+
 CURRENT_QUANTIZATION_VERSION = QuantizationVersions.V2
 
 class PathType(Enum):
@@ -131,17 +143,16 @@ def from_pretrained(
         **kwargs,
     ) -> 'AutoConfig':
         path_type = _get_path_type(model_path_or_repo_id)
+        path = pathlib.Path(model_path_or_repo_id)
         if path_type == PathType.UNKNOWN:
             raise ValueError(
                 f"Model path '{model_path_or_repo_id}' doesn't exist.")
         elif path_type == PathType.FILE:
-            raise ValueError(
-                f"Model path '{model_path_or_repo_id}' is a file. "
-                "Please provide a directory or a repo id.")
+            path = path.resolve().parent
 
         auto_config = AutoConfig()
         if path_type == PathType.DIR:
-            cls._update_from_dir(str(model_path_or_repo_id), auto_config)
+            cls._update_from_dir(str(path), auto_config)
         elif path_type == PathType.REPO:
             cls._update_from_repo(str(model_path_or_repo_id), auto_config)
 
@@ -169,12 +180,26 @@ def _update_from_file(cls, path: str, auto_config: 'AutoConfig') -> None:
         with open(path) as f:
             config = json.load(f)
             auto_config.model_type = config.get('model_type')
-            auto_config.repo_type = config.get('repo_type')
+            if 'repo_type' in config:
+                auto_config.repo_type = config.get('repo_type')
+            elif len(config) == 1:
+                auto_config.repo_type = "GGML"
+            else:
+                auto_config.repo_type = "HuggingFace"
+
+
 
 class AutoModel():
     """
     Utility to load models, without having to specify the model type.
""" + + @classmethod + def has_metadata_file(cls,model_file:Union[str,os.PathLike])->bool: + path = pathlib.Path(model_file) + metadata_file = path.with_suffix(".meta") + return metadata_file.exists() + @classmethod def load_metadata(cls,model_file:Union[str,os.PathLike])->ModelMetadata: path = pathlib.Path(model_file) @@ -191,13 +216,21 @@ def load_metadata(cls,model_file:Union[str,os.PathLike])->ModelMetadata: return metadata @classmethod - def _infer_model_type(cls,model_file:Union[str,os.PathLike],known_model:Optional[KnownModels]=None)->Type[Model]: + def _infer_model_type(cls,model_file:Union[str,os.PathLike],known_model:Optional[KnownModels]=None,config:Optional[AutoConfig]=None)->Type[Model]: model_to_lookup = None if known_model: model_to_lookup = known_model - else: + elif cls.has_metadata_file(model_file): metadata = cls.load_metadata(model_file) model_to_lookup = metadata.model + elif config and config.model_type: + if config.model_type.lower() in _STRING_TO_KNOWN_MODEL_MAP: + model_to_lookup = _STRING_TO_KNOWN_MODEL_MAP[config.model_type.lower()] + else: + raise ValueError(f"Unknown model type '{config.model_type}' in config file '{model_file}'! Please specify the model type via `known_model`.") + else: + raise ValueError(f"Model file '{model_file}' does not have a metadata or config file and no model type was specified! Please specify the model type via `known_model`.") + if model_to_lookup in _KNOWN_MODELS_MAP: return _KNOWN_MODELS_MAP[model_to_lookup] @@ -206,7 +239,9 @@ def _infer_model_type(cls,model_file:Union[str,os.PathLike],known_model:Optional @classmethod - def from_file(cls, path:Union[str,os.PathLike], + def from_file(cls, + path:Union[str,os.PathLike], + config:Optional[AutoConfig], model_type: Optional[KnownModels] = None, session_config:SessionConfig=SessionConfig(), tokenizer_path_or_repo_id: Optional[Union[str,os.PathLike]]=None, @@ -225,7 +260,7 @@ def from_file(cls, path:Union[str,os.PathLike], if tokenizer is None or tokenizer == "": logging.warning(f"Model file '{path}' does not have a base_model specified in its metadata file but wants to use a huggingface-tokenizer! 
Please expilicitly specify a tokenizer via `tokenizer_path_or_repo_id` if you intend to use a huggingface-tokenizer.") - model = cls._infer_model_type(path,model_type) + model = cls._infer_model_type(path,model_type,config) return model(path,session_config,tokenizer_path_or_repo_id,lora_paths,verbose) @classmethod @@ -241,34 +276,35 @@ def from_pretrained(cls, default_quantization:QuantizationType=QuantizationType.Q4_0, default_container:ContainerType=ContainerType.GGJT)->Model: + try: + config = AutoConfig.from_pretrained( + model_path_or_repo_id, + ) + except ValueError: + logging.warning("Could not find config.json in repo, assuming GGML model...") + config = AutoConfig(repo_type="GGML") + + if model_file: + config.repo_type = "GGML" + path_type = _get_path_type(model_path_or_repo_id) if path_type == PathType.UNKNOWN: raise ValueError(f"Unknown path type for '{model_path_or_repo_id}'") elif path_type == PathType.FILE: - return cls.from_file(model_path_or_repo_id,model_type,session_config,tokenizer_path_or_repo_id,lora_paths,verbose,use_hf_tokenizer) + return cls.from_file(model_path_or_repo_id,config,model_type,session_config,tokenizer_path_or_repo_id,lora_paths,verbose,use_hf_tokenizer) else: if path_type == PathType.REPO: - - try: - config = AutoConfig.from_pretrained( - model_path_or_repo_id, - ) - except ValueError: - logging.warning("Could not find config.json in repo, assuming GGML model...") - config = AutoConfig(repo_type="GGML") - - if config.repo_type != "GGML": logging.warning("Found normal HuggingFace model, starting conversion...") return cls.from_transformer(model_path_or_repo_id, session_config, tokenizer_path_or_repo_id, lora_paths, verbose, use_hf_tokenizer,default_quantization, default_container) resolved_path = cls._find_model_path_from_repo(str(model_path_or_repo_id),model_file) - return cls.from_file(resolved_path,model_type,session_config,tokenizer_path_or_repo_id,lora_paths,verbose,use_hf_tokenizer) + return cls.from_file(resolved_path,config,model_type,session_config,tokenizer_path_or_repo_id,lora_paths,verbose,use_hf_tokenizer) elif path_type == PathType.DIR: resolved_path = cls._find_model_path_from_dir(str(model_path_or_repo_id),model_file) - return cls.from_file(resolved_path,model_type,session_config,tokenizer_path_or_repo_id,lora_paths,verbose,use_hf_tokenizer) + return cls.from_file(resolved_path,config,model_type,session_config,tokenizer_path_or_repo_id,lora_paths,verbose,use_hf_tokenizer) else: raise ValueError(f"Unknown path type '{path_type}'") diff --git a/llm_rs/haystack/haystack.py b/llm_rs/haystack/haystack.py index d371925..dfa51d8 100644 --- a/llm_rs/haystack/haystack.py +++ b/llm_rs/haystack/haystack.py @@ -3,7 +3,7 @@ from haystack.nodes.prompt.invocation_layer import DefaultTokenStreamingHandler except ImportError: raise ImportError( - 'To use the llm_rs.haystack module, please install llm-rs with the additional "haystack" dependencies via: pip install llm-rs[haystack]') + 'To use the llm_rs.haystack module, please install llm-rs with the additional "haystack" dependencies e.g. via: pip install llm-rs[haystack]') import os from typing import Dict, List, Union, Type, Optional
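
Usage sketch (not part of the patch): with the changes above, a GGML model hosted on the HuggingFace Hub can be loaded through AutoModel.from_pretrained, and the model type is inferred from the repo's config.json or the model's .meta file when it is not passed explicitly. The repo id, model file and session settings below are taken from the examples in this patch; the generate()/result.text calls at the end are assumed from the existing llm-rs bindings and may need adjusting.

    from llm_rs import SessionConfig
    from llm_rs.auto import AutoModel, KnownModels

    # CPU-only session; set use_gpu=True for GPU acceleration (see the haystack example above).
    session_config = SessionConfig(use_gpu=False)

    model = AutoModel.from_pretrained(
        "TheBloke/Llama-2-7B-Chat-GGML",
        model_file="llama-2-7b-chat.ggmlv3.q4_K_S.bin",
        model_type=KnownModels.Llama,  # optional: inferred from config.json/metadata when omitted
        session_config=session_config,
    )

    # generate()/result.text are assumed from the existing llm-rs API.
    result = model.generate("Tell me a story about a llama riding the crab named Ferris.")
    print(result.text)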