From cbab5fb8f8349f3d2a4267896498b722eb04f482 Mon Sep 17 00:00:00 2001
From: Lukas Kreussel <65088241+LLukas22@users.noreply.github.com>
Date: Wed, 19 Jul 2023 16:15:22 +0200
Subject: [PATCH] Better HuggingFace integration

---
 .vscode/launch.json           | 16 +++++++
 examples/haystack_example.py  | 30 ++++++++-----
 examples/langchain_example.py | 11 ++++-
 llm_rs/auto.py                | 80 +++++++++++++++++++++++++----------
 llm_rs/haystack/haystack.py   |  2 +-
 5 files changed, 103 insertions(+), 36 deletions(-)
 create mode 100644 .vscode/launch.json

diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 0000000..2b2502c
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,16 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Python: Current File",
+            "type": "python",
+            "request": "launch",
+            "program": "${file}",
+            "console": "integratedTerminal",
+            "justMyCode": false
+        }
+    ]
+}
\ No newline at end of file
diff --git a/examples/haystack_example.py b/examples/haystack_example.py
index 445b2cd..9dffbf6 100644
--- a/examples/haystack_example.py
+++ b/examples/haystack_example.py
@@ -1,15 +1,23 @@
-from haystack.nodes import PromptNode, PromptModel
+from haystack.nodes import PromptModel
 from llm_rs.haystack import RustformersInvocationLayer
+from llm_rs import KnownModels,SessionConfig
 
-model = PromptModel("rustformers/open-llama-ggml",
-                    max_length=1024,
-                    invocation_layer_class=RustformersInvocationLayer,
-                    model_kwargs={"model_file":"open_llama_3b-q5_1-ggjt.bin"})
-
-pn = PromptNode(
-    model,
-    max_length=1024
-)
-pn("Life is",stream=True)
+#Enable `use_gpu` to use GPU acceleration
+session_config = SessionConfig(use_gpu=False)
+
+model = PromptModel("TheBloke/Llama-2-7B-Chat-GGML",
+                    max_length=4096,
+                    invocation_layer_class=RustformersInvocationLayer,
+                    model_kwargs={
+                        "model_file":"llama-2-7b-chat.ggmlv3.q4_K_S.bin",
+                        "session_config":session_config,
+                        "verbose":True,
+                    })
+prompt= """
+System: You are a helpful, respectful and honest assistant.
+User: Tell me a Story about a Lama riding the crab named Ferris in about 1000 words.
+Assistant:
+"""
+model.invoke(prompt=prompt,stream=True)
\ No newline at end of file
diff --git a/examples/langchain_example.py b/examples/langchain_example.py
index cd08eff..c7ecf4c 100644
--- a/examples/langchain_example.py
+++ b/examples/langchain_example.py
@@ -1,4 +1,5 @@
 from llm_rs.langchain import RustformersLLM
+from llm_rs import KnownModels, SessionConfig
 from langchain import PromptTemplate
 from langchain.chains import LLMChain
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
@@ -11,8 +12,14 @@
 
 prompt = PromptTemplate(input_variables=["instruction"],template=template,)
 
-llm = RustformersLLM(model_path_or_repo_id="rustformers/mpt-7b-ggml",model_file="mpt-7b-instruct-q5_1-ggjt.bin",callbacks=[StreamingStdOutCallbackHandler()])
+llm = RustformersLLM(model_path_or_repo_id="TheBloke/Nous-Hermes-13B-GGML",
+                     model_file="nous-hermes-13b.ggmlv3.q4_K_S.bin",
+                     verbose=True,
+                     model_type=KnownModels.Llama,
+                     session_config=SessionConfig(use_gpu=True),
+                     callbacks=[StreamingStdOutCallbackHandler()]
+)
 
 chain = LLMChain(llm=llm, prompt=prompt)
-chain.run("Write a short post congratulating rustformers on their new release of their langchain integration.")
\ No newline at end of file
+chain.run("Write me some Cypher Query language examples for Neo4j. Try to use the example movie dataset. Give me 5 examples of how to create nodes and relationships and how to query them.")
\ No newline at end of file
diff --git a/llm_rs/auto.py b/llm_rs/auto.py
index d1ca3f7..cb80a80 100644
--- a/llm_rs/auto.py
+++ b/llm_rs/auto.py
@@ -51,6 +51,18 @@ class KnownModels(Enum):
     KnownModels.Llama: Llama
 }
 
+
+_STRING_TO_KNOWN_MODEL_MAP = {
+    "gpt2": KnownModels.Gpt2,
+    "starcoder": KnownModels.Gpt2,
+    "gpt_neox": KnownModels.GptNeoX,
+    "dolly-v2": KnownModels.GptNeoX,
+    "llama": KnownModels.Llama,
+    "mpt": KnownModels.Mpt,
+    "gptj": KnownModels.GptJ,
+    "bloom": KnownModels.Bloom,
+}
+
 CURRENT_QUANTIZATION_VERSION = QuantizationVersions.V2
 
 class PathType(Enum):
@@ -131,17 +143,16 @@ def from_pretrained(
         **kwargs,
     ) -> 'AutoConfig':
         path_type = _get_path_type(model_path_or_repo_id)
+        path = pathlib.Path(model_path_or_repo_id)
         if path_type == PathType.UNKNOWN:
             raise ValueError(
                 f"Model path '{model_path_or_repo_id}' doesn't exist.")
         elif path_type == PathType.FILE:
-            raise ValueError(
-                f"Model path '{model_path_or_repo_id}' is a file. "
-                "Please provide a directory or a repo id.")
+            path = path.resolve().parent
 
         auto_config = AutoConfig()
         if path_type == PathType.DIR:
-            cls._update_from_dir(str(model_path_or_repo_id), auto_config)
+            cls._update_from_dir(str(path), auto_config)
         elif path_type == PathType.REPO:
             cls._update_from_repo(str(model_path_or_repo_id), auto_config)
 
@@ -169,12 +180,26 @@ def _update_from_file(cls, path: str, auto_config: 'AutoConfig') -> None:
         with open(path) as f:
             config = json.load(f)
             auto_config.model_type = config.get('model_type')
-            auto_config.repo_type = config.get('repo_type')
+            if 'repo_type' in config:
+                auto_config.repo_type = config.get('repo_type')
+            elif len(config) == 1:
+                auto_config.repo_type = "GGML"
+            else:
+                auto_config.repo_type = "HuggingFace"
+
+
 
 class AutoModel():
     """
     Utility to load models, without having to specify the model type.
""" + + @classmethod + def has_metadata_file(cls,model_file:Union[str,os.PathLike])->bool: + path = pathlib.Path(model_file) + metadata_file = path.with_suffix(".meta") + return metadata_file.exists() + @classmethod def load_metadata(cls,model_file:Union[str,os.PathLike])->ModelMetadata: path = pathlib.Path(model_file) @@ -191,13 +216,21 @@ def load_metadata(cls,model_file:Union[str,os.PathLike])->ModelMetadata: return metadata @classmethod - def _infer_model_type(cls,model_file:Union[str,os.PathLike],known_model:Optional[KnownModels]=None)->Type[Model]: + def _infer_model_type(cls,model_file:Union[str,os.PathLike],known_model:Optional[KnownModels]=None,config:Optional[AutoConfig]=None)->Type[Model]: model_to_lookup = None if known_model: model_to_lookup = known_model - else: + elif cls.has_metadata_file(model_file): metadata = cls.load_metadata(model_file) model_to_lookup = metadata.model + elif config and config.model_type: + if config.model_type.lower() in _STRING_TO_KNOWN_MODEL_MAP: + model_to_lookup = _STRING_TO_KNOWN_MODEL_MAP[config.model_type.lower()] + else: + raise ValueError(f"Unknown model type '{config.model_type}' in config file '{model_file}'! Please specify the model type via `known_model`.") + else: + raise ValueError(f"Model file '{model_file}' does not have a metadata or config file and no model type was specified! Please specify the model type via `known_model`.") + if model_to_lookup in _KNOWN_MODELS_MAP: return _KNOWN_MODELS_MAP[model_to_lookup] @@ -206,7 +239,9 @@ def _infer_model_type(cls,model_file:Union[str,os.PathLike],known_model:Optional @classmethod - def from_file(cls, path:Union[str,os.PathLike], + def from_file(cls, + path:Union[str,os.PathLike], + config:Optional[AutoConfig], model_type: Optional[KnownModels] = None, session_config:SessionConfig=SessionConfig(), tokenizer_path_or_repo_id: Optional[Union[str,os.PathLike]]=None, @@ -225,7 +260,7 @@ def from_file(cls, path:Union[str,os.PathLike], if tokenizer is None or tokenizer == "": logging.warning(f"Model file '{path}' does not have a base_model specified in its metadata file but wants to use a huggingface-tokenizer! 
Please expilicitly specify a tokenizer via `tokenizer_path_or_repo_id` if you intend to use a huggingface-tokenizer.") - model = cls._infer_model_type(path,model_type) + model = cls._infer_model_type(path,model_type,config) return model(path,session_config,tokenizer_path_or_repo_id,lora_paths,verbose) @classmethod @@ -241,34 +276,35 @@ def from_pretrained(cls, default_quantization:QuantizationType=QuantizationType.Q4_0, default_container:ContainerType=ContainerType.GGJT)->Model: + try: + config = AutoConfig.from_pretrained( + model_path_or_repo_id, + ) + except ValueError: + logging.warning("Could not find config.json in repo, assuming GGML model...") + config = AutoConfig(repo_type="GGML") + + if model_file: + config.repo_type = "GGML" + path_type = _get_path_type(model_path_or_repo_id) if path_type == PathType.UNKNOWN: raise ValueError(f"Unknown path type for '{model_path_or_repo_id}'") elif path_type == PathType.FILE: - return cls.from_file(model_path_or_repo_id,model_type,session_config,tokenizer_path_or_repo_id,lora_paths,verbose,use_hf_tokenizer) + return cls.from_file(model_path_or_repo_id,config,model_type,session_config,tokenizer_path_or_repo_id,lora_paths,verbose,use_hf_tokenizer) else: if path_type == PathType.REPO: - - try: - config = AutoConfig.from_pretrained( - model_path_or_repo_id, - ) - except ValueError: - logging.warning("Could not find config.json in repo, assuming GGML model...") - config = AutoConfig(repo_type="GGML") - - if config.repo_type != "GGML": logging.warning("Found normal HuggingFace model, starting conversion...") return cls.from_transformer(model_path_or_repo_id, session_config, tokenizer_path_or_repo_id, lora_paths, verbose, use_hf_tokenizer,default_quantization, default_container) resolved_path = cls._find_model_path_from_repo(str(model_path_or_repo_id),model_file) - return cls.from_file(resolved_path,model_type,session_config,tokenizer_path_or_repo_id,lora_paths,verbose,use_hf_tokenizer) + return cls.from_file(resolved_path,config,model_type,session_config,tokenizer_path_or_repo_id,lora_paths,verbose,use_hf_tokenizer) elif path_type == PathType.DIR: resolved_path = cls._find_model_path_from_dir(str(model_path_or_repo_id),model_file) - return cls.from_file(resolved_path,model_type,session_config,tokenizer_path_or_repo_id,lora_paths,verbose,use_hf_tokenizer) + return cls.from_file(resolved_path,config,model_type,session_config,tokenizer_path_or_repo_id,lora_paths,verbose,use_hf_tokenizer) else: raise ValueError(f"Unknown path type '{path_type}'") diff --git a/llm_rs/haystack/haystack.py b/llm_rs/haystack/haystack.py index d371925..dfa51d8 100644 --- a/llm_rs/haystack/haystack.py +++ b/llm_rs/haystack/haystack.py @@ -3,7 +3,7 @@ from haystack.nodes.prompt.invocation_layer import DefaultTokenStreamingHandler except ImportError: raise ImportError( - 'To use the llm_rs.haystack module, please install llm-rs with the additional "haystack" dependencies via: pip install llm-rs[haystack]') + 'To use the llm_rs.haystack module, please install llm-rs with the additional "haystack" dependencies e.g. via: pip install llm-rs[haystack]') import os from typing import Dict, List, Union, Type, Optional
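
Usage sketch (not part of the patch): with the changes above, a GGML model hosted on the HuggingFace Hub can be loaded through AutoModel.from_pretrained, and the model type is inferred from the repo's config.json or the model's .meta file when it is not passed explicitly. The repo id, model file and session settings below are taken from the examples in this patch; the generate()/result.text calls at the end are assumed from the existing llm-rs bindings and may need adjusting.

    from llm_rs import SessionConfig
    from llm_rs.auto import AutoModel, KnownModels

    # CPU-only session; set use_gpu=True for GPU acceleration (see the haystack example above).
    session_config = SessionConfig(use_gpu=False)

    model = AutoModel.from_pretrained(
        "TheBloke/Llama-2-7B-Chat-GGML",
        model_file="llama-2-7b-chat.ggmlv3.q4_K_S.bin",
        model_type=KnownModels.Llama,  # optional: inferred from config.json/metadata when omitted
        session_config=session_config,
    )

    # generate()/result.text are assumed from the existing llm-rs API.
    result = model.generate("Tell me a story about a llama riding the crab named Ferris.")
    print(result.text)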