diff --git a/README.md b/README.md index 6bff98cb..f658e6ce 100644 --- a/README.md +++ b/README.md @@ -212,6 +212,9 @@ For **advanced installation instructions** or if you see weird errors during ins - Go to our official [Documentation](https://docs.unsloth.ai) for saving to GGUF, checkpointing, evaluation and more! - We support Huggingface's TRL, Trainer, Seq2SeqTrainer or even Pytorch code! - We're in 🤗Hugging Face's official docs! Check out the [SFT docs](https://huggingface.co/docs/trl/main/en/sft_trainer#accelerate-fine-tuning-2x-using-unsloth) and [DPO docs](https://huggingface.co/docs/trl/main/en/dpo_trainer#accelerate-dpo-fine-tuning-using-unsloth)! +- If you want to download models from the ModelScope community, please set the environment variable `UNSLOTH_USE_MODELSCOPE=1` and install the modelscope library via `pip install modelscope -U`. +
 +> unsloth-cli.py also supports `UNSLOTH_USE_MODELSCOPE=1` to download models and datasets. Please remember to use model and dataset IDs from the ModelScope community. 
```python from unsloth import FastLanguageModel diff --git a/unsloth-cli.py b/unsloth-cli.py index ddb0ac8b..b7613f92 100644 --- a/unsloth-cli.py +++ b/unsloth-cli.py @@ -30,11 +30,14 @@ """ import argparse +import os + def run(args): import torch from unsloth import FastLanguageModel from datasets import load_dataset + from transformers.utils import strtobool from trl import SFTTrainer from transformers import TrainingArguments from unsloth import is_bfloat16_supported @@ -86,8 +89,13 @@ def formatting_prompts_func(examples): texts.append(text) return {"text": texts} - # Load and format dataset - dataset = load_dataset(args.dataset, split="train") + use_modelscope = strtobool(os.environ.get('UNSLOTH_USE_MODELSCOPE', 'False')) + if use_modelscope: + from modelscope import MsDataset + dataset = MsDataset.load(args.dataset, split="train") + else: + # Load and format dataset + dataset = load_dataset(args.dataset, split="train") dataset = dataset.map(formatting_prompts_func, batched=True) print("Data is formatted and ready!") diff --git a/unsloth/models/loader.py b/unsloth/models/loader.py index 9c5ea5ba..4281b4a9 100644 --- a/unsloth/models/loader.py +++ b/unsloth/models/loader.py @@ -31,6 +31,15 @@ pass from huggingface_hub import HfFileSystem +# [TODO] Move USE_MODELSCOPE to utils +USE_MODELSCOPE = os.environ.get("UNSLOTH_USE_MODELSCOPE", "0") == "1" +if USE_MODELSCOPE: + import importlib + if importlib.util.find_spec("modelscope") is None: + raise ImportError(f'You are using the modelscope hub, please install modelscope by `pip install modelscope -U`') + pass +pass + # https://github.com/huggingface/transformers/pull/26037 allows 4 bit loading! 
from unsloth_zoo.utils import Version transformers_version = Version(transformers_version) @@ -87,6 +96,11 @@ def from_pretrained( old_model_name = model_name model_name = get_model_name(model_name, load_in_4bit) + if USE_MODELSCOPE and not os.path.exists(model_name): + from modelscope import snapshot_download + model_name = snapshot_download(model_name) + pass + # First check if it's a normal model via AutoConfig from huggingface_hub.utils import disable_progress_bars, enable_progress_bars, are_progress_bars_disabled was_disabled = are_progress_bars_disabled() @@ -366,6 +380,11 @@ def from_pretrained( old_model_name = model_name model_name = get_model_name(model_name, load_in_4bit) + if USE_MODELSCOPE and not os.path.exists(model_name): + from modelscope import snapshot_download + model_name = snapshot_download(model_name) + pass + # First check if it's a normal model via AutoConfig from huggingface_hub.utils import disable_progress_bars, enable_progress_bars, are_progress_bars_disabled was_disabled = are_progress_bars_disabled()