Feat/kaggle-gguf-on-tmp #1307

Open · wants to merge 12 commits into base: nightly
README.md (10 changes: 7 additions & 3 deletions)

@@ -41,15 +41,17 @@ All notebooks are **beginner friendly**! Add your dataset, click "Run All", and
- Click [here](https://docs.unsloth.ai/) for Unsloth's detailed documentation.

## 🦥 Unsloth.ai News
- 📣 NEW! [Llama 3.3 (70B)](https://huggingface.co/collections/unsloth/llama-33-all-versions-67535d7d994794b9d7cf5e9f), Meta's latest model, is now supported.
- 📣 NEW! We worked with Apple to add [Cut Cross Entropy](https://arxiv.org/abs/2411.09009). Unsloth now supports 89K context for Meta's Llama 3.3 (70B) on an 80GB GPU - 13x longer than HF+FA2. For Llama 3.1 (8B), Unsloth enables 342K context, surpassing its native 128K support.
- 📣 NEW! Introducing Unsloth [Dynamic 4-bit Quantization](https://unsloth.ai/blog/dynamic-4bit)! We dynamically opt not to quantize certain parameters; this greatly increases accuracy while using <10% more VRAM than BnB 4-bit. See our collection on [Hugging Face here.](https://huggingface.co/collections/unsloth/unsloth-4-bit-dynamic-quants-67503bb873f89e15276c44e7)
- 📣 NEW! [Vision models](https://unsloth.ai/blog/vision) now supported! [Llama 3.2 Vision (11B)](https://colab.research.google.com/drive/1j0N4XTY1zXXy7mPAhOC1_gMYZ2F2EBlk?usp=sharing), [Qwen 2.5 VL (7B)](https://colab.research.google.com/drive/1whHb54GNZMrNxIsi2wm2EY_-Pvo2QyKh?usp=sharing) and [Pixtral (12B) 2409](https://colab.research.google.com/drive/1K9ZrdwvZRE96qGkCq_e88FgV3MLnymQq?usp=sharing)
- 📣 NEW! Qwen 2.5 models, including [Coder](https://colab.research.google.com/drive/18sN803sU23XuJV9Q8On2xgqHSer6-UZF?usp=sharing), are now supported with bug fixes. The 14B model fits on a Colab GPU! [Qwen 2.5 conversational notebook](https://colab.research.google.com/drive/1qN1CEalC70EO1wGKhNxs1go1W9So61R5?usp=sharing)
- 📣 NEW! We found and helped fix a [gradient accumulation bug](https://unsloth.ai/blog/gradient)! Please update Unsloth and transformers.
- 📣 NEW! [Mistral Small 22b notebook](https://colab.research.google.com/drive/1oCEHcED15DzL8xXGU1VTx5ZfOJM8WY01?usp=sharing) finetuning fits in under 16GB of VRAM!
<details>
<summary>Click for more news</summary>

- 📣 Try out our [Chat interface](https://colab.research.google.com/drive/1i-8ESvtLRGNkkUQQr_-z_rcSAIo9c3lM?usp=sharing)!
- 📣 NEW! [Mistral Small 22b notebook](https://colab.research.google.com/drive/1oCEHcED15DzL8xXGU1VTx5ZfOJM8WY01?usp=sharing) finetuning fits in under 16GB of VRAM!
- 📣 NEW! [Llama 3.1 8b, 70b](https://colab.research.google.com/drive/1Ys44kVvmeZtnICzWz0xgpRnrIOjZAuxp?usp=sharing) & [Mistral Nemo-12b](https://colab.research.google.com/drive/17d3U-CAIwzmbDRqbZ9NnpHxCkmXB6LZ0?usp=sharing) both Base and Instruct are now supported
- 📣 NEW! `pip install unsloth` now works! Head over to [pypi](https://pypi.org/project/unsloth/) to check it out! This allows installing without `git pull`. Use `pip install "unsloth[colab-new]"` for installs without dependencies.
- 📣 NEW! Continued Pretraining [notebook](https://colab.research.google.com/drive/1tEd1FrOXWMnCU9UIvdYhs61tkxdMuKZu?usp=sharing) for other languages like Korean!
@@ -66,6 +68,7 @@ All notebooks are **beginner friendly**! Add your dataset, click "Run All", and
| 🥇 **Benchmarking** | [Performance Tables](https://github.com/unslothai/unsloth/tree/main#-performance-benchmarking)|
| 🌐 **Released Models** | [Unsloth Releases](https://docs.unsloth.ai/get-started/all-our-models)|
| ✍️ **Blog** | [Read our Blogs](https://unsloth.ai/blog)|
| <img height="14" src="https://redditinc.com/hs-fs/hubfs/Reddit%20Inc/Brand/Reddit_Logo.png" />&nbsp; **Reddit** | [Join our Reddit page](https://reddit.com/r/unsloth)|

## ⭐ Key Features
- All kernels written in [OpenAI's Triton](https://openai.com/research/triton) language. **Manual backprop engine**.
@@ -469,7 +472,7 @@ Two Tesla T4s on Kaggle
![](https://i.ibb.co/sJ7RhGG/image-41.png)
<br>

-### Citing
+### Citation

You can cite the Unsloth repo as follows:
```bibtex
@@ -482,6 +485,7 @@ You can cite the Unsloth repo as follows:
```

### Thank You to
- [Erik](https://github.com/erikwijmans) for his help adding [Apple's ML Cross Entropy](https://github.com/apple/ml-cross-entropy) in Unsloth
- [HuyNguyen-hust](https://github.com/HuyNguyen-hust) for making [RoPE Embeddings 28% faster](https://github.com/unslothai/unsloth/pull/238)
- [RandomInternetPreson](https://github.com/RandomInternetPreson) for confirming WSL support
- [152334H](https://github.com/152334H) for experimental DPO support
unsloth/models/loader.py (2 changes: 1 addition & 1 deletion)

@@ -131,7 +131,7 @@ def from_pretrained(
        exist_config = os.path.exists(os.path.join(model_name, "config.json"))
        both_exist = exist_adapter_config and exist_config
    else:
-        files = HfFileSystem(token = token).glob(os.path.join(model_name, "*.json"))
+        files = HfFileSystem(token = token).glob(f"{model_name}/*.json")
        files = (os.path.split(x)[-1] for x in files)
        if sum(x == "adapter_config.json" or x == "config.json" for x in files) >= 2:
            both_exist = True
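
The one-line `loader.py` change swaps `os.path.join` for an f-string when building the glob pattern, presumably because `os.path.join` uses the OS-specific separator: on Windows it yields a backslash, which is not a valid separator in Hugging Face repo paths. A minimal sketch of the difference (the repo id is a placeholder):

```python
import os

model_name = "unsloth/llama-3-8b-bnb-4bit"  # placeholder repo id

# On Windows, os.path.join inserts a backslash:
#   "unsloth/llama-3-8b-bnb-4bit\*.json"
# which is not a valid Hugging Face repo path pattern.
print(os.path.join(model_name, "*.json"))

# The f-string produces a forward-slash pattern on every OS:
print(f"{model_name}/*.json")
```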
unsloth/save.py (33 changes: 30 additions & 3 deletions)

@@ -442,7 +442,7 @@ def unsloth_save_model(
print("Saved to https://huggingface.co/" + save_pretrained_settings["save_directory"])
pass

print(" Done.")
print(" Done. Model were saved at " + save_pretrained_settings["save_directory"])
return save_directory, None
pass

@@ -1180,7 +1180,7 @@ def save_to_gguf(
    # Check if quantization succeeded!
    if not os.path.isfile(final_location):
        if IS_KAGGLE_ENVIRONMENT:
-            if not Path(final_location).resolve().is_relative_to(Path('/tmp').resolve()):
+            if not Path(final_location).resolve().is_relative_to(Path(KAGGLE_TMP).resolve()):
                raise RuntimeError(
                    f"Unsloth: Quantization failed for {final_location}\n"\
                    "You are in a Kaggle environment, which might be the reason this is failing.\n"\
@@ -1222,7 +1222,7 @@ def save_to_gguf(
    # Check if quantization succeeded!
    if not os.path.isfile(final_location):
        if IS_KAGGLE_ENVIRONMENT:
-            if not Path(final_location).resolve().is_relative_to(Path('/tmp').resolve()):
+            if not Path(final_location).resolve().is_relative_to(Path(KAGGLE_TMP).resolve()):
                raise RuntimeError(
                    f"Unsloth: Quantization failed for {final_location}\n"\
                    "You are in a Kaggle environment, which might be the reason this is failing.\n"\
@@ -1663,6 +1663,9 @@ def unsloth_save_pretrained_gguf(
    del arguments["quantization_method"]
    del arguments["first_conversion"]

    if IS_KAGGLE_ENVIRONMENT:
        arguments["save_directory"] = os.path.join(KAGGLE_TMP, save_directory)

    # Fix tokenizer adding an extra BOS token at the front
    fix_bos_token, old_chat_template = fix_tokenizer_bos_token(tokenizer)
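
The added block redirects saving under `KAGGLE_TMP`, matching the PR's goal of writing GGUF files to `/tmp` on Kaggle rather than the quota-limited working directory. One caveat of `os.path.join`, shown in the hypothetical sketch below, is that an absolute `save_directory` silently discards the `KAGGLE_TMP` prefix:

```python
import os

KAGGLE_TMP = "/tmp"  # assumption: the constant's value is not shown in this diff

# The usual case: a relative directory name lands under /tmp as intended.
print(os.path.join(KAGGLE_TMP, "my_gguf_model"))             # /tmp/my_gguf_model

# Caveat: os.path.join restarts at any absolute component, so an
# absolute save_directory bypasses the /tmp redirect entirely.
print(os.path.join(KAGGLE_TMP, "/kaggle/working/my_model"))  # /kaggle/working/my_model
```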

@@ -1754,6 +1757,19 @@ def unsloth_save_pretrained_gguf(
        )
    pass

    if IS_KAGGLE_ENVIRONMENT:
        list_of_files = list(all_file_locations)
        if modelfile_location is not None:
            list_of_files.append(modelfile_location)

        from IPython.display import FileLink, display

        for file_location in list_of_files:
            if file_location is not None:
                display(FileLink(file_location))

        logger.info("Unsloth: Click the above links to download the files.")

    if push_to_hub:
        print("Unsloth: Uploading GGUF to Huggingface Hub...")

@@ -1841,6 +1857,9 @@ def unsloth_push_to_hub_gguf(
    del arguments["quantization_method"]
    del arguments["first_conversion"]

    if IS_KAGGLE_ENVIRONMENT:
        arguments["save_directory"] = os.path.join(KAGGLE_TMP, arguments["save_directory"])

    # Fix tokenizer adding an extra BOS token at the front
    fix_bos_token, old_chat_template = fix_tokenizer_bos_token(tokenizer)

@@ -1938,6 +1957,10 @@ def unsloth_push_to_hub_gguf(
        if username not in new_save_directory else \
        new_save_directory.lstrip('/.')

    if IS_KAGGLE_ENVIRONMENT:
        # Take last 2 parts of the link
        link = "/".join(link.split("/")[-2:])

    print(f"Saved GGUF to https://huggingface.co/{link}")
    pass
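
Because the save directory was rewritten to live under `KAGGLE_TMP`, the computed `link` can carry the temporary path prefix; keeping only the last two path components restores the `username/repo` form expected in the printed URL. A sketch with made-up values:

```python
# Hypothetical link value polluted by the Kaggle /tmp prefix:
link = "tmp/my_gguf_model/username/my-gguf-model"

# Keep only the trailing "username/repo" components.
link = "/".join(link.split("/")[-2:])

print(f"Saved GGUF to https://huggingface.co/{link}")
# -> https://huggingface.co/username/my-gguf-model
```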

@@ -1947,6 +1970,10 @@
            self, repo_id, token,
            "GGUF converted", "gguf", modelfile_location, old_username, private,
        )
        if IS_KAGGLE_ENVIRONMENT:
            # Take last 2 parts of the link
            link = "/".join(link.split("/")[-2:])

        print(f"Saved Ollama Modelfile to https://huggingface.co/{link}")
    pass
