Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

consciousAI/cai-stellaris-text-embeddings fails #1660

Open
Muennighoff opened this issue Jan 1, 2025 · 1 comment
Open

consciousAI/cai-stellaris-text-embeddings fails #1660

Muennighoff opened this issue Jan 1, 2025 · 1 comment

Comments

@Muennighoff
Copy link
Contributor

from sentence_transformers import SentenceTransformer
sentences = ["This is an example sentence", "Each sentence is converted"]

model = SentenceTransformer('consciousAI/cai-stellaris-text-embeddings')
embeddings = model.encode(sentences)
print(embeddings)

modules.json: 100%
 349/349 [00:00<00:00, 3.19kB/s]
config_sentence_transformers.json: 100%
 116/116 [00:00<00:00, 633B/s]
README.md: 100%
 41.5k/41.5k [00:00<00:00, 182kB/s]
sentence_bert_config.json: 100%
 53.0/53.0 [00:00<00:00, 700B/s]
config.json: 100%
 660/660 [00:00<00:00, 7.02kB/s]
pytorch_model.bin: 100%
 211M/211M [00:06<00:00, 27.3MB/s]
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
[/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py](https://localhost:8080/#) in load_state_dict(checkpoint_file, is_quantized, map_location, weights_only)
    534         weights_only_kwarg = {"weights_only": weights_only} if is_torch_greater_or_equal_than_1_13 else {}
--> 535         return torch.load(
    536             checkpoint_file,

11 frames
[/usr/local/lib/python3.10/dist-packages/torch/serialization.py](https://localhost:8080/#) in load(f, map_location, pickle_module, weights_only, mmap, **pickle_load_args)
   1325             overall_storage = None
-> 1326             with _open_zipfile_reader(opened_file) as opened_zipfile:
   1327                 if _is_torchscript_zip(opened_zipfile):

[/usr/local/lib/python3.10/dist-packages/torch/serialization.py](https://localhost:8080/#) in __init__(self, name_or_buffer)
    670     def __init__(self, name_or_buffer) -> None:
--> 671         super().__init__(torch._C.PyTorchFileReader(name_or_buffer))
    672 

RuntimeError: PytorchStreamReader failed reading zip archive: failed finding central directory

During handling of the above exception, another exception occurred:

UnicodeDecodeError                        Traceback (most recent call last)
[/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py](https://localhost:8080/#) in load_state_dict(checkpoint_file, is_quantized, map_location, weights_only)
    543             with open(checkpoint_file) as f:
--> 544                 if f.read(7) == "version":
    545                     raise OSError(

[/usr/lib/python3.10/codecs.py](https://localhost:8080/#) in decode(self, input, final)
    321         data = self.buffer + input
--> 322         (result, consumed) = self._buffer_decode(data, self.errors, final)
    323         # keep undecoded input until the next call

UnicodeDecodeError: 'utf-8' codec can't decode byte 0x80 in position 64: invalid start byte

During handling of the above exception, another exception occurred:

OSError                                   Traceback (most recent call last)
[<ipython-input-4-8d5bad5a9f0f>](https://localhost:8080/#) in <cell line: 4>()
      2 sentences = ["This is an example sentence", "Each sentence is converted"]
      3 
----> 4 model = SentenceTransformer('consciousAI/cai-stellaris-text-embeddings')
      5 embeddings = model.encode(sentences)
      6 print(embeddings)

[/usr/local/lib/python3.10/dist-packages/sentence_transformers/SentenceTransformer.py](https://localhost:8080/#) in __init__(self, model_name_or_path, modules, device, prompts, default_prompt_name, similarity_fn_name, cache_folder, trust_remote_code, revision, local_files_only, token, use_auth_token, truncate_dim, model_kwargs, tokenizer_kwargs, config_kwargs, model_card_data, backend)
    306                 local_files_only=local_files_only,
    307             ):
--> 308                 modules, self.module_kwargs = self._load_sbert_model(
    309                     model_name_or_path,
    310                     token=token,

[/usr/local/lib/python3.10/dist-packages/sentence_transformers/SentenceTransformer.py](https://localhost:8080/#) in _load_sbert_model(self, model_name_or_path, token, cache_folder, revision, trust_remote_code, local_files_only, model_kwargs, tokenizer_kwargs, config_kwargs)
   1726                 # Otherwise we fall back to the load method
   1727                 try:
-> 1728                     module = module_class(model_name_or_path, cache_dir=cache_folder, backend=self.backend, **kwargs)
   1729                 except TypeError:
   1730                     module = module_class.load(model_name_or_path)

[/usr/local/lib/python3.10/dist-packages/sentence_transformers/models/Transformer.py](https://localhost:8080/#) in __init__(self, model_name_or_path, max_seq_length, model_args, tokenizer_args, config_args, cache_dir, do_lower_case, tokenizer_name_or_path, backend)
     76 
     77         config = self._load_config(model_name_or_path, cache_dir, backend, config_args)
---> 78         self._load_model(model_name_or_path, config, cache_dir, backend, **model_args)
     79 
     80         if max_seq_length is not None and "model_max_length" not in tokenizer_args:

[/usr/local/lib/python3.10/dist-packages/sentence_transformers/models/Transformer.py](https://localhost:8080/#) in _load_model(self, model_name_or_path, config, cache_dir, backend, **model_args)
    136                 self._load_mt5_model(model_name_or_path, config, cache_dir, **model_args)
    137             else:
--> 138                 self.auto_model = AutoModel.from_pretrained(
    139                     model_name_or_path, config=config, cache_dir=cache_dir, **model_args
    140                 )

[/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py](https://localhost:8080/#) in from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
    562         elif type(config) in cls._model_mapping.keys():
    563             model_class = _get_model_class(config, cls._model_mapping)
--> 564             return model_class.from_pretrained(
    565                 pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
    566             )

[/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py](https://localhost:8080/#) in from_pretrained(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, weights_only, *model_args, **kwargs)
   4034             if not is_sharded and state_dict is None:
   4035                 # Time to load the checkpoint
-> 4036                 state_dict = load_state_dict(resolved_archive_file, weights_only=weights_only)
   4037 
   4038             # set dtype to instantiate the model under:

[/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py](https://localhost:8080/#) in load_state_dict(checkpoint_file, is_quantized, map_location, weights_only)
    554                     ) from e
    555         except (UnicodeDecodeError, ValueError):
--> 556             raise OSError(
    557                 f"Unable to load weights from pytorch checkpoint file for '{checkpoint_file}' "
    558                 f"at '{checkpoint_file}'. "

OSError: Unable to load weights from pytorch checkpoint file for '/root/.cache/huggingface/hub/models--consciousAI--cai-stellaris-text-embeddings/snapshots/c000ec4b29588daf0f4a0b2ad4e72ee807d8efc0/pytorch_model.bin' at '/root/.cache/huggingface/hub/models--consciousAI--cai-stellaris-text-embeddings/snapshots/c000ec4b29588daf0f4a0b2ad4e72ee807d8efc0/pytorch_model.bin'. If you tried to load a PyTorch model from a TF 2.0 checkpoint, please set from_tf=True.
@isaac-chung
Copy link
Collaborator

I'm able to reproduce this error, but it does not seem to originate on our side. I started a discussion on the HF model page: https://huggingface.co/consciousAI/cai-stellaris-text-embeddings/discussions/1. In the meantime, should we comment this model out?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants