Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

consciousAI/cai-stellaris-text-embeddings fails #1660

Open
Muennighoff opened this issue Jan 1, 2025 · 1 comment
Open

consciousAI/cai-stellaris-text-embeddings fails #1660

Muennighoff opened this issue Jan 1, 2025 · 1 comment

Comments

@Muennighoff
Copy link
Contributor

from sentence_transformers import SentenceTransformer
sentences = ["This is an example sentence", "Each sentence is converted"]

model = SentenceTransformer('consciousAI/cai-stellaris-text-embeddings')
embeddings = model.encode(sentences)
print(embeddings)

modules.json: 100%
 349/349 [00:00<00:00, 3.19kB/s]
config_sentence_transformers.json: 100%
 116/116 [00:00<00:00, 633B/s]
README.md: 100%
 41.5k/41.5k [00:00<00:00, 182kB/s]
sentence_bert_config.json: 100%
 53.0/53.0 [00:00<00:00, 700B/s]
config.json: 100%
 660/660 [00:00<00:00, 7.02kB/s]
pytorch_model.bin: 100%
 211M/211M [00:06<00:00, 27.3MB/s]
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
[/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py](https://localhost:8080/#) in load_state_dict(checkpoint_file, is_quantized, map_location, weights_only)
    534         weights_only_kwarg = {"weights_only": weights_only} if is_torch_greater_or_equal_than_1_13 else {}
--> 535         return torch.load(
    536             checkpoint_file,

11 frames
[/usr/local/lib/python3.10/dist-packages/torch/serialization.py](https://localhost:8080/#) in load(f, map_location, pickle_module, weights_only, mmap, **pickle_load_args)
   1325             overall_storage = None
-> 1326             with _open_zipfile_reader(opened_file) as opened_zipfile:
   1327                 if _is_torchscript_zip(opened_zipfile):

[/usr/local/lib/python3.10/dist-packages/torch/serialization.py](https://localhost:8080/#) in __init__(self, name_or_buffer)
    670     def __init__(self, name_or_buffer) -> None:
--> 671         super().__init__(torch._C.PyTorchFileReader(name_or_buffer))
    672 

RuntimeError: PytorchStreamReader failed reading zip archive: failed finding central directory

During handling of the above exception, another exception occurred:

UnicodeDecodeError                        Traceback (most recent call last)
[/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py](https://localhost:8080/#) in load_state_dict(checkpoint_file, is_quantized, map_location, weights_only)
    543             with open(checkpoint_file) as f:
--> 544                 if f.read(7) == "version":
    545                     raise OSError(

[/usr/lib/python3.10/codecs.py](https://localhost:8080/#) in decode(self, input, final)
    321         data = self.buffer + input
--> 322         (result, consumed) = self._buffer_decode(data, self.errors, final)
    323         # keep undecoded input until the next call

UnicodeDecodeError: 'utf-8' codec can't decode byte 0x80 in position 64: invalid start byte

During handling of the above exception, another exception occurred:

OSError                                   Traceback (most recent call last)
[<ipython-input-4-8d5bad5a9f0f>](https://localhost:8080/#) in <cell line: 4>()
      2 sentences = ["This is an example sentence", "Each sentence is converted"]
      3 
----> 4 model = SentenceTransformer('consciousAI/cai-stellaris-text-embeddings')
      5 embeddings = model.encode(sentences)
      6 print(embeddings)

[/usr/local/lib/python3.10/dist-packages/sentence_transformers/SentenceTransformer.py](https://localhost:8080/#) in __init__(self, model_name_or_path, modules, device, prompts, default_prompt_name, similarity_fn_name, cache_folder, trust_remote_code, revision, local_files_only, token, use_auth_token, truncate_dim, model_kwargs, tokenizer_kwargs, config_kwargs, model_card_data, backend)
    306                 local_files_only=local_files_only,
    307             ):
--> 308                 modules, self.module_kwargs = self._load_sbert_model(
    309                     model_name_or_path,
    310                     token=token,

[/usr/local/lib/python3.10/dist-packages/sentence_transformers/SentenceTransformer.py](https://localhost:8080/#) in _load_sbert_model(self, model_name_or_path, token, cache_folder, revision, trust_remote_code, local_files_only, model_kwargs, tokenizer_kwargs, config_kwargs)
   1726                 # Otherwise we fall back to the load method
   1727                 try:
-> 1728                     module = module_class(model_name_or_path, cache_dir=cache_folder, backend=self.backend, **kwargs)
   1729                 except TypeError:
   1730                     module = module_class.load(model_name_or_path)

[/usr/local/lib/python3.10/dist-packages/sentence_transformers/models/Transformer.py](https://localhost:8080/#) in __init__(self, model_name_or_path, max_seq_length, model_args, tokenizer_args, config_args, cache_dir, do_lower_case, tokenizer_name_or_path, backend)
     76 
     77         config = self._load_config(model_name_or_path, cache_dir, backend, config_args)
---> 78         self._load_model(model_name_or_path, config, cache_dir, backend, **model_args)
     79 
     80         if max_seq_length is not None and "model_max_length" not in tokenizer_args:

[/usr/local/lib/python3.10/dist-packages/sentence_transformers/models/Transformer.py](https://localhost:8080/#) in _load_model(self, model_name_or_path, config, cache_dir, backend, **model_args)
    136                 self._load_mt5_model(model_name_or_path, config, cache_dir, **model_args)
    137             else:
--> 138                 self.auto_model = AutoModel.from_pretrained(
    139                     model_name_or_path, config=config, cache_dir=cache_dir, **model_args
    140                 )

[/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py](https://localhost:8080/#) in from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
    562         elif type(config) in cls._model_mapping.keys():
    563             model_class = _get_model_class(config, cls._model_mapping)
--> 564             return model_class.from_pretrained(
    565                 pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
    566             )

[/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py](https://localhost:8080/#) in from_pretrained(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, weights_only, *model_args, **kwargs)
   4034             if not is_sharded and state_dict is None:
   4035                 # Time to load the checkpoint
-> 4036                 state_dict = load_state_dict(resolved_archive_file, weights_only=weights_only)
   4037 
   4038             # set dtype to instantiate the model under:

[/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py](https://localhost:8080/#) in load_state_dict(checkpoint_file, is_quantized, map_location, weights_only)
    554                     ) from e
    555         except (UnicodeDecodeError, ValueError):
--> 556             raise OSError(
    557                 f"Unable to load weights from pytorch checkpoint file for '{checkpoint_file}' "
    558                 f"at '{checkpoint_file}'. "

OSError: Unable to load weights from pytorch checkpoint file for '/root/.cache/huggingface/hub/models--consciousAI--cai-stellaris-text-embeddings/snapshots/c000ec4b29588daf0f4a0b2ad4e72ee807d8efc0/pytorch_model.bin' at '/root/.cache/huggingface/hub/models--consciousAI--cai-stellaris-text-embeddings/snapshots/c000ec4b29588daf0f4a0b2ad4e72ee807d8efc0/pytorch_model.bin'. If you tried to load a PyTorch model from a TF 2.0 checkpoint, please set from_tf=True.
@isaac-chung
Copy link
Collaborator

I'm able to reproduce this error, but it does not seem to originate on our side. I started a discussion on the HF model page: https://huggingface.co/consciousAI/cai-stellaris-text-embeddings/discussions/1. In the meantime, should we comment this model out?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants