Commit 40d6a27

only download models from huggingface (#888)
1 parent 013c974 commit 40d6a27

42 files changed: +272 -353 lines
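After this change, `from_pretrained` always resolves checkpoints from the Hugging Face hub, so the `from_pt=True` argument the examples below used to pass is no longer needed; it is still popped from `kwargs` for backward compatibility but ignored. A minimal before/after sketch, using the phi-2 checkpoint from one of the updated demos:

from mindnlp.transformers import AutoTokenizer, AutoModelForCausalLM

# before this commit the demos passed from_pt=True explicitly:
#   AutoModelForCausalLM.from_pretrained("microsoft/phi-2", from_pt=True)
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")
model = AutoModelForCausalLM.from_pretrained("microsoft/phi-2")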

examples/classification/gpt_imdb_finetune.ipynb

+2 -2

@@ -86,7 +86,7 @@
 "source": [
 "from mindnlp.transformers import GPTTokenizer\n",
 "# tokenizer\n",
-"gpt_tokenizer = GPTTokenizer.from_pretrained('openai-gpt', from_pt=True)\n",
+"gpt_tokenizer = GPTTokenizer.from_pretrained('openai-gpt')\n",
 "\n",
 "# add sepcial token: <PAD>\n",
 "special_tokens_dict = {\n",
@@ -137,7 +137,7 @@
 "from mindspore.experimental.optim import Adam\n",
 "\n",
 "# set bert config and define parameters for training\n",
-"model = GPTForSequenceClassification.from_pretrained('openai-gpt', from_pt=True, num_labels=2)\n",
+"model = GPTForSequenceClassification.from_pretrained('openai-gpt', num_labels=2)\n",
 "model.config.pad_token_id = gpt_tokenizer.pad_token_id\n",
 "model.resize_token_embeddings(model.config.vocab_size + 3)\n",
 "\n",

llm/finetune/graphormer/graphormer_finetune.py

+1 -1

@@ -69,7 +69,7 @@ def main(args):
                          auto_load=True)
 
     # Load model
-    model = GraphormerForGraphClassification.from_pretrained("clefourrier/graphormer-base-pcqm4mv2", from_pt=True)
+    model = GraphormerForGraphClassification.from_pretrained("clefourrier/graphormer-base-pcqm4mv2")
 
     # Initiate the optimizer
     optimizer = nn.AdamWeightDecay(model.trainable_params(),

llm/inference/chatglm2/cli_demo.py

+2 -2

@@ -2,8 +2,8 @@
 import platform
 from mindnlp.transformers import ChatGLM2Tokenizer, ChatGLM2ForConditionalGeneration
 
-tokenizer = ChatGLM2Tokenizer.from_pretrained("THUDM/chatglm2-6b", from_pt=True)
-model = ChatGLM2ForConditionalGeneration.from_pretrained("THUDM/chatglm2-6b", from_pt=True)
+tokenizer = ChatGLM2Tokenizer.from_pretrained("THUDM/chatglm2-6b")
+model = ChatGLM2ForConditionalGeneration.from_pretrained("THUDM/chatglm2-6b")
 model = model.set_train(False)
 
 os_name = platform.system()

llm/inference/chatglm3/cli_demo.py

+2 -2

@@ -2,8 +2,8 @@
 import platform
 from mindnlp.transformers import ChatGLM3Tokenizer, ChatGLM3ForConditionalGeneration
 
-tokenizer = ChatGLM3Tokenizer.from_pretrained("THUDM/chatglm3-6b", from_pt=True)
-model = ChatGLM3ForConditionalGeneration.from_pretrained("THUDM/chatglm3-6b", from_pt=True)
+tokenizer = ChatGLM3Tokenizer.from_pretrained("THUDM/chatglm3-6b")
+model = ChatGLM3ForConditionalGeneration.from_pretrained("THUDM/chatglm3-6b")
 model = model.set_train(False)
 
 os_name = platform.system()

llm/inference/pangu/pangu_generate.py

+2 -2

@@ -1,7 +1,7 @@
 from mindnlp.transformers import AutoTokenizer, AutoModelForCausalLM
 
-tokenizer = AutoTokenizer.from_pretrained("sunzeyeah/pangu-350M-sft", from_pt=True)
-model = AutoModelForCausalLM.from_pretrained("sunzeyeah/pangu-350M-sft", from_pt=True)
+tokenizer = AutoTokenizer.from_pretrained("sunzeyeah/pangu-350M-sft")
+model = AutoModelForCausalLM.from_pretrained("sunzeyeah/pangu-350M-sft")
 
 prompt = "我不能确定对方是不是喜欢我,我却想分分秒秒跟他在一起,有谁能告诉我如何能想他少一点<sep>回答:"
 inputs = tokenizer(prompt, add_special_tokens=False, return_token_type_ids=False, return_tensors="ms")

llm/inference/phi_2/streamlit_app.py

-2

@@ -4,12 +4,10 @@
 # Load the Phi 2 model and tokenizer
 tokenizer = AutoTokenizer.from_pretrained(
     "microsoft/phi-2",
-    from_pt=True
 )
 
 model = AutoModelForCausalLM.from_pretrained(
     "microsoft/phi-2",
-    from_pt=True
 )
 
 # Streamlit UI

mindnlp/transformers/modeling_utils.py

+20 -57

@@ -800,7 +800,7 @@ def from_pretrained( # pylint: disable=too-many-locals
         """from_pretrained"""
         state_dict = kwargs.pop("state_dict", None)
         cache_dir = kwargs.pop("cache_dir", None)
-        from_pt = kwargs.pop("from_pt", True)
+        _ = kwargs.pop("from_pt", True)
         force_download = kwargs.pop("force_download", False)
         resume_download = kwargs.pop("resume_download", False)
         proxies = kwargs.pop("proxies", None)
@@ -839,7 +839,7 @@ def from_pretrained( # pylint: disable=too-many-locals
        pretrained_model_name_or_path = str(pretrained_model_name_or_path)
        is_local = os.path.isdir(pretrained_model_name_or_path)
        if is_local:
-            if from_pt and os.path.isfile(
+            if os.path.isfile(
                os.path.join(pretrained_model_name_or_path, subfolder, PT_WEIGHTS_NAME)
            ):
                # Load from a PyTorch checkpoint
@@ -858,7 +858,7 @@ def from_pretrained( # pylint: disable=too-many-locals
                archive_file = os.path.join(
                    pretrained_model_name_or_path, subfolder, _add_variant(SAFE_WEIGHTS_NAME, variant)
                )
-            elif from_pt and os.path.isfile(
+            elif os.path.isfile(
                os.path.join(pretrained_model_name_or_path, subfolder, _add_variant(PT_WEIGHTS_INDEX_NAME, variant))
            ):
                # Load from a sharded PyTorch checkpoint
@@ -901,11 +901,12 @@ def from_pretrained( # pylint: disable=too-many-locals
        elif is_remote_url(pretrained_model_name_or_path):
            filename = pretrained_model_name_or_path
            resolved_archive_file = download_url(pretrained_model_name_or_path)
-        elif from_pt:
+        else:
            if use_safetensors is not False:
                filename = _add_variant(SAFE_WEIGHTS_NAME, variant)
            else:
-                filename = _add_variant(PT_WEIGHTS_NAME, variant)
+                filename = _add_variant(WEIGHTS_NAME, variant)
+
            try:
                # Load from URL or cache if already cached
                cached_file_kwargs = {
@@ -935,68 +936,30 @@ def from_pretrained( # pylint: disable=too-many-locals
                    if resolved_archive_file is not None:
                        is_sharded = True
                        use_safetensors = True
-                    else:
-                        # This repo has no safetensors file of any kind, we switch to PyTorch.
-                        filename = _add_variant(PT_WEIGHTS_NAME, variant)
-                        resolved_archive_file = cached_file(
-                            pretrained_model_name_or_path, filename, **cached_file_kwargs
-                        )
 
                if resolved_archive_file is None:
-                    filename = _add_variant(PT_WEIGHTS_NAME, variant)
+                    filename = _add_variant(WEIGHTS_NAME, variant)
                    resolved_archive_file = cached_file(pretrained_model_name_or_path, filename, **cached_file_kwargs)
 
-                if resolved_archive_file is None and filename == _add_variant(PT_WEIGHTS_NAME, variant):
+                if resolved_archive_file is None and filename == _add_variant(WEIGHTS_NAME, variant):
                    # Maybe the checkpoint is sharded, we try to grab the index name in this case.
                    resolved_archive_file = cached_file(
                        pretrained_model_name_or_path,
-                        _add_variant(PT_WEIGHTS_INDEX_NAME, variant),
+                        _add_variant(WEIGHTS_INDEX_NAME, variant),
                        **cached_file_kwargs,
                    )
                    if resolved_archive_file is not None:
                        is_sharded = True
 
                if resolved_archive_file is None:
-                    raise EnvironmentError(
-                        f"{pretrained_model_name_or_path} does not appear to have a file named"
-                        f" {_add_variant(SAFE_WEIGHTS_NAME, variant)}, {_add_variant(PT_WEIGHTS_NAME, variant)}"
-                    )
-            except EnvironmentError:
-                # Raise any environment error raise by `cached_file`. It will have a helpful error message adapted
-                # to the original exception.
-                raise
-            except Exception as exc:
-                # For any other exception, we throw a generic error.
-                raise EnvironmentError(
-                    f"Can't load the model for '{pretrained_model_name_or_path}'. If you were trying to load it"
-                    ", make sure you don't have a local directory with the"
-                    f" same name. Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a"
-                    f" directory containing a file named {_add_variant(SAFE_WEIGHTS_NAME, variant)},"
-                    f" {_add_variant(PT_WEIGHTS_NAME, variant)}."
-                ) from exc
-        else:
-            # set correct filename
-            filename = _add_variant(WEIGHTS_NAME, variant)
-            try:
-                # Load from URL or cache if already cached
-                cached_file_kwargs = {
-                    "cache_dir": cache_dir,
-                    "force_download": force_download,
-                    "proxies": proxies,
-                    "resume_download": resume_download,
-                    "local_files_only": local_files_only,
-                    "subfolder": subfolder,
-                    "_raise_exceptions_for_missing_entries": False,
-                    'token': token
-                }
-
-                resolved_archive_file = cached_file(pretrained_model_name_or_path, filename, **cached_file_kwargs)
+                    filename = _add_variant(PT_WEIGHTS_NAME, variant)
+                    resolved_archive_file = cached_file(pretrained_model_name_or_path, filename, **cached_file_kwargs)
 
-                if resolved_archive_file is None and filename == _add_variant(WEIGHTS_NAME, variant):
+                if resolved_archive_file is None and filename == _add_variant(PT_WEIGHTS_NAME, variant):
                    # Maybe the checkpoint is sharded, we try to grab the index name in this case.
                    resolved_archive_file = cached_file(
                        pretrained_model_name_or_path,
-                        _add_variant(WEIGHTS_INDEX_NAME, variant),
+                        _add_variant(PT_WEIGHTS_INDEX_NAME, variant),
                        **cached_file_kwargs,
                    )
                    if resolved_archive_file is not None:
@@ -1005,7 +968,7 @@ def from_pretrained( # pylint: disable=too-many-locals
                if resolved_archive_file is None:
                    raise EnvironmentError(
                        f"{pretrained_model_name_or_path} does not appear to have a file named"
-                        f" {_add_variant(WEIGHTS_NAME, variant)}."
+                        f" {_add_variant(SAFE_WEIGHTS_NAME, variant)}, {_add_variant(PT_WEIGHTS_NAME, variant)}"
                    )
            except EnvironmentError:
                # Raise any environment error raise by `cached_file`. It will have a helpful error message adapted
@@ -1017,7 +980,8 @@ def from_pretrained( # pylint: disable=too-many-locals
                    f"Can't load the model for '{pretrained_model_name_or_path}'. If you were trying to load it"
                    ", make sure you don't have a local directory with the"
                    f" same name. Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a"
-                    f" directory containing a file named {_add_variant(WEIGHTS_NAME, variant)}."
+                    f" directory containing a file named {_add_variant(WEIGHTS_NAME, variant)}, {_add_variant(SAFE_WEIGHTS_NAME, variant)},"
+                    f" {_add_variant(PT_WEIGHTS_NAME, variant)}."
                ) from exc
 
        if is_local:
@@ -1091,8 +1055,8 @@ def empty_initializer(init, shape=None, dtype=mindspore.float32):
        # These are all the pointers of shared tensors.
        tied_params = [names for _, names in ptrs.items() if len(names) > 1]
 
-        def load_ckpt(resolved_archive_file, from_pt=False):
-            if from_pt and 'ckpt' not in resolved_archive_file:
+        def load_ckpt(resolved_archive_file):
+            if 'ckpt' not in resolved_archive_file:
                if use_safetensors:
                    from safetensors.numpy import load_file
                    origin_state_dict = load_file(resolved_archive_file)
@@ -1214,14 +1178,14 @@ def load_param_into_net(model: nn.Cell, param_dict: dict, prefix: str):
        if is_sharded:
            all_keys_unexpected = []
            for name in tqdm(converted_filenames, desc="Loading checkpoint shards"):
-                state_dict = load_ckpt(name, from_pt)
+                state_dict = load_ckpt(name)
                keys_unexpected, keys_missing = load_param_into_net(model, state_dict, cls.base_model_prefix)
                all_keys_unexpected.extend(keys_unexpected)
                del state_dict
                gc.collect()
            loaded_keys = sharded_metadata["all_checkpoint_keys"]
        else:
-            state_dict = load_ckpt(resolved_archive_file, from_pt)
+            state_dict = load_ckpt(resolved_archive_file)
            loaded_keys = list(state_dict.keys())
            all_keys_unexpected, keys_missing = load_param_into_net(model, state_dict, cls.base_model_prefix)
    else:
@@ -1266,7 +1230,6 @@ def load_param_into_net(model: nn.Cell, param_dict: dict, prefix: str):
        # Set model in evaluation mode to deactivate DropOut modules by default
        model.set_train(False)
 
-        kwargs['from_pt'] = from_pt
        # If it is a model with generation capabilities, attempt to load the generation config
        if model.can_generate() and pretrained_model_name_or_path is not None:
            try:
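Taken together, the hunks above collapse the two `from_pt`-gated branches of `from_pretrained` into a single hub lookup: safetensors first, then the native checkpoint, then the PyTorch weights, each with a sharded-index fallback. A condensed sketch of that order, not the literal control flow, with the weight-file constants inlined as plain strings (values assumed from convention, not verified against mindnlp.configs):

def resolve_weight_file(repo_id, fetch):
    """Try safetensors, then native, then PyTorch weights; raise on total miss."""
    candidates = [
        "model.safetensors",   # SAFE_WEIGHTS_NAME
        "mindspore.ckpt",      # WEIGHTS_NAME (assumed value)
        "pytorch_model.bin",   # PT_WEIGHTS_NAME
    ]
    for name in candidates:
        resolved = fetch(repo_id, name)  # stands in for cached_file(...)
        if resolved is not None:
            return resolved
        # the real code also probes the matching *_INDEX_NAME here
        # and marks the checkpoint as sharded when the index exists
    raise EnvironmentError(f"{repo_id} has none of {candidates}")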

mindnlp/transformers/models/auto/auto_factory.py

+1 -2

@@ -69,7 +69,7 @@ def from_config(cls, config, **kwargs):
    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
        config = kwargs.pop("config", None)
-        from_pt = kwargs.get('from_pt', True)
+        _ = kwargs.get('from_pt', True)
        token = kwargs.get('token', None)
        if not isinstance(config, PretrainedConfig):
            kwargs_orig = copy.deepcopy(kwargs)
@@ -92,7 +92,6 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
            if kwargs_orig.get("quantization_config", None) is not None:
                kwargs["quantization_config"] = kwargs_orig["quantization_config"]
 
-        kwargs['from_pt'] = from_pt
        kwargs['token'] = token
        if type(config) in cls._model_mapping.keys():
            model_class = _get_model_class(config, cls._model_mapping)

mindnlp/transformers/models/auto/tokenization_auto.py

+1 -3

@@ -27,7 +27,6 @@
 from collections import OrderedDict
 from typing import Dict, Optional, Union
 
-from mindnlp.configs import MS_URL_BASE, HF_URL_BASE
 from mindnlp.utils import cached_file, is_sentencepiece_available, is_tokenizers_available, logging
 from ...configuration_utils import PretrainedConfig, EncoderDecoderConfig
 from ...tokenization_utils import PreTrainedTokenizer  # pylint: disable=cyclic-import
@@ -553,8 +552,7 @@ def get_tokenizer_config(
    tokenizer_config = get_tokenizer_config("tokenizer-test")
    ```"""
 
-    from_pt = kwargs.get('from_pt', False)
-    endpoint = HF_URL_BASE if from_pt else MS_URL_BASE
+    _ = kwargs.get('from_pt', False)
    resolved_config_file = cached_file(
        pretrained_model_name_or_path,
        TOKENIZER_CONFIG_FILE,
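The tokenizer path loses its endpoint switch as well: with the MS_URL_BASE/HF_URL_BASE selection removed, `get_tokenizer_config` always resolves the config through `cached_file`'s default (Hugging Face) endpoint. A minimal sketch of the call as it stands after this hunk, with the conventional value of TOKENIZER_CONFIG_FILE inlined as an assumption:

from mindnlp.utils import cached_file

# "tokenizer_config.json" is the usual value of TOKENIZER_CONFIG_FILE
resolved_config_file = cached_file("openai-gpt", "tokenizer_config.json")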

mindnlp/transformers/pipelines/base.py

-2

@@ -230,8 +230,6 @@ def load_model(
    all_traceback = {}
    for model_class in class_tuple:
        kwargs = model_kwargs.copy()
-        if model.endswith(".bin") or model.endswith(".safetensors") or model.endswith(".pth"):
-            kwargs["from_pt"] = True
        try:
            model = model_class.from_pretrained(model, **kwargs)
            model = model.set_train(False)
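With the file-suffix check gone, `load_model` passes `model_kwargs` through unchanged and lets `from_pretrained` handle every checkpoint format itself. A hedged usage sketch (task and model id are illustrative, assuming `pipeline` is exported from `mindnlp.transformers` as in upstream transformers):

from mindnlp.transformers import pipeline

# no from_pt flag, regardless of whether the hub repo ships
# .safetensors, .ckpt, or .bin weights
classifier = pipeline("text-classification",
                      model="distilbert-base-uncased-finetuned-sst-2-english")
print(classifier("Weights now come straight from the Hugging Face hub."))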

tests/ut/transformers/models/auto/test_configuration_auto.py

+1 -1

@@ -50,7 +50,7 @@ def test_config_model_type_from_local_file(self):
        self.assertIsInstance(config, RobertaConfig)
 
    def test_config_model_type_from_model_identifier(self):
-        config = AutoConfig.from_pretrained(DUMMY_UNKNOWN_IDENTIFIER, from_pt=True)
+        config = AutoConfig.from_pretrained(DUMMY_UNKNOWN_IDENTIFIER)
        self.assertIsInstance(config, RobertaConfig)
 
    def test_config_for_model_str(self):

tests/ut/transformers/models/auto/test_modeling_auto.py

+3 -3

@@ -233,13 +233,13 @@ def test_token_classification_model_from_pretrained(self):
 
 
    def test_from_pretrained_identifier(self):
-        model = AutoModelWithLMHead.from_pretrained(SMALL_MODEL_IDENTIFIER, from_pt=True)
+        model = AutoModelWithLMHead.from_pretrained(SMALL_MODEL_IDENTIFIER)
        self.assertIsInstance(model, BertForMaskedLM)
        self.assertEqual(model.num_parameters(), 14410)
        self.assertEqual(model.num_parameters(only_trainable=True), 14410)
 
    def test_from_identifier_from_model_type(self):
-        model = AutoModelWithLMHead.from_pretrained(DUMMY_UNKNOWN_IDENTIFIER, from_pt=True)
+        model = AutoModelWithLMHead.from_pretrained(DUMMY_UNKNOWN_IDENTIFIER)
        self.assertIsInstance(model, RobertaForMaskedLM)
        self.assertEqual(model.num_parameters(), 14410)
        self.assertEqual(model.num_parameters(only_trainable=True), 14410)
@@ -321,7 +321,7 @@ def test_model_file_not_found(self):
        with self.assertRaises(
            EnvironmentError,
        ):
-            _ = AutoModel.from_pretrained("hf-internal-testing/config-no-model", from_pt=True)
+            _ = AutoModel.from_pretrained("hf-internal-testing/config-no-model")
 
    # def test_cached_model_has_minimum_calls_to_head(self):
    #     # Make sure we have cached the model.

tests/ut/transformers/models/autoformer/test_modeling_autoformer.py

+3 -3

@@ -420,7 +420,7 @@ class AutoformerModelIntegrationTests(unittest.TestCase):
    @unittest.skip('Mindspore cannot load torch .pt file.')
    def test_inference_no_head(self):
        model = AutoformerModel.from_pretrained(
-            "huggingface/autoformer-tourism-monthly",from_pt=True)
+            "huggingface/autoformer-tourism-monthly")
        batch = prepare_batch()
 
        output = model(
@@ -446,7 +446,7 @@ def test_inference_no_head(self):
    @unittest.skip('Mindspore cannot load torch .pt file.')
    def test_inference_head(self):
        model = AutoformerForPrediction.from_pretrained(
-            "huggingface/autoformer-tourism-monthly", from_pt=True)
+            "huggingface/autoformer-tourism-monthly")
        batch = prepare_batch("val-batch.pt")
        output = model(
            past_values=batch["past_values"],
@@ -466,7 +466,7 @@ def test_inference_head(self):
    @unittest.skip('Mindspore cannot load torch .pt file.')
    def test_seq_to_seq_generation(self):
        model = AutoformerForPrediction.from_pretrained(
-            "huggingface/autoformer-tourism-monthly", from_pt=True)
+            "huggingface/autoformer-tourism-monthly")
        batch = prepare_batch("val-batch.pt")
        outputs = model.generate(
            static_categorical_features=batch["static_categorical_features"],
