The training script:
```python
from unsloth import FastLanguageModel, PatchDPOTrainer, FastQwen2Model
from unsloth import is_bfloat16_supported
PatchDPOTrainer()
import torch
from transformers import TrainingArguments
from trl import DPOTrainer, DPOConfig
from datasets import load_dataset

train_dataset = load_dataset("json", data_files = '/workspace/unsloth/dataset.jsonl')

model_name_or_path = "/LLMs/Qwen2-1.5B-Instruct/"
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_name_or_path,
    max_seq_length = 1024,
    dtype = torch.bfloat16,
    load_in_4bit = False,
)

from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

# Do model patching and add fast LoRA weights
model = FastLanguageModel.get_peft_model(
    model,
    r = 8,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 64,
    lora_dropout = 0,  # Supports any, but = 0 is optimized
    bias = "none",     # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth",  # True or "unsloth" for very long context
    random_state = 3407,
    max_seq_length = 1024,
)

dpo_trainer = DPOTrainer(
    model = model,
    ref_model = None,
    args = DPOConfig(
        per_device_train_batch_size = 1,
        gradient_accumulation_steps = 4,
        warmup_ratio = 0.1,
        num_train_epochs = 3,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        seed = 42,
        output_dir = "outputs",
    ),
    beta = 0.1,
    train_dataset = train_dataset['train'],
    eval_dataset = None,
    tokenizer = tokenizer,
    max_length = 1024,
    max_prompt_length = 512,
)
dpo_trainer.train()
```
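One detail worth noting in the script above: `FastLanguageModel.from_pretrained` already returns a tokenizer, and the later `AutoTokenizer.from_pretrained` call replaces it with an unpatched one. A minimal sketch of keeping the Unsloth-returned tokenizer instead (an assumption on my part that the reload can simply be dropped, not a confirmed fix):

```python
# Sketch: keep the tokenizer returned by FastLanguageModel.from_pretrained,
# so any tokenizer-side patches Unsloth applies are not discarded.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_name_or_path,
    max_seq_length = 1024,
    dtype = torch.bfloat16,
    load_in_4bit = False,
)
# ...and do not re-run `tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)`.
```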
After running, the script fails with this error:
```
{'loss': 0.6931, 'grad_norm': 17.303083419799805, 'learning_rate': 2.5e-07, 'rewards/chosen': 0.0, 'rewards/rejected': 0.0, 'rewards/accuracies': 0.0, 'rewards/margins': 0.0, 'logps/chosen': -140.07949829101562, 'logps/rejected': -86.94802856445312, 'logits/chosen': -2.1795122623443604, 'logits/rejected': -1.971570611000061, 'epoch': 0.08}
  3%|██▏       | 1/36 [00:11<06:41, 11.47s/it]
Traceback (most recent call last):
  File "/workspace/unsloth/dpo_demo_2.py", line 63, in <module>
    dpo_trainer.train()
  File "<string>", line 157, in train
  File "<string>", line 374, in _fast_inner_training_loop
  File "<string>", line 31, in _unsloth_training_step
  File "/usr/local/lib/python3.10/dist-packages/trl-0.14.0.dev0-py3.10.egg/trl/trainer/dpo_trainer.py", line 1273, in compute_loss
    loss, metrics = self.get_batch_loss_metrics(model, inputs, train_eval="train")
  File "/usr/local/lib/python3.10/dist-packages/trl-0.14.0.dev0-py3.10.egg/trl/trainer/dpo_trainer.py", line 1225, in get_batch_loss_metrics
    model_output = self.concatenated_forward(model, batch)
  File "/usr/local/lib/python3.10/dist-packages/trl-0.14.0.dev0-py3.10.egg/trl/trainer/dpo_trainer.py", line 1152, in concatenated_forward
    outputs = model(input_ids=input_ids, attention_mask=attention_mask, **model_kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1541, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/accelerate/utils/operations.py", line 820, in forward
    return model_forward(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/accelerate/utils/operations.py", line 808, in __call__
    return convert_to_fp32(self.model_forward(*args, **kwargs))
  File "/usr/local/lib/python3.10/dist-packages/torch/amp/autocast_mode.py", line 16, in decorate_autocast
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/_compile.py", line 24, in inner
    return torch._dynamo.disable(fn, recursive)(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/_dynamo/eval_frame.py", line 451, in _fn
    return fn(*args, **kwargs)
  File "/workspace/unsloth/unsloth/models/llama.py", line 1119, in PeftModelForCausalLM_fast_forward
    return self.base_model(
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1541, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/peft/tuners/tuners_utils.py", line 188, in forward
    return self.model.forward(*args, **kwargs)
  File "/workspace/unsloth/unsloth/models/llama.py", line 981, in _CausalLM_fast_forward
    outputs = self.model(
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1541, in _call_impl
    return forward_call(*args, **kwargs)
  File "/workspace/unsloth/unsloth/models/llama.py", line 619, in LlamaModel_fast_forward
    inputs_embeds = self.embed_tokens(input_ids)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1582, in _call_impl
    result = forward_call(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/sparse.py", line 163, in forward
    return F.embedding(
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/functional.py", line 2264, in embedding
    return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
RuntimeError: Expected tensor for argument #1 'indices' to have one of the following scalar types: Long, Int; but got torch.cuda.FloatTensor instead (while checking arguments for embedding)
  3%|▎         | 1/36 [00:14<08:20, 14.29s/it]
```
What is causing this? If I follow the error message and edit unsloth/models/llama.py, changing the line to
`inputs_embeds = self.embed_tokens(input_ids.long())`, training then fails with a different error:
```
{'loss': 0.6931, 'grad_norm': 17.303083419799805, 'learning_rate': 2.5e-07, 'rewards/chosen': 0.0, 'rewards/rejected': 0.0, 'rewards/accuracies': 0.0, 'rewards/margins': 0.0, 'logps/chosen': -140.07949829101562, 'logps/rejected': -86.94802856445312, 'logits/chosen': -2.1795122623443604, 'logits/rejected': -1.971570611000061, 'epoch': 0.08}
  3%|██▏       | 1/36 [00:10<05:54, 10.12s/it]
Traceback (most recent call last):
  File "/workspace/unsloth/dpo_demo_2.py", line 63, in <module>
    dpo_trainer.train()
  File "<string>", line 157, in train
  File "<string>", line 374, in _fast_inner_training_loop
  File "<string>", line 31, in _unsloth_training_step
  File "/usr/local/lib/python3.10/dist-packages/trl-0.14.0.dev0-py3.10.egg/trl/trainer/dpo_trainer.py", line 1273, in compute_loss
    loss, metrics = self.get_batch_loss_metrics(model, inputs, train_eval="train")
  File "/usr/local/lib/python3.10/dist-packages/trl-0.14.0.dev0-py3.10.egg/trl/trainer/dpo_trainer.py", line 1225, in get_batch_loss_metrics
    model_output = self.concatenated_forward(model, batch)
  File "/usr/local/lib/python3.10/dist-packages/trl-0.14.0.dev0-py3.10.egg/trl/trainer/dpo_trainer.py", line 1176, in concatenated_forward
    per_token_logps = torch.gather(logits.log_softmax(-1), dim=2, index=labels.unsqueeze(2)).squeeze(2)
RuntimeError: gather(): Expected dtype int64 for index
  3%|▎         | 1/36 [00:12<07:20, 12.59s/it]
```
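Both failures suggest the batch tensors have the wrong dtype by the time they reach the model (`input_ids` arriving as float, `labels` not int64). A small diagnostic sketch to confirm what the collator actually emits, before blaming the model code (this only inspects, it does not fix anything):

```python
# Diagnostic sketch: pull one batch from the trainer's dataloader and print
# each tensor's dtype; input_ids and labels should be torch.int64 (Long).
batch = next(iter(dpo_trainer.get_train_dataloader()))
for name, value in batch.items():
    if torch.is_tensor(value):
        print(f"{name}: dtype={value.dtype}, shape={tuple(value.shape)}")
```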
So, how do I train a model with DPO? The dataset (JSONL, one record per line):

```json
{"system": "You are an AI assistant tailored to address queries about Unsloth.", "question": "What are the supported architectures for LLM fine-tuning by Unsloth?", "chosen": "Unsloth supports the Llama and Mistral architectures for LLM fine-tuning, ensuring compatibility with a range of model structures.", "rejected": "Unsloth does not support any specific architectures for LLM fine-tuning."}
{"system": "You are an AI assistant tailored to address queries about Unsloth.", "question": "How does Unsloth contribute to making LLM fine-tuning 2x faster?", "chosen": "Unsloth accelerates LLM fine-tuning by overwriting certain parts of the modeling code with optimized operations and manually deriving backpropagation steps, resulting in a 2x speedup without sacrificing accuracy.", "rejected": "Unsloth slows down LLM fine-tuning by introducing unnecessary complexities and approximations, leading to decreased efficiency."}
```
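For reference, trl's DPOTrainer expects a preference dataset with `prompt`, `chosen`, and `rejected` columns, while the JSONL above uses `system`/`question` instead of `prompt`. A minimal mapping sketch (the plain-text prompt template here is my own assumption; ideally build the prompt with `tokenizer.apply_chat_template` for Qwen2):

```python
from datasets import load_dataset

raw = load_dataset("json", data_files = '/workspace/unsloth/dataset.jsonl')

def to_dpo_format(example):
    # Fold the system message into the prompt; the exact template is a guess.
    return {
        "prompt":   example["system"] + "\n\n" + example["question"],
        "chosen":   example["chosen"],
        "rejected": example["rejected"],
    }

train_dataset = raw["train"].map(to_dpo_format, remove_columns = ["system", "question"])
```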
Thank you very much!