Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

how to train a model by dpo? #1497

Open
ex-yanminmin001 opened this issue Jan 3, 2025 · 0 comments
Open

how to train a model by dpo? #1497

ex-yanminmin001 opened this issue Jan 3, 2025 · 0 comments

Comments

@ex-yanminmin001
Copy link

训练的脚本:
`from unsloth import FastLanguageModel, PatchDPOTrainer,FastQwen2Model
from unsloth import is_bfloat16_supported
PatchDPOTrainer()
import torch
from transformers import TrainingArguments
from trl import DPOTrainer,DPOConfig
from datasets import load_dataset

train_dataset = load_dataset("json", data_files='/workspace/unsloth/dataset.jsonl')
model_name_or_path = "/LLMs/Qwen2-1.5B-Instruct/"

model, tokenizer = FastLanguageModel.from_pretrained(
model_name = model_name_or_path,
max_seq_length = 1024,
dtype = torch.bfloat16,
load_in_4bit = False,
)
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

Do model patching and add fast LoRA weights

model = FastLanguageModel.get_peft_model(
model,
r = 8,
target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
"gate_proj", "up_proj", "down_proj",],
lora_alpha = 64,
lora_dropout = 0, # Supports any, but = 0 is optimized
bias = "none", # Supports any, but = "none" is optimized
# [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
random_state = 3407,
max_seq_length = 1024,
)

dpo_trainer = DPOTrainer(
model = model,
ref_model = None,
args = DPOConfig(
per_device_train_batch_size = 1,
gradient_accumulation_steps = 4,
warmup_ratio = 0.1,
num_train_epochs = 3,
fp16 = not is_bfloat16_supported(),
bf16 = is_bfloat16_supported(),
logging_steps = 1,
optim = "adamw_8bit",
seed = 42,
output_dir = "outputs",
),
beta = 0.1,
train_dataset = train_dataset['train'],
eval_dataset = None,
tokenizer = tokenizer,
max_length = 1024,
max_prompt_length = 512,
)
dpo_trainer.train()
运行后报错:{'loss': 0.6931, 'grad_norm': 17.303083419799805, 'learning_rate': 2.5e-07, 'rewards/chosen': 0.0, 'rewards/rejected': 0.0, 'rewards/accuracies': 0.0, 'rewards/margins': 0.0, 'logps/chosen': -140.07949829101562, 'logps/rejected': -86.94802856445312, 'logits/chosen': -2.1795122623443604, 'logits/rejected': -1.971570611000061, 'epoch': 0.08}
3%|██▏ | 1/36 [00:11<06:41, 11.47s/it]Traceback (most recent call last):
File "/workspace/unsloth/dpo_demo_2.py", line 63, in
dpo_trainer.train()
File "", line 157, in train
File "", line 374, in _fast_inner_training_loop
File "", line 31, in _unsloth_training_step
File "/usr/local/lib/python3.10/dist-packages/trl-0.14.0.dev0-py3.10.egg/trl/trainer/dpo_trainer.py", line 1273, in compute_loss
loss, metrics = self.get_batch_loss_metrics(model, inputs, train_eval="train")
File "/usr/local/lib/python3.10/dist-packages/trl-0.14.0.dev0-py3.10.egg/trl/trainer/dpo_trainer.py", line 1225, in get_batch_loss_metrics
model_output = self.concatenated_forward(model, batch)
File "/usr/local/lib/python3.10/dist-packages/trl-0.14.0.dev0-py3.10.egg/trl/trainer/dpo_trainer.py", line 1152, in concatenated_forward
outputs = model(input_ids=input_ids, attention_mask=attention_mask, **model_kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/accelerate/utils/operations.py", line 820, in forward
return model_forward(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/accelerate/utils/operations.py", line 808, in call
return convert_to_fp32(self.model_forward(*args, **kwargs))
File "/usr/local/lib/python3.10/dist-packages/torch/amp/autocast_mode.py", line 16, in decorate_autocast
return func(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/_compile.py", line 24, in inner
return torch._dynamo.disable(fn, recursive)(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/_dynamo/eval_frame.py", line 451, in _fn
return fn(*args, **kwargs)
File "/workspace/unsloth/unsloth/models/llama.py", line 1119, in PeftModelForCausalLM_fast_forward
return self.base_model(
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/peft/tuners/tuners_utils.py", line 188, in forward
return self.model.forward(*args, **kwargs)
File "/workspace/unsloth/unsloth/models/llama.py", line 981, in _CausalLM_fast_forward
outputs = self.model(
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/workspace/unsloth/unsloth/models/llama.py", line 619, in LlamaModel_fast_forward
inputs_embeds = self.embed_tokens(input_ids)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1582, in _call_impl
result = forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/sparse.py", line 163, in forward
return F.embedding(
File "/usr/local/lib/python3.10/dist-packages/torch/nn/functional.py", line 2264, in embedding
return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
RuntimeError: Expected tensor for argument #1 'indices' to have one of the following scalar types: Long, Int; but got torch.cuda.FloatTensor instead (while checking arguments for embedding)
3%|▎ | 1/36 [00:14<08:20, 14.29s/it]
请问是什么原因? 如果按照报错的内容修改unsloth/models/llama.py文件,修改为inputs_embeds = self.embed_tokens(input_ids.long()) 继续报错:{'loss': 0.6931, 'grad_norm': 17.303083419799805, 'learning_rate': 2.5e-07, 'rewards/chosen': 0.0, 'rewards/rejected': 0.0, 'rewards/accuracies': 0.0, 'rewards/margins': 0.0, 'logps/chosen': -140.07949829101562, 'logps/rejected': -86.94802856445312, 'logits/chosen': -2.1795122623443604, 'logits/rejected': -1.971570611000061, 'epoch': 0.08}
3%|██▏ | 1/36 [00:10<05:54, 10.12s/it]Traceback (most recent call last):
File "/workspace/unsloth/dpo_demo_2.py", line 63, in
dpo_trainer.train()
File "", line 157, in train
File "", line 374, in _fast_inner_training_loop
File "", line 31, in _unsloth_training_step
File "/usr/local/lib/python3.10/dist-packages/trl-0.14.0.dev0-py3.10.egg/trl/trainer/dpo_trainer.py", line 1273, in compute_loss
loss, metrics = self.get_batch_loss_metrics(model, inputs, train_eval="train")
File "/usr/local/lib/python3.10/dist-packages/trl-0.14.0.dev0-py3.10.egg/trl/trainer/dpo_trainer.py", line 1225, in get_batch_loss_metrics
model_output = self.concatenated_forward(model, batch)
File "/usr/local/lib/python3.10/dist-packages/trl-0.14.0.dev0-py3.10.egg/trl/trainer/dpo_trainer.py", line 1176, in concatenated_forward
per_token_logps = torch.gather(logits.log_softmax(-1), dim=2, index=labels.unsqueeze(2)).squeeze(2)
RuntimeError: gather(): Expected dtype int64 for index
3%|▎ | 1/36 [00:12<07:20, 12.59s/it]
so,how to train a model by dpo? dataset:{"system": "You are an AI assistant tailored to address queries about Unsloth.", "question": "What are the supported architectures for LLM fine-tuning by Unsloth?", "chosen": "Unsloth supports the Llama and Mistral architectures for LLM fine-tuning, ensuring compatibility with a range of model structures.", "rejected": "Unsloth does not support any specific architectures for LLM fine-tuning."}
{"system": "You are an AI assistant tailored to address queries about Unsloth.", "question": "How does Unsloth contribute to making LLM fine-tuning 2x faster?", "chosen": "Unsloth accelerates LLM fine-tuning by overwriting certain parts of the modeling code with optimized operations and manually deriving backpropagation steps, resulting in a 2x speedup without sacrificing accuracy.", "rejected": "Unsloth slows down LLM fine-tuning by introducing unnecessary complexities and approximations, leading to decreased efficiency."}
`
thank you very much!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant