Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
f81ef6e
General disagg fix for prefill-only model (#698)
ochougul Jan 6, 2026
c57392d
Adding Vae Decoder in Wan (#688)
mohiso22 Jan 9, 2026
75367b1
Evaluating the values of CCL lists for different scenarios (#710)
vjanfaza Jan 9, 2026
1e63710
Updating 2-layer instruction for Wan (#715)
tv-karthikeya Jan 12, 2026
1ef9935
Updated finetune docs for MULTI NODE Training (#717)
quic-akuruvil Jan 13, 2026
c76d5ea
Adding support for multi-node DDP training (#708)
smedhe Jan 13, 2026
7a39933
Updating MDP partition config: prioritizing dump over load (#720)
asmigosw Jan 13, 2026
08bce2c
Updated docs (#722)
quic-akuruvil Jan 13, 2026
8b00c1b
HOTFIX: changes in alpaca and grammar dataset utils (#724)
smedhe Jan 13, 2026
b074af0
Fixing the default value of CCL in infer.py (#725)
vjanfaza Jan 15, 2026
5fdde19
Adding support for multi-node PP+DDP (#726)
smedhe Jan 16, 2026
1f2ac51
Added default NPI file (#657)
quic-akuruvil Jan 19, 2026
dcbb7be
Release 1.21 docs (#718)
tv-karthikeya Jan 19, 2026
1ec3975
HOTFIX : Added support for repeat kv heads aligned Bias scaling for A…
quic-dhirajku Jan 20, 2026
e61a1a3
Removed OpenGVLab/InternVL2_5-1B and OpenGVLab/InternVL3_5-1B (#736)
quic-rishinr Jan 20, 2026
47a0fec
Qeff versioning (#741)
quic-rishinr Jan 20, 2026
3a8e5e9
Revert "Qeff versioning" (#746)
quic-rishinr Jan 21, 2026
0ffa4ea
Fix for Qwen 2.5 VL with subfunction (#733)
abhishek-singh591 Jan 21, 2026
32f30c0
Fixed torch patch for subfunction with VLMs (#750)
abhishek-singh591 Jan 22, 2026
eb74758
Added support of subfunction for VLMs (#699)
abhishek-singh591 Jan 23, 2026
742b7bd
Updated reduce sum calculation to use einsum for gpt_oss (#754)
asmigosw Jan 27, 2026
5a129c7
Updating pytest config for InternVL (#758)
tv-karthikeya Jan 28, 2026
b777e8b
Wan support to skip compilation (#734)
tv-karthikeya Jan 28, 2026
75bf976
Fixing SW issue in Gemma3 (#740)
qcdipankar Jan 28, 2026
3751f7e
Fix documentation of Multinode FT (#764)
quic-akuruvil Jan 29, 2026
27ebe8e
Adding support for gemma3 in continuous batching script for CI (#763)
qcdipankar Jan 30, 2026
536e3fc
Subfunction Fix (#766)
abhishek-singh591 Feb 1, 2026
f64f703
Mainline version update (#752)
quic-rishinr Feb 2, 2026
1a3e09c
Updated compile from qaic-exec to qaic-compile (#703)
asmigosw Feb 3, 2026
e8e5c43
Fix for Diffusers subfunction (#759)
tv-karthikeya Feb 9, 2026
fc42332
Added One hot fix for MOE model with subfunction (#777)
abhishek-singh591 Feb 12, 2026
544327a
Adding support of QEFFAutoModelForSequenceClassification (#729)
quic-amitraj Feb 13, 2026
facae5f
CI test optimization (#751)
quic-rishinr Feb 13, 2026
cd25784
Merge remote-tracking branch 'upstream/ft_experimental' into final_hf
tchawada Feb 17, 2026
3f6315c
Adding qaic validation in config manager, default value to prompt_func
tchawada Feb 17, 2026
9015bf6
Adding qaic validation in config manager, default value to prompt_func
tchawada Feb 17, 2026
fb28705
Adding a function to check whether NSP for given QAIC is free or not
tchawada Feb 18, 2026
8cbe49e
Adding integrated test for HF_trainer stack
tchawada Feb 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions QEfficient/cloud/finetune_experimental.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,8 @@ def _create_callbacks(self) -> List[Any]:

# callback_config.callbacks is a dictionary of callback configurations
for callback_name, callback_kwargs in callback_config["callbacks"].items():
if callback_kwargs is None:
callback_kwargs = {}
try:
callback_instance = ComponentFactory.create_callback(callback_name, **callback_kwargs)
callbacks.append(callback_instance)
Expand Down
4 changes: 2 additions & 2 deletions QEfficient/finetune/experimental/configs/sample_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,11 @@ training:
type: "sft"
gradient_accumulation_steps: 1
num_train_epochs: 1
torch_compile: True
torch_compile: False

# Optimizer configuration
optimizers:
optimizer_name: "adamw"
optimizer_name: "AdamW"
lr: 5e-5

scheduler:
Expand Down
92 changes: 73 additions & 19 deletions QEfficient/finetune/experimental/core/config_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import sys
from dataclasses import asdict, dataclass, field, fields, is_dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional, Union
from typing import Any, Dict, List, Mapping, Optional, Union

import yaml
from transformers.hf_argparser import HfArgumentParser
Expand All @@ -24,12 +24,35 @@
logger = Logger(__name__)


def is_NSP_free(expected_nsp_free: int = 16) -> None:
    """
    Verify that every visible QAIC device is idle, i.e. reports all of its
    NSPs (Neural Signal Processors) as free.

    Parameters
    ----------
    expected_nsp_free : int, optional
        Number of free NSPs that indicates an idle device. Defaults to 16
        (a fully free card); parameterized so other card configurations can
        reuse this check.

    Raises
    ------
    RuntimeError
        If any device reports a free-NSP count different from
        ``expected_nsp_free``.
    """
    # Imported lazily so this module stays importable on hosts without the
    # QAIC toolchain installed.
    import re
    import subprocess

    import torch

    device_count = torch.qaic.device_count()  # Get the number of available devices

    for device_idx in range(device_count):
        qid_idx = torch.qaic.get_device_info(device_idx).qid_index
        # NOTE(review): the query is issued with device_idx while the log /
        # error messages report qid_idx — confirm which index qaic-util's
        # "-d" flag actually expects.
        command = ["/opt/qti-aic/tools/qaic-util", "-q", "-d", f"{device_idx}"]
        result = subprocess.run(command, capture_output=True, text=True)
        match = re.search(r"Nsp Free:\s*(\d+)", result.stdout)
        if match:
            nsp_free = int(match.group(1))
            # A free count below the expected value means some other process
            # is already occupying NSPs on this device.
            if nsp_free != expected_nsp_free:
                raise RuntimeError(f"QAIC device {qid_idx} does not have {expected_nsp_free} NSP free")
            logger.info(f"QAIC device {qid_idx} has {nsp_free} NSP free")
        else:
            # Previously a missing "Nsp Free" line was silently ignored, so a
            # broken qaic-util invocation looked like an idle device. Surface
            # it instead of treating it as success.
            logger.info(
                f"Could not determine NSP usage for QAIC device {qid_idx} "
                f"(return code {result.returncode}); stderr: {result.stderr!r}"
            )


@dataclass
class OptimizerConfig:
"""Configuration for optimizers."""

optimizer_name: str = field(
default="adamw",
default="AdamW",
metadata={"help": "The name of the optimizer to use."},
)
lr: float = field(
Expand Down Expand Up @@ -125,11 +148,11 @@ class DatasetConfig:
metadata={"help": "Template for formatting prompts (e.g., 'User: {input} Assistant: ')."},
)
prompt_func: str = field(
default=None,
default="QEfficient.finetune.experimental.preprocessing.alpaca_func:create_alpaca_prompt",
metadata={"help": "Function for formatting prompts (e.g., 'User: {input} Assistant: ')."},
)
completion_template: str = field(
default=None,
default="{output}",
metadata={"help": "Template for formatting output completions (e.g., '{output}')."},
)
completion_func: str = field(
Expand Down Expand Up @@ -581,6 +604,39 @@ def load_config(self, config_path: Union[str, Path]) -> None:
raise ValueError(f"Unsupported configuration file format: {config_path.suffix}")
self.update_config(config_dict)

def _merge_dataclass_inplace(self, dc_obj: Any, updates: Dict[str, Any], parent_path: str = "") -> None:
    """
    Recursively merge the ``updates`` mapping into the dataclass instance
    ``dc_obj`` in place, so fields absent from ``updates`` keep their
    current (default) values.
    """
    if not is_dataclass(dc_obj):
        raise TypeError("dc_obj must be a dataclass instance")

    known = {f.name for f in fields(dc_obj)}
    for name, incoming in updates.items():
        child_path = f"{parent_path}.{name}" if parent_path else name

        # Keys the dataclass does not declare are stashed as extras
        # rather than silently dropped.
        if name not in known:
            self._stash_top_level_extra(parent_path or "__root__", name, incoming)
            continue

        existing = getattr(dc_obj, name)

        if isinstance(incoming, Mapping) and is_dataclass(existing):
            # Nested dataclass receiving a dict: recurse so untouched
            # nested fields keep their defaults.
            self._merge_dataclass_inplace(existing, incoming, child_path)
        elif isinstance(incoming, Mapping) and isinstance(existing, dict):
            # Plain dict receiving a dict: shallow in-place update.
            existing.update(incoming)
        else:
            # Lists and scalars are replaced wholesale.
            setattr(dc_obj, name, incoming)

def _ensure_extra_params(self, obj) -> Dict[str, Any]:
"""Ensure obj.extra_params exists and is a dict; return it."""
ep = getattr(obj, "extra_params", None)
Expand Down Expand Up @@ -615,21 +671,7 @@ def update_config(self, config_dict: Dict[str, Any]) -> None:
else:
self._stash_top_level_extra(key, "__all__", value)
continue

if isinstance(value, dict) and is_dataclass(target):
known = {f.name for f in fields(target)}
for nested_key, nested_value in value.items():
if nested_key in known:
setattr(target, nested_key, nested_value)
else:
self._stash_top_level_extra(key, nested_key, nested_value)
continue

if isinstance(value, dict) and isinstance(target, dict):
target.update(value)
continue
setattr(self.config, key, value)

self._merge_dataclass_inplace(target, value, parent_path=key)
else:
ep = self._ensure_extra_params(self.config)
ep[key] = value
Expand Down Expand Up @@ -673,6 +715,18 @@ def validate_config(self) -> None:
training_device = model.get("device", "qaic")
if training_device not in valid_devices:
self._push(errors, training_device not in valid_devices, f"training.device must be one of {valid_devices}.")
if training_device == "qaic":
try:
import torch_qaic # noqa: F401

logger.log_rank_zero("torch_qaic package found. Using QAIC devices.")
is_NSP_free()

except ImportError as e:
logger.log_rank_zero(
f"Unable to import 'torch_qaic' package due to exception: {e}. Moving ahead without the torch_qaic extension.",
level=0,
)
# PEFT validation
if model.get("use_peft"):
pc = model.get("peft_config", {})
Expand Down
16 changes: 7 additions & 9 deletions QEfficient/finetune/experimental/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,13 +96,9 @@ def __init__(
if self.json_file_path not in (None, ""):
if not os.path.isfile(self.json_file_path):
raise FileNotFoundError(f"JSON file not found or invalid: '{self.json_file_path}'")
if (self.prompt_template is None and self.prompt_func_path is None) or (
self.prompt_template is not None and self.prompt_func_path is not None
):
if self.prompt_template is None and self.prompt_func_path is None:
raise RuntimeError("Either provide prompt_template or prompt_func in the config.")
if (self.completion_template is None and self.completion_func_path is None) or (
self.completion_template is not None and self.completion_func_path is not None
):
if self.completion_template is None and self.completion_func_path is None:
raise RuntimeError("Either provide completion_template or completion_func in the config.")

# Call parent class __init__ which will call _initialize_dataset
Expand Down Expand Up @@ -134,11 +130,13 @@ def _initialize_dataset(self):
if db.info.splits is not None:
available_splits = list(db.info.splits.keys())

if self.split not in available_splits:
if self.split not in available_splits and self.split == "train":
raise ValueError(f"Split {self.split} is not available for dataset {self.dataset_name}.")

load_split = self.split
if self.split not in available_splits:
load_split = "train"
# FIXME: Add streaming support for larger datasets.
self.dataset = load_dataset(self.dataset_name, split=self.split, **load_kwargs)
self.dataset = load_dataset(self.dataset_name, split=load_split, **load_kwargs)

if len(available_splits) == 1:
self.dataset = apply_train_test_split(self.dataset, self.split_ratio, self.split, self.seed)
Expand Down
104 changes: 104 additions & 0 deletions QEfficient/finetune/experimental/core/utils/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# -----------------------------------------------------------------------------
#
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
# SPDX-License-Identifier: BSD-3-Clause
#
# -----------------------------------------------------------------------------

"""
Constants used across test files in the experimental finetuning pipeline.
"""

from enum import Enum

# ============================================================================
# Enums
# ============================================================================


class TaskType(str, Enum):
    """Task types for model training.

    Inherits from ``str`` so members compare equal to (and serialize as)
    their plain string values, e.g. ``TaskType.CAUSAL_LM == "CAUSAL_LM"``.
    """

    CAUSAL_LM = "CAUSAL_LM"  # autoregressive (decoder-only) language modeling
    SEQ_CLS = "SEQ_CLS"  # sequence classification
    SEQ_2_SEQ_LM = "SEQ_2_SEQ_LM"  # encoder-decoder sequence-to-sequence modeling


class DatasetType(str, Enum):
    """Dataset types for training.

    Inherits from ``str`` so members compare equal to their string values,
    matching how dataset types appear in YAML/JSON configs.
    """

    SFT_DATASET = "sft_dataset"  # supervised fine-tuning (prompt/completion pairs)
    SEQ_COMPLETION = "seq_completion"  # plain sequence-completion data
    SEQ_CLASSIFICATION = "seq_classification"  # labeled sequence-classification data


class AutoClassName(str, Enum):
    """Auto class names for model loading.

    Values are the transformers ``Auto*`` class names used to instantiate a
    model for the corresponding task.
    """

    CAUSAL_LM = "AutoModelForCausalLM"  # decoder-only LM loader
    SEQ_CLS = "AutoModelForSequenceClassification"  # classification-head loader
    SEQ_2_SEQ_LM = "AutoModelForSeq2SeqLM"  # encoder-decoder loader


# ============================================================================
# Test Seeds and Ratios
# ============================================================================

TEST_SEED = 42  # fixed RNG seed for deterministic test runs
TEST_SPLIT_RATIO = 0.8  # train fraction used when splitting datasets in tests

# ============================================================================
# PEFT/LoRA Configuration
# ============================================================================

TEST_LORA_R = 8  # LoRA rank (adapter bottleneck dimension)
TEST_LORA_ALPHA = 16  # LoRA scaling factor (alpha)
TEST_LORA_DROPOUT = 0.1  # dropout applied inside LoRA layers
TEST_LORA_TARGET_MODULES_LLAMA = ["q_proj", "v_proj"]  # attention projections targeted on Llama-style models
TEST_LORA_TARGET_MODULES_BERT = ["query", "value"]  # attention projections targeted on BERT-style models
TEST_LORA_BIAS = "none"  # do not train bias terms in adapted layers

# ============================================================================
# Training Parameters
# ============================================================================

TEST_LEARNING_RATE = 5e-5
TEST_WEIGHT_DECAY = 0.01
TEST_WARMUP_STEPS = 5
TEST_NUM_TRAIN_EPOCHS = 1
TEST_MAX_STEPS = 5  # cap steps so test runs stay fast
TEST_LOGGING_STEPS = 1  # log every step in tests
TEST_PER_DEVICE_BATCH_SIZE = 1
TEST_MAX_SEQ_LENGTH_CAUSAL = 256  # max sequence length for causal-LM tests
TEST_MAX_SEQ_LENGTH_SEQ_CLS = 128  # max sequence length for classification tests
TEST_MAX_LENGTH = 128
TEST_NUM_HIDDEN_LAYERS = 2  # shrink the model to two layers for speed

# ============================================================================
# Dataset Paths and Names
# ============================================================================

# HuggingFace Dataset Names
HF_DATASET_ALPACA = "tatsu-lab/alpaca"
HF_DATASET_GSM8K = "openai/gsm8k"
# gsm8k ships multiple subsets; "main" selects the primary one.
HF_DATASET_GSM8K_CONFIG = "main"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is it mentioned as main?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are two subsets of the gsm8k dataset; to load the "main" subset, we need this configuration.

HF_DATASET_IMDB = "stanfordnlp/imdb"

# Dataset subset size for testing
TEST_DATASET_SUBSET_SIZE = 10  # keep only a handful of examples so tests stay fast

# ============================================================================
# Model Names
# ============================================================================

TEST_MODEL_LLAMA = "meta-llama/Llama-3.2-1B"
TEST_MODEL_SMOLLM = "HuggingFaceTB/SmolLM-135M"

# ============================================================================
# Optimizer Parameters
# ============================================================================

OPT_LEARNING_RATE = 1e-4
OPT_ADAM_BETAS = (0.9, 0.999)  # (beta1, beta2) — the standard Adam defaults
OPT_ADAM_EPS = 1e-8  # numerical-stability epsilon for Adam
OPT_SGD_MOMENTUM = 0.9
24 changes: 24 additions & 0 deletions QEfficient/finetune/experimental/preprocessing/alpaca_func.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# -----------------------------------------------------------------------------
#
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
# SPDX-License-Identifier: BSD-3-Clause
#
# -----------------------------------------------------------------------------
def prompt_no_input(row):
    """Format an Alpaca-style row without an input field into a prompt string.

    ``row`` is any mapping with an ``instruction`` key; extra keys are ignored.
    """
    template = (
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request."
        "\n\n### Instruction:\n{instruction}\n\n### Response:\n"
    )
    return template.format_map(row)


def prompt_input(row):
    """Format an Alpaca-style row that carries an input field into a prompt string.

    ``row`` is any mapping with ``instruction`` and ``input`` keys.
    """
    template = (
        "Below is an instruction that describes a task, paired with an input "
        "that provides further context. "
        "Write a response that appropriately completes the request."
        "\n\n### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n"
    )
    return template.format_map(row)


def create_alpaca_prompt(row):
    """Build the Alpaca prompt for ``row``, choosing the template by whether
    the row carries a non-empty "input" field.

    Robustness fix: use ``row.get("input")`` instead of ``row["input"]`` so
    rows that omit the "input" key entirely (rather than carrying an empty
    string) fall back to the no-input template instead of raising KeyError.
    An empty string still selects the no-input template, as before.
    """
    return prompt_input(row) if row.get("input") else prompt_no_input(row)
Loading