Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
f81ef6e
General disagg fix for prefill-only model (#698)
ochougul Jan 6, 2026
c57392d
Adding Vae Decoder in Wan (#688)
mohiso22 Jan 9, 2026
75367b1
Evaluating the values of CCL lists for different scenarios (#710)
vjanfaza Jan 9, 2026
1e63710
Updating 2-layer instruction for Wan (#715)
tv-karthikeya Jan 12, 2026
1ef9935
Updated finetune docs for MULTI NODE Training (#717)
quic-akuruvil Jan 13, 2026
c76d5ea
Adding support for multi-node DDP training (#708)
smedhe Jan 13, 2026
7a39933
Updating MDP partition config: prioritizing dump over load (#720)
asmigosw Jan 13, 2026
08bce2c
Updated docs (#722)
quic-akuruvil Jan 13, 2026
8b00c1b
HOTFIX: changes in alpaca and grammar dataset utils (#724)
smedhe Jan 13, 2026
b074af0
Fixing the default value of CCL in infer.py (#725)
vjanfaza Jan 15, 2026
5fdde19
Adding support for multi-node PP+DDP (#726)
smedhe Jan 16, 2026
1f2ac51
Added default NPI file (#657)
quic-akuruvil Jan 19, 2026
dcbb7be
Release 1.21 docs (#718)
tv-karthikeya Jan 19, 2026
1ec3975
HOTFIX : Added support for repeat kv heads aligned Bias scaling for A…
quic-dhirajku Jan 20, 2026
e61a1a3
Removed OpenGVLab/InternVL2_5-1B and OpenGVLab/InternVL3_5-1B (#736)
quic-rishinr Jan 20, 2026
47a0fec
Qeff versioning (#741)
quic-rishinr Jan 20, 2026
3a8e5e9
Revert "Qeff versioning" (#746)
quic-rishinr Jan 21, 2026
0ffa4ea
Fix for Qwen 2.5 VL with subfunction (#733)
abhishek-singh591 Jan 21, 2026
32f30c0
Fixed torch patch for subfunction with VLMs (#750)
abhishek-singh591 Jan 22, 2026
eb74758
Added support of subfunction for VLMs (#699)
abhishek-singh591 Jan 23, 2026
742b7bd
Updated reduce sum calculation to use einsum for gpt_oss (#754)
asmigosw Jan 27, 2026
5a129c7
Updating pytest config for InternVL (#758)
tv-karthikeya Jan 28, 2026
b777e8b
Wan support to skip compilation (#734)
tv-karthikeya Jan 28, 2026
75bf976
Fixing SW issue in Gemma3 (#740)
qcdipankar Jan 28, 2026
3751f7e
Fix documentation of Multinode FT (#764)
quic-akuruvil Jan 29, 2026
27ebe8e
Adding support for gemma3 in continuous batching script for CI (#763)
qcdipankar Jan 30, 2026
536e3fc
Subfunction Fix (#766)
abhishek-singh591 Feb 1, 2026
f64f703
Mainline version update (#752)
quic-rishinr Feb 2, 2026
1a3e09c
Updated compile from qaic-exec to qaic-compile (#703)
asmigosw Feb 3, 2026
e8e5c43
Fix for Diffusers subfunction (#759)
tv-karthikeya Feb 9, 2026
fc42332
Added One hot fix for MOE model with subfunction (#777)
abhishek-singh591 Feb 12, 2026
544327a
Adding support of QEFFAutoModelForSequenceClassification (#729)
quic-amitraj Feb 13, 2026
facae5f
CI test optimization (#751)
quic-rishinr Feb 13, 2026
cd25784
Merge remote-tracking branch 'upstream/ft_experimental' into final_hf
tchawada Feb 17, 2026
3f6315c
Adding qaic validation in config manager, default value to prompt_func
tchawada Feb 17, 2026
9015bf6
Adding qaic validation in config manager, default value to prompt_func
tchawada Feb 17, 2026
fb28705
Adding a function to check whether NSP for given QAIC is free or not
tchawada Feb 18, 2026
8cbe49e
Adding integrated test for HF_trainer stack
tchawada Feb 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions QEfficient/cloud/finetune_experimental.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,8 @@ def _create_callbacks(self) -> List[Any]:

# callback_config.callbacks is a dictionary of callback configurations
for callback_name, callback_kwargs in callback_config["callbacks"].items():
if callback_kwargs is None:
callback_kwargs = {}
try:
callback_instance = ComponentFactory.create_callback(callback_name, **callback_kwargs)
callbacks.append(callback_instance)
Expand Down
4 changes: 2 additions & 2 deletions QEfficient/finetune/experimental/configs/sample_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,11 @@ training:
type: "sft"
gradient_accumulation_steps: 1
num_train_epochs: 1
torch_compile: True
torch_compile: False

# Optimizer configuration
optimizers:
optimizer_name: "adamw"
optimizer_name: "AdamW"
lr: 5e-5

scheduler:
Expand Down
92 changes: 73 additions & 19 deletions QEfficient/finetune/experimental/core/config_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import sys
from dataclasses import asdict, dataclass, field, fields, is_dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional, Union
from typing import Any, Dict, List, Mapping, Optional, Union

import yaml
from transformers.hf_argparser import HfArgumentParser
Expand All @@ -24,12 +24,35 @@
logger = Logger(__name__)


def is_NSP_free(expected_nsp_free: int = 16) -> None:
    """
    Verify that every visible QAIC device is idle, i.e. reports all of its
    NSPs (Neural Signal Processors) as free.

    Parameters
    ----------
    expected_nsp_free : int, optional
        Number of free NSPs that indicates an idle device. Defaults to 16
        (a fully free card); parameterized so other card configurations can
        reuse this check.

    Raises
    ------
    RuntimeError
        If any device reports a free-NSP count different from
        ``expected_nsp_free``.
    """
    # Imported lazily so this module stays importable on hosts without the
    # QAIC toolchain installed.
    import re
    import subprocess

    import torch

    device_count = torch.qaic.device_count()  # Get the number of available devices

    for device_idx in range(device_count):
        qid_idx = torch.qaic.get_device_info(device_idx).qid_index
        # NOTE(review): the query is issued with device_idx while the log /
        # error messages report qid_idx — confirm which index qaic-util's
        # "-d" flag actually expects.
        command = ["/opt/qti-aic/tools/qaic-util", "-q", "-d", f"{device_idx}"]
        result = subprocess.run(command, capture_output=True, text=True)
        match = re.search(r"Nsp Free:\s*(\d+)", result.stdout)
        if match:
            nsp_free = int(match.group(1))
            # A free count below the expected value means some other process
            # is already occupying NSPs on this device.
            if nsp_free != expected_nsp_free:
                raise RuntimeError(f"QAIC device {qid_idx} does not have {expected_nsp_free} NSP free")
            logger.info(f"QAIC device {qid_idx} has {nsp_free} NSP free")
        else:
            # Previously a missing "Nsp Free" line was silently ignored, so a
            # broken qaic-util invocation looked like an idle device. Surface
            # it instead of treating it as success.
            logger.info(
                f"Could not determine NSP usage for QAIC device {qid_idx} "
                f"(return code {result.returncode}); stderr: {result.stderr!r}"
            )


@dataclass
class OptimizerConfig:
"""Configuration for optimizers."""

optimizer_name: str = field(
default="adamw",
default="AdamW",
metadata={"help": "The name of the optimizer to use."},
)
lr: float = field(
Expand Down Expand Up @@ -125,11 +148,11 @@ class DatasetConfig:
metadata={"help": "Template for formatting prompts (e.g., 'User: {input} Assistant: ')."},
)
prompt_func: str = field(
default=None,
default="QEfficient.finetune.experimental.preprocessing.alpaca_func:create_alpaca_prompt",
metadata={"help": "Function for formatting prompts (e.g., 'User: {input} Assistant: ')."},
)
completion_template: str = field(
default=None,
default="{output}",
metadata={"help": "Template for formatting output completions (e.g., '{output}')."},
)
completion_func: str = field(
Expand Down Expand Up @@ -581,6 +604,39 @@ def load_config(self, config_path: Union[str, Path]) -> None:
raise ValueError(f"Unsupported configuration file format: {config_path.suffix}")
self.update_config(config_dict)

def _merge_dataclass_inplace(self, dc_obj: Any, updates: Dict[str, Any], parent_path: str = "") -> None:
    """
    Recursively merge the ``updates`` mapping into the dataclass instance
    ``dc_obj`` in place, so fields absent from ``updates`` keep their
    current (default) values.
    """
    if not is_dataclass(dc_obj):
        raise TypeError("dc_obj must be a dataclass instance")

    known = {f.name for f in fields(dc_obj)}
    for name, incoming in updates.items():
        child_path = f"{parent_path}.{name}" if parent_path else name

        # Keys the dataclass does not declare are stashed as extras
        # rather than silently dropped.
        if name not in known:
            self._stash_top_level_extra(parent_path or "__root__", name, incoming)
            continue

        existing = getattr(dc_obj, name)

        if isinstance(incoming, Mapping) and is_dataclass(existing):
            # Nested dataclass receiving a dict: recurse so untouched
            # nested fields keep their defaults.
            self._merge_dataclass_inplace(existing, incoming, child_path)
        elif isinstance(incoming, Mapping) and isinstance(existing, dict):
            # Plain dict receiving a dict: shallow in-place update.
            existing.update(incoming)
        else:
            # Lists and scalars are replaced wholesale.
            setattr(dc_obj, name, incoming)

def _ensure_extra_params(self, obj) -> Dict[str, Any]:
"""Ensure obj.extra_params exists and is a dict; return it."""
ep = getattr(obj, "extra_params", None)
Expand Down Expand Up @@ -615,21 +671,7 @@ def update_config(self, config_dict: Dict[str, Any]) -> None:
else:
self._stash_top_level_extra(key, "__all__", value)
continue

if isinstance(value, dict) and is_dataclass(target):
known = {f.name for f in fields(target)}
for nested_key, nested_value in value.items():
if nested_key in known:
setattr(target, nested_key, nested_value)
else:
self._stash_top_level_extra(key, nested_key, nested_value)
continue

if isinstance(value, dict) and isinstance(target, dict):
target.update(value)
continue
setattr(self.config, key, value)

self._merge_dataclass_inplace(target, value, parent_path=key)
else:
ep = self._ensure_extra_params(self.config)
ep[key] = value
Expand Down Expand Up @@ -673,6 +715,18 @@ def validate_config(self) -> None:
training_device = model.get("device", "qaic")
if training_device not in valid_devices:
self._push(errors, training_device not in valid_devices, f"training.device must be one of {valid_devices}.")
if training_device == "qaic":
try:
import torch_qaic # noqa: F401

logger.log_rank_zero("torch_qaic package found. Using QAIC devices.")
is_NSP_free()

except ImportError as e:
logger.log_rank_zero(
f"Unable to import 'torch_qaic' package due to exception: {e}. Moving ahead without the torch_qaic extension.",
level=0,
)
# PEFT validation
if model.get("use_peft"):
pc = model.get("peft_config", {})
Expand Down
16 changes: 7 additions & 9 deletions QEfficient/finetune/experimental/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,13 +96,9 @@ def __init__(
if self.json_file_path not in (None, ""):
if not os.path.isfile(self.json_file_path):
raise FileNotFoundError(f"JSON file not found or invalid: '{self.json_file_path}'")
if (self.prompt_template is None and self.prompt_func_path is None) or (
self.prompt_template is not None and self.prompt_func_path is not None
):
if self.prompt_template is None and self.prompt_func_path is None:
raise RuntimeError("Either provide prompt_template or prompt_func in the config.")
if (self.completion_template is None and self.completion_func_path is None) or (
self.completion_template is not None and self.completion_func_path is not None
):
if self.completion_template is None and self.completion_func_path is None:
raise RuntimeError("Either provide completion_template or completion_func in the config.")

# Call parent class __init__ which will call _initialize_dataset
Expand Down Expand Up @@ -134,11 +130,13 @@ def _initialize_dataset(self):
if db.info.splits is not None:
available_splits = list(db.info.splits.keys())

if self.split not in available_splits:
if self.split not in available_splits and self.split == "train":
raise ValueError(f"Split {self.split} is not available for dataset {self.dataset_name}.")

load_split = self.split
if self.split not in available_splits:
load_split = "train"
# FIXME: Add streaming support for larger datasets.
self.dataset = load_dataset(self.dataset_name, split=self.split, **load_kwargs)
self.dataset = load_dataset(self.dataset_name, split=load_split, **load_kwargs)

if len(available_splits) == 1:
self.dataset = apply_train_test_split(self.dataset, self.split_ratio, self.split, self.seed)
Expand Down
104 changes: 104 additions & 0 deletions QEfficient/finetune/experimental/core/utils/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# -----------------------------------------------------------------------------
#
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
# SPDX-License-Identifier: BSD-3-Clause
#
# -----------------------------------------------------------------------------

"""
Constants used across test files in the experimental finetuning pipeline.
"""

from enum import Enum

# ============================================================================
# Enums
# ============================================================================


class TaskType(str, Enum):
    """Task types for model training.

    Inherits from ``str`` so members compare equal to (and serialize as)
    their plain string values, e.g. ``TaskType.CAUSAL_LM == "CAUSAL_LM"``.
    """

    CAUSAL_LM = "CAUSAL_LM"  # autoregressive (decoder-only) language modeling
    SEQ_CLS = "SEQ_CLS"  # sequence classification
    SEQ_2_SEQ_LM = "SEQ_2_SEQ_LM"  # encoder-decoder sequence-to-sequence modeling


class DatasetType(str, Enum):
    """Dataset types for training.

    Inherits from ``str`` so members compare equal to their string values,
    matching how dataset types appear in YAML/JSON configs.
    """

    SFT_DATASET = "sft_dataset"  # supervised fine-tuning (prompt/completion pairs)
    SEQ_COMPLETION = "seq_completion"  # plain sequence-completion data
    SEQ_CLASSIFICATION = "seq_classification"  # labeled sequence-classification data


class AutoClassName(str, Enum):
    """Auto class names for model loading.

    Values are the transformers ``Auto*`` class names used to instantiate a
    model for the corresponding task.
    """

    CAUSAL_LM = "AutoModelForCausalLM"  # decoder-only LM loader
    SEQ_CLS = "AutoModelForSequenceClassification"  # classification-head loader
    SEQ_2_SEQ_LM = "AutoModelForSeq2SeqLM"  # encoder-decoder loader


# ============================================================================
# Test Seeds and Ratios
# ============================================================================

TEST_SEED = 42  # fixed RNG seed for deterministic test runs
TEST_SPLIT_RATIO = 0.8  # train fraction used when splitting datasets in tests

# ============================================================================
# PEFT/LoRA Configuration
# ============================================================================

TEST_LORA_R = 8  # LoRA rank (adapter bottleneck dimension)
TEST_LORA_ALPHA = 16  # LoRA scaling factor (alpha)
TEST_LORA_DROPOUT = 0.1  # dropout applied inside LoRA layers
TEST_LORA_TARGET_MODULES_LLAMA = ["q_proj", "v_proj"]  # attention projections targeted on Llama-style models
TEST_LORA_TARGET_MODULES_BERT = ["query", "value"]  # attention projections targeted on BERT-style models
TEST_LORA_BIAS = "none"  # do not train bias terms in adapted layers

# ============================================================================
# Training Parameters
# ============================================================================

TEST_LEARNING_RATE = 5e-5
TEST_WEIGHT_DECAY = 0.01
TEST_WARMUP_STEPS = 5
TEST_NUM_TRAIN_EPOCHS = 1
TEST_MAX_STEPS = 5  # cap steps so test runs stay fast
TEST_LOGGING_STEPS = 1  # log every step in tests
TEST_PER_DEVICE_BATCH_SIZE = 1
TEST_MAX_SEQ_LENGTH_CAUSAL = 256  # max sequence length for causal-LM tests
TEST_MAX_SEQ_LENGTH_SEQ_CLS = 128  # max sequence length for classification tests
TEST_MAX_LENGTH = 128
TEST_NUM_HIDDEN_LAYERS = 2  # shrink the model to two layers for speed

# ============================================================================
# Dataset Paths and Names
# ============================================================================

# HuggingFace Dataset Names
HF_DATASET_ALPACA = "tatsu-lab/alpaca"
HF_DATASET_GSM8K = "openai/gsm8k"
# gsm8k ships multiple subsets; "main" selects the primary one.
HF_DATASET_GSM8K_CONFIG = "main"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is it mentioned as main?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are two subsets of the gsm8k dataset; to load the "main" subset, we need this configuration.

HF_DATASET_IMDB = "stanfordnlp/imdb"

# Dataset subset size for testing
TEST_DATASET_SUBSET_SIZE = 10  # keep only a handful of examples so tests stay fast

# ============================================================================
# Model Names
# ============================================================================

TEST_MODEL_LLAMA = "meta-llama/Llama-3.2-1B"
TEST_MODEL_SMOLLM = "HuggingFaceTB/SmolLM-135M"

# ============================================================================
# Optimizer Parameters
# ============================================================================

OPT_LEARNING_RATE = 1e-4
OPT_ADAM_BETAS = (0.9, 0.999)  # (beta1, beta2) — the standard Adam defaults
OPT_ADAM_EPS = 1e-8  # numerical-stability epsilon for Adam
OPT_SGD_MOMENTUM = 0.9
24 changes: 24 additions & 0 deletions QEfficient/finetune/experimental/preprocessing/alpaca_func.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# -----------------------------------------------------------------------------
#
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
# SPDX-License-Identifier: BSD-3-Clause
#
# -----------------------------------------------------------------------------
def prompt_no_input(row):
    """Format an Alpaca-style row without an input field into a prompt string.

    ``row`` is any mapping with an ``instruction`` key; extra keys are ignored.
    """
    template = (
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request."
        "\n\n### Instruction:\n{instruction}\n\n### Response:\n"
    )
    return template.format_map(row)


def prompt_input(row):
    """Format an Alpaca-style row that carries an input field into a prompt string.

    ``row`` is any mapping with ``instruction`` and ``input`` keys.
    """
    template = (
        "Below is an instruction that describes a task, paired with an input "
        "that provides further context. "
        "Write a response that appropriately completes the request."
        "\n\n### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n"
    )
    return template.format_map(row)


def create_alpaca_prompt(row):
    """Build the Alpaca prompt for ``row``, choosing the template by whether
    the row carries a non-empty "input" field.

    Robustness fix: use ``row.get("input")`` instead of ``row["input"]`` so
    rows that omit the "input" key entirely (rather than carrying an empty
    string) fall back to the no-input template instead of raising KeyError.
    An empty string still selects the no-input template, as before.
    """
    return prompt_input(row) if row.get("input") else prompt_no_input(row)
Loading