fix qwen convert error (#288)

LJ-underdog · pre-commit-ci[bot] · web-flow · commit d4e528975547 · 2024-06-11T14:41:55.000+08:00
* fix qwen convert error

Signed-off-by: intellinjun <jun.lin@intel.com>

* fix chatglm2 test issue

Signed-off-by: intellinjun <jun.lin@intel.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: intellinjun <jun.lin@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
diff --git a/neural_speed/convert/convert_quantized_qwen.py b/neural_speed/convert/convert_quantized_qwen.py
@@ -66,7 +66,7 @@ def main(args_in: Optional[List[str]] = None) -> None:
     f.write(struct.pack("i", hparams["intermediate_size"]))  # dummy data
     f.write(struct.pack("i", hparams["num_attention_heads"]))
     f.write(struct.pack("i", hparams["num_key_value_heads"] if "num_key_value_heads" in hparams
-                        else ["num_attention_heads"]))  # multi-query attention
+                        else hparams["num_attention_heads"]))  # multi-query attention
     f.write(struct.pack("i", hparams["num_hidden_layers"]))
     f.write(
         struct.pack(
diff --git a/neural_speed/convert/convert_qwen.py b/neural_speed/convert/convert_qwen.py
@@ -104,7 +104,7 @@ def main(args_in: Optional[List[str]] = None) -> None:
     fout.write(struct.pack("i", hparams["intermediate_size"]))  # dummy data
     fout.write(struct.pack("i", hparams["num_attention_heads"]))
     fout.write(struct.pack("i", hparams["num_key_value_heads"] if "num_key_value_heads" in hparams
-                           else ["num_attention_heads"]))  # multi-query attention
+                           else hparams["num_attention_heads"]))  # multi-query attention
     fout.write(struct.pack("i", hparams["num_hidden_layers"]))
     fout.write(
         struct.pack(
diff --git a/scripts/huggingface.py b/scripts/huggingface.py
@@ -592,7 +592,12 @@ def _create_model(
                 from neural_speed import Model
                 self._model = Model()
                 if init_from_bin != "default_none":
-                    self._model.init_from_bin(model_type= self.config.model_type,model_path=init_from_bin, max_request_num=batch_size)
+                    if self.config.model_type == "chatglm" and "chatglm2" in self.config._name_or_path:
+                        model_type = "chatglm2"
+                    else:
+                        model_type = self.config.model_type
+
+                    self._model.init_from_bin(model_type=model_type,model_path=init_from_bin, max_request_num=batch_size)
                 else:
                     self._model.init(pretrained, weight_dtype=weight_dtype, compute_dtype=compute_dtype,
                                         alg=alg,