
Commit ef42ce1

fix baichuan, chatglm1&2&3 acc issue (#285)
Signed-off-by: Yu Zhentao <[email protected]>
1 parent 04a8029 commit ef42ce1

3 files changed, +19 -2 lines changed

neural_speed/models/baichuan/baichuan.cpp

Lines changed: 1 addition & 1 deletion
@@ -317,7 +317,7 @@ static bool baichuan_model_eval_internal(model_context* ctx, const model_input*
   }

   lctx.use_buf(ctx0, -1);
-  if (embd->ne[0] > 1) {
+  if (!lctx.logits_all && embd->ne[0] > 1) {
     inpL = ne_view_1d(ctx0, inpL, n_embd, (embd->ne[0] - 1) * n_embd * ne_element_size(inpL));
   }

neural_speed/models/chatglm/chatglm2.cpp

Lines changed: 1 addition & 1 deletion
@@ -337,7 +337,7 @@ static bool chatglm_model_eval_internal(model_context* ctx, const model_input* i
   }

   lctx.use_buf(ctx0, -1);
-  if (embd->ne[0] > 1) {
+  if (!lctx.logits_all && embd->ne[0] > 1) {
     inpL = ne_view_1d(ctx0, inpL, n_embd, (embd->ne[0] - 1) * n_embd * ne_element_size(inpL));
   }
   // lm_head
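Both C++ changes are the same one-line guard: when the caller sets logits_all (as an evaluation harness does for accuracy and perplexity runs), the graph must keep one hidden-state row per input token instead of narrowing inpL to the last token's row. A minimal sketch of the consumer-side contract, with illustrative names rather than neural_speed's actual API:

import torch

def sequence_logprob(logits: torch.Tensor, tokens: torch.Tensor) -> torch.Tensor:
    # logits: (seq_len, vocab), one row per input token; tokens: (seq_len,)
    logprobs = torch.log_softmax(logits.float(), dim=-1)
    # position i predicts token i + 1, so every row except the last is scored;
    # with the old last-token-only view this computation has nothing to read
    return logprobs[:-1].gather(-1, tokens[1:].unsqueeze(-1)).squeeze(-1).sum()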

scripts/huggingface.py

Lines changed: 17 additions & 0 deletions
@@ -395,6 +395,13 @@ def __init__(
         self._rank = 0
         self._world_size = 1

+        self.model_type = self._config.model_type
+        if self.model_type == "chatglm" and "chatglm2" in self._config._name_or_path:
+            self.model_type = "chatglm2"
+        if self.model_type == "chatglm" and "chatglm3" in self._config._name_or_path:
+            # chatglm3 shares the chatglm2 model architecture.
+            self.model_type = "chatglm2"
+
     @property
     def config(self):
         # return the associated transformers.AutoConfig for the given pretrained model.
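The name sniffing is needed because, as the checks above imply, the HF config reports model_type == "chatglm" for all three ChatGLM generations; only the checkpoint name tells them apart, and chatglm3 is mapped to chatglm2 since it reuses that architecture. A standalone sketch, assuming the public THUDM checkpoints:

from transformers import AutoConfig

config = AutoConfig.from_pretrained("THUDM/chatglm3-6b", trust_remote_code=True)
model_type = config.model_type  # "chatglm" for all three generations
if model_type == "chatglm" and "chatglm3" in config._name_or_path:
    model_type = "chatglm2"  # chatglm3 reuses the chatglm2 architecture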
@@ -949,6 +956,11 @@ def tok_encode(
         if left_truncate_len:
             encoding = encoding[-left_truncate_len:]  # pylint: disable=E1130

+        if self.model_type == "chatglm":
+            # hacky code for chatGLM: its tokenizer appends gmask_token_id and
+            # bos_token_id, which must be removed before slicing input and label
+            encoding = encoding[:-2]
+
         return encoding

     def tok_batch_encode(
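The two trailing ids stripped here are the ones named in the next hunk's comment: ChatGLM1's tokenizer appends gmask_token_id and bos_token_id ([130001, 130004]) after the text tokens. An illustrative check, assuming the THUDM/chatglm-6b tokenizer loaded with trust_remote_code:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
ids = tok.encode("hello")
assert ids[-2:] == [tok.gmask_token_id, tok.bos_token_id]  # [130001, 130004]
encoding = ids[:-2]  # what tok_encode now returns for input/label slicing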
@@ -1296,6 +1308,11 @@ def _lookup_one_token_cont(req: Tuple[Tuple[str, str], List[int], List[int]]):
                     dtype=torch.long,
                     device=self.device,
                 )
+                # hacky code for chatGLM1 inputs: the tokenizer appends
+                # [130001, 130004] (gmask_token_id, bos_token_id) at the end
+                if self.model_type == "chatglm":
+                    bos = torch.tensor([self.tokenizer.gmask_token_id, self.tokenizer.bos_token_id])
+                    inp = torch.cat((inp, bos), -1)
                 (inplen,) = inp.shape
             elif self.AUTO_MODEL_CLASS == transformers.AutoModelForSeq2SeqLM:
                 inp = torch.tensor(
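This hunk is the counterpart of the tok_encode change: the two special ids are trimmed there so context/continuation slicing lines up, then re-appended here just before the tensor reaches the model. A round-trip sketch with illustrative token ids:

import torch

encoding = [5, 6, 7, 130001, 130004]  # tokenizer output ends with [gmask, bos]
trimmed = encoding[:-2]               # tok_encode drops them for slicing
inp = torch.tensor(trimmed, dtype=torch.long)
bos = torch.tensor([130001, 130004])  # re-appended before the model call
inp = torch.cat((inp, bos), -1)
assert inp.tolist() == encoding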
