Add Tokens test to test the feed_prompt method

rustformers · Jul 6, 2023 · e9b80be · e9b80be
1 parent a892f97
commit e9b80be
Show file tree

Hide file tree

Showing 8 changed files with 142 additions and 8 deletions.
diff --git a/binaries/llm-test/configs/bloom.json b/binaries/llm-test/configs/bloom.json
@@ -9,6 +9,12 @@
  "output_disabled": "When a llama rides a crab, ,.-\n\n/? '， , ; A；A = (b)，d e orm\n“t” + “p。n unus et les el duetant alle that are by no ... ”\n( ? ) – ‘?\n!!\n«…..’,\nS.\n\n‘l」之 attergoir à dit-on pas .. 。。 ..\n– La leçon se confond quelquefois con ce qui es vée par occident .\n( 2 ) .\nLa protestation del paysan mécontent regardait pendre eussent mœurs faillite forteresse rivières lieues forteressemelés inquiétudes crackdown brawl slaughter massacresokea .\n» » … « …\n. . . \" \" ….",
  "maximum_token_count": 128
  }
+ },
+ {
+ "Tokens": {
+ "input": "Rustformers is",
+ "output": 15
+ }
  }
  ]
-}
+}
diff --git a/binaries/llm-test/configs/gptj.json b/binaries/llm-test/configs/gptj.json
@@ -9,6 +9,12 @@
  "output_disabled": "\"When a llama rides a crab, \nit's not the same as when an elephant does it.\" - John Steinbeck, East of Eden.\n\n \"The best way to predict your future is by looking at history.\"- Robert Kiyosaki (author). Rich Dad Poor dad : what 10 rules for success really mean and how you can apply them in life! The rich dads guidebook on personal finance: How To Become A Millionaire In Less Than 5 years! http://www..richdadpoordaddyguidebooksalexanderkimballblogcom/the_bestwaytopredictyourfutureislookingathistory/. You will learn about money management",
  "maximum_token_count": 128
  }
+ },
+ {
+ "Tokens": {
+ "input": "Rustformers is",
+ "output": 257
+ }
  }
  ]
-}
+}
diff --git a/binaries/llm-test/configs/gptneox.json b/binaries/llm-test/configs/gptneox.json
@@ -9,6 +9,12 @@
  "output_disabled": "<|padding|>When a llama rides a crab, \n“The Greatest Show on Earth” is the title of an 1875 book by Phineas Taylor Barnum, who founded and operated The circus. He was born in Bethel Connecticut to Meshack (Meshake) Bowman Jr., from New York City; his mother’s name has not been recorded but she may have had some Native American ancestry as well.[2] His father died when he[3][4], at age three,[5]: 9–10 (p1), 11-12—was left with relatives until they could find him work or send for them back home where there",
  "maximum_token_count": 128
  }
+ },
+ {
+ "Tokens": {
+ "input": "Rustformers is",
+ "output": 247
+ }
  }
  ]
-}
+}
diff --git a/binaries/llm-test/configs/llama.json b/binaries/llm-test/configs/llama.json
@@ -9,6 +9,12 @@
  "output": "When a llama rides a crab, 10-year olds are the ones who get to eat.\nTheir parents have been told that they will be eating for another year or two before their children can enjoy it again – and then only if there is enough food left over from Christmas dinner!",
  "maximum_token_count": 128
  }
+ },
+ {
+ "Tokens": {
+ "input": "Rustformers is",
+ "output": 260
+ }
  }
  ]
-}
+}
diff --git a/binaries/llm-test/configs/mpt.json b/binaries/llm-test/configs/mpt.json
@@ -9,6 +9,12 @@
  "output": "When a llama rides a crab,  the llama is called the \"crab rider\".\nThe crabs are very popular in South America, especially Brazil. They have been used as transportation for many years and they can carry up to five people at once!",
  "maximum_token_count": 128
  }
+ },
+ {
+ "Tokens": {
+ "input": "Rustformers is",
+ "output": 247
+ }
  }
  ]
-}
+}
diff --git a/binaries/llm-test/src/inference.rs b/binaries/llm-test/src/inference.rs
@@ -1,12 +1,14 @@
-//! Test cases for [crate::TestCase::Inference] tests.
+//! Tests the model's inference APIs.
+//!
+//! See [crate::TestCase::Inference].
 
 use std::{convert::Infallible, sync::Arc};
 
 use llm::InferenceStats;
 
 use crate::{ModelConfig, TestCaseReport, TestCaseReportInner, TestCaseReportMeta};
 
-pub(super) fn can_infer(
+pub(crate) fn can_infer(
  model: &dyn llm::Model,
  model_config: &ModelConfig,
  input: &str,

diff --git a/binaries/llm-test/src/main.rs b/binaries/llm-test/src/main.rs
@@ -2,6 +2,7 @@
 
 mod common;
 mod inference;
+mod tokens;
 
 use anyhow::Context;
 use clap::Parser;
@@ -123,6 +124,10 @@ enum TestCase {
  output: Option<String>,
  maximum_token_count: usize,
  },
+ Tokens {
+ input: String,
+ output: usize,
+ },
 }
 
 #[derive(Serialize)]
@@ -145,13 +150,14 @@ enum TestCaseReportMeta {
 }
 
 #[derive(Serialize)]
-enum TestCaseReportInner {
+pub enum TestCaseReportInner {
  Inference {
  input: String,
  expect_output: Option<String>,
  actual_output: String,
  inference_stats: Option<InferenceStats>,
  },
+ Tokens(tokens::TokensReport),
 }
 
 async fn test_model(
@@ -269,6 +275,9 @@ async fn test_model(
  output.as_deref(),
  *maximum_token_count,
  )?),
+ TestCase::Tokens { input, output } => {
+ test_case_reports.push(tokens::can_feed(&model, input, *output));
+ }
  }
  }
  let first_error: Option<String> =

diff --git a/binaries/llm-test/src/tokens.rs b/binaries/llm-test/src/tokens.rs
@@ -0,0 +1,93 @@
+//! Tests the model's token manipulation APIs:
+//!
+//! * [llm::InferenceSession::feed_prompt()]
+//!
+//! See [crate::TestCase::Tokens].
+
+use std::convert::Infallible;
+
+use llm::{InferenceFeedback, Model, OutputRequest};
+use serde::Serialize;
+
+use crate::{TestCaseReport, TestCaseReportMeta};
+
+/// Tests that feeding a prompt to the model yields the expected top-scoring token.
+///
+/// A fresh session is started on `model`, `input` is fed through
+/// [llm::InferenceSession::feed_prompt()] with `all_logits` requested, and the
+/// trailing `model.tokenizer().len()` scores of the returned logits are treated
+/// as the scores for the last token. The index of the largest of those scores
+/// (the first one on ties) is recorded in the report and compared against
+/// `expected_output`.
+///
+/// The returned [TestCaseReport] is an error report (never a panic) when:
+///
+/// * feeding the prompt fails,
+/// * the model returns no logits, or fewer logits than the vocabulary size,
+/// * the top-scoring token differs from `expected_output`.
+pub(crate) fn can_feed(model: &impl Model, input: &str, expected_output: usize) -> TestCaseReport {
+    let mut report = TokensReport::default();
+
+    let mut session = model.start_session(Default::default());
+    // Pre-populating `all_logits` asks the session to hand the logits back to us.
+    let mut output = OutputRequest {
+        all_logits: Some(vec![]),
+        ..Default::default()
+    };
+
+    let fed = session.feed_prompt(model, &Default::default(), input, &mut output, |x| {
+        always_continue(x)
+    });
+    if let Err(err) = fed {
+        return report.failure(&err.to_string());
+    }
+
+    let Some(logits) = output.all_logits else {
+        return report.failure("Model did not output any logits.");
+    };
+
+    // Only the scores of the last token matter. A plain subtraction here would
+    // underflow (and panic) if the model returned fewer logits than the
+    // vocabulary size, so report that case as an ordinary test failure instead.
+    let vocab_size = model.tokenizer().len();
+    let Some(start) = logits.len().checked_sub(vocab_size) else {
+        return report.failure(&format!(
+            "Expected at least {vocab_size} logits for the last token, \
+             but the model returned only {}.",
+            logits.len()
+        ));
+    };
+
+    let mut scores = logits[start..].iter().enumerate();
+    let Some(first) = scores.next() else {
+        return report.failure("Could not find any logits for last token.");
+    };
+    // Strict `>` keeps the earliest index when several scores share the maximum.
+    let (top_token, _) = scores.fold(first, |best, candidate| {
+        if candidate.1 > best.1 {
+            candidate
+        } else {
+            best
+        }
+    });
+
+    report.output = top_token;
+
+    if top_token != expected_output {
+        let tokenizer = model.tokenizer();
+        let top_token_str = String::from_utf8_lossy(&tokenizer.token(top_token)).to_string();
+        let expected_str = String::from_utf8_lossy(&tokenizer.token(expected_output)).to_string();
+        return report.failure(&format!(
+            "Expected top token to be {expected_output} ({expected_str}), \
+             but was {top_token} ({top_token_str})"
+        ));
+    }
+
+    report.success()
+}
+
+/// Inference callback that ignores the bytes it receives and unconditionally
+/// answers with [InferenceFeedback::Continue].
+fn always_continue(_token: &[u8]) -> Result<InferenceFeedback, Infallible> {
+    let feedback = InferenceFeedback::Continue;
+    Ok(feedback)
+}
+
+/// Serializable payload of a `Tokens` test case, carried inside
+/// `TestCaseReportInner::Tokens`.
+#[derive(Serialize, Default)]
+pub struct TokensReport {
+ /// Index of the top-scoring token found by `can_feed`; stays at the default
+ /// `0` when the test fails before a top token has been determined.
+ output: usize,
+}
+
+impl TokensReport {
+    /// Consumes the report and wraps it in a [TestCaseReport] flagged as an
+    /// error, with `msg` as the error text.
+    fn failure(self, msg: &str) -> TestCaseReport {
+        let meta = TestCaseReportMeta::Error {
+            error: msg.to_owned(),
+        };
+        self.into_report(meta)
+    }
+
+    /// Consumes the report and wraps it in a [TestCaseReport] flagged as a
+    /// success.
+    fn success(self) -> TestCaseReport {
+        self.into_report(TestCaseReportMeta::Success)
+    }
+
+    /// Pairs this report with the given outcome metadata.
+    fn into_report(self, meta: TestCaseReportMeta) -> TestCaseReport {
+        let report = crate::TestCaseReportInner::Tokens(self);
+        TestCaseReport { meta, report }
+    }
+}