Skip to content
This repository has been archived by the owner on Jun 24, 2024. It is now read-only.

Commit

Permalink
Add Tokens test to test the feed_prompt method
Browse files Browse the repository at this point in the history
  • Loading branch information
steventrouble committed Jul 7, 2023
1 parent 2646aba commit b2238c2
Show file tree
Hide file tree
Showing 8 changed files with 142 additions and 8 deletions.
8 changes: 7 additions & 1 deletion binaries/llm-test/configs/bloom.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@
"output_disabled": "When a llama rides a crab, ,.-\n\n/? ', , ; A;A = (b),d e orm\n“t” + “p。n unus et les el duetant alle that are by no ... ”\n( ? ) – ‘?\n!!\n«…..’,\nS.\n\n‘l」之 attergoir à dit-on pas .. 。。 ..\n– La leçon se confond quelquefois con ce qui es vée par occident .\n( 2 ) .\nLa protestation del paysan mécontent regardait pendre eussent mœurs faillite forteresse rivières lieues forteressemelés inquiétudes crackdown brawl slaughter massacresokea .\n» » … « …\n. . . \" \" ….",
"maximum_token_count": 128
}
},
{
"Tokens": {
"input": "Rustformers is",
"output": 15
}
}
]
}
}
8 changes: 7 additions & 1 deletion binaries/llm-test/configs/gptj.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@
"output_disabled": "\"When a llama rides a crab, \nit's not the same as when an elephant does it.\" - John Steinbeck, East of Eden.\n\n \"The best way to predict your future is by looking at history.\"- Robert Kiyosaki (author). Rich Dad Poor dad : what 10 rules for success really mean and how you can apply them in life! The rich dads guidebook on personal finance: How To Become A Millionaire In Less Than 5 years! http://www..richdadpoordaddyguidebooksalexanderkimballblogcom/the_bestwaytopredictyourfutureislookingathistory/. You will learn about money management",
"maximum_token_count": 128
}
},
{
"Tokens": {
"input": "Rustformers is",
"output": 257
}
}
]
}
}
8 changes: 7 additions & 1 deletion binaries/llm-test/configs/gptneox.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@
"output_disabled": "<|padding|>When a llama rides a crab, \n“The Greatest Show on Earth” is the title of an 1875 book by Phineas Taylor Barnum, who founded and operated The circus. He was born in Bethel Connecticut to Meshack (Meshake) Bowman Jr., from New York City; his mother’s name has not been recorded but she may have had some Native American ancestry as well.[2] His father died when he[3][4], at age three,[5]: 9–10 (p1), 11-12​—was left with relatives until they could find him work or send for them back home where there",
"maximum_token_count": 128
}
},
{
"Tokens": {
"input": "Rustformers is",
"output": 247
}
}
]
}
}
8 changes: 7 additions & 1 deletion binaries/llm-test/configs/llama.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@
"output": "When a llama rides a crab, 10-year olds are the ones who get to eat.\nTheir parents have been told that they will be eating for another year or two before their children can enjoy it again – and then only if there is enough food left over from Christmas dinner!",
"maximum_token_count": 128
}
},
{
"Tokens": {
"input": "Rustformers is",
"output": 260
}
}
]
}
}
8 changes: 7 additions & 1 deletion binaries/llm-test/configs/mpt.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@
"output": "When a llama rides a crab,  the llama is called the \"crab rider\".\nThe crabs are very popular in South America, especially Brazil. They have been used as transportation for many years and they can carry up to five people at once!",
"maximum_token_count": 128
}
},
{
"Tokens": {
"input": "Rustformers is",
"output": 247
}
}
]
}
}
6 changes: 4 additions & 2 deletions binaries/llm-test/src/inference.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
//! Test cases for [crate::TestCase::Inference] tests.
//! Tests the model's inference APIs.
//!
//! See [crate::TestCase::Inference].

use std::{convert::Infallible, sync::Arc};

use llm::InferenceStats;

use crate::{ModelConfig, TestCaseReport, TestCaseReportInner, TestCaseReportMeta};

pub(super) fn can_infer(
pub(crate) fn can_infer(
model: &dyn llm::Model,
model_config: &ModelConfig,
input: &str,
Expand Down
11 changes: 10 additions & 1 deletion binaries/llm-test/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

mod common;
mod inference;
mod tokens;

use anyhow::Context;
use clap::Parser;
Expand Down Expand Up @@ -123,6 +124,10 @@ enum TestCase {
output: Option<String>,
maximum_token_count: usize,
},
Tokens {
input: String,
output: usize,
},
}

#[derive(Serialize)]
Expand All @@ -145,13 +150,14 @@ enum TestCaseReportMeta {
}

#[derive(Serialize)]
enum TestCaseReportInner {
pub enum TestCaseReportInner {
Inference {
input: String,
expect_output: Option<String>,
actual_output: String,
inference_stats: Option<InferenceStats>,
},
Tokens(tokens::TokensReport),
}

async fn test_model(
Expand Down Expand Up @@ -269,6 +275,9 @@ async fn test_model(
output.as_deref(),
*maximum_token_count,
)?),
TestCase::Tokens { input, output } => {
test_case_reports.push(tokens::can_feed(&model, input, *output));
}
}
}
let first_error: Option<String> =
Expand Down
93 changes: 93 additions & 0 deletions binaries/llm-test/src/tokens.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
//! Tests the model's token manipulation APIs:
//!
//! * [llm::InferenceSession::feed_prompt()]
//!
//! See [crate::TestCase::Tokens].

use std::convert::Infallible;

use llm::{InferenceFeedback, Model, OutputRequest};
use serde::Serialize;

use crate::{TestCaseReport, TestCaseReportMeta};

/// Tests that the model performs as expected when feeding tokens
pub(crate) fn can_feed(model: &impl Model, input: &str, expected_output: usize) -> TestCaseReport {
let mut report = TokensReport::default();

let mut session = model.start_session(Default::default());
let mut output = OutputRequest {
all_logits: Some(vec![]),
..Default::default()
};

let feed_prompt = &mut |prompt: &str| {
session.feed_prompt(model, &Default::default(), prompt, &mut output, |x| {
always_continue(x)
})
};

if let Err(err) = feed_prompt(input) {
return report.failure(&err.to_string());
};

let top_token;
match output.all_logits {
Some(logits) => {
let start = logits.len() - model.tokenizer().len();
let mut iter = logits[start..].iter().enumerate();
let Some((mut max_idx, mut max)) = iter.next() else {
return report.failure("Could not find any logits for last token.");
};
for (idx, score) in iter {
if score > max {
max = score;
max_idx = idx;
}
}
top_token = max_idx;
}
None => return report.failure("Model did not output any logits."),
}

report.output = top_token;

if top_token != expected_output {
let tokenizer = model.tokenizer();
let top_token_str = String::from_utf8_lossy(&tokenizer.token(top_token)).to_string();
let expected_str = String::from_utf8_lossy(&tokenizer.token(expected_output)).to_string();
return report.failure(&format!(
"Expected top token to be {expected_output} ({expected_str}), \
but was {top_token} ({top_token_str})"
));
}

report.success()
}

/// Prompt-feed callback that ignores the emitted bytes and never halts
/// inference; its error type is [Infallible], so it can never fail.
fn always_continue(_bytes: &[u8]) -> Result<InferenceFeedback, Infallible> {
    let feedback = InferenceFeedback::Continue;
    Ok(feedback)
}

/// Serializable report payload for a token-feeding test case.
#[derive(Serialize, Default)]
pub struct TokensReport {
    // Token id actually predicted by the model (argmax of the last token's
    // logits in `can_feed`); 0 until the prediction has been computed.
    output: usize,
}

impl TokensReport {
fn failure(self, msg: &str) -> TestCaseReport {
TestCaseReport {
meta: TestCaseReportMeta::Error {
error: msg.to_owned(),
},
report: crate::TestCaseReportInner::Tokens(self),
}
}

fn success(self) -> TestCaseReport {
TestCaseReport {
meta: TestCaseReportMeta::Success,
report: crate::TestCaseReportInner::Tokens(self),
}
}
}

0 comments on commit b2238c2

Please sign in to comment.