This repository has been archived by the owner on Jun 24, 2024. It is now read-only.
Merge pull request #334 from steventrouble/main
Add ability to delete tokens (undo feed)
Showing 20 changed files with 498 additions and 156 deletions.
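The headline change is a token-deletion ("undo feed") API on inference sessions. Based on how the new `can_delete` test below drives it, usage looks roughly like this (a hypothetical sketch, not code from this commit; `output` and `always_continue` follow the test's `feed_prompt` helper, the " beach" continuation is invented, and the `?`s assume an enclosing function that returns a `Result`):

    let mut session = model.start_session(Default::default());
    session.feed_prompt(model, &Default::default(), "The llama lived on the", &mut output, always_continue)?;
    // Remove the most recently fed token from the session...
    session.rewind(model, 1)?;
    // ...so that a different continuation can be fed in its place.
    session.feed_prompt(model, &Default::default(), " beach", &mut output, always_continue)?;

The relevant new test files from the diff are reproduced below.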
@@ -0,0 +1,30 @@
//! Tests that are run on every model, regardless of config.

pub(super) fn can_send<M: llm::KnownModel + 'static>(model: M) -> anyhow::Result<M> {
    let model = std::thread::spawn(move || model)
        .join()
        .map_err(|e| anyhow::anyhow!("Failed to join thread: {e:?}"));

    log::info!("`can_send` test passed!");

    model
}

pub(super) fn can_roundtrip_hyperparameters<M: llm::KnownModel + 'static>(
    model: &M,
) -> anyhow::Result<()> {
    fn test_hyperparameters<M: llm::Hyperparameters>(hyperparameters: &M) -> anyhow::Result<()> {
        let mut data = vec![];
        hyperparameters.write_ggml(&mut data)?;
        let new_hyperparameters =
            <M as llm::Hyperparameters>::read_ggml(&mut std::io::Cursor::new(data))?;

        assert_eq!(hyperparameters, &new_hyperparameters);

        log::info!("`can_roundtrip_hyperparameters` test passed!");

        Ok(())
    }

    test_hyperparameters(model.hyperparameters())
}
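`can_send` works because `std::thread::spawn` only accepts closures that are `Send + 'static`, so moving the model into a thread and joining it back out proves the model type can cross thread boundaries. The same trick as a standalone helper (hypothetical, not part of this commit):

    fn assert_send<T: Send + 'static>(value: T) -> T {
        // Compilation fails here if `T` is not `Send`; at runtime the value
        // simply makes a round trip through the spawned thread.
        std::thread::spawn(move || value).join().unwrap()
    }

`can_roundtrip_hyperparameters`, meanwhile, checks that `write_ggml` and `read_ggml` are inverses by serializing the hyperparameters into an in-memory buffer and comparing the re-read value against the original.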
@@ -0,0 +1,95 @@
//! Tests the model's token manipulation APIs:
//!
//! * [llm::InferenceSession::feed_prompt()]
//!
//! See [crate::TestCase::Tokens].

use std::convert::Infallible;

use llm::{InferenceFeedback, InferenceSession, Model, OutputRequest};
use serde::Serialize;

use crate::{TestCaseReport, TestCaseReportMeta};

/// Tests that models can delete tokens without changing the model's behavior.
pub(crate) fn can_delete(model: &impl Model) -> TestCaseReport {
    let report = DeleteReport::default();
    let mut session = model.start_session(Default::default());
    let mut output = OutputRequest {
        all_logits: Some(vec![]),
        ..Default::default()
    };

    // Feed some tokens
    if let Err(err) = feed_prompt("The llama lived on the", &mut session, model, &mut output) {
        return report.failure(&err.to_string());
    }

    // Add token and get the logits
    if let Err(err) = feed_prompt(" ", &mut session, model, &mut output) {
        return report.failure(&err.to_string());
    }
    let Some(original_logits) = output.all_logits.clone() else {
        return report.failure("Model did not return logits.");
    };

    // Rewind, then re-add. Verify logits are the same.
    if let Err(err) = session.rewind(model, 1) {
        return report.failure(&err.to_string());
    }
    if let Err(err) = feed_prompt(" ", &mut session, model, &mut output) {
        return report.failure(&err.to_string());
    }
    let Some(redone_logits) = output.all_logits.clone() else {
        return report.failure("Second run of model did not return logits.");
    };

    // Compare the logits
    for (idx, (&original, redone)) in original_logits.iter().zip(redone_logits).enumerate() {
        if original > redone + f32::EPSILON || original < redone - f32::EPSILON {
            return report.failure(&format!(
                "Expected logits to be the same after delete, but differed at {idx}, \
                 expected {original}, but was {redone}."
            ));
        }
    }

    log::info!("`can_delete` test passed!");
    report.success()
}

fn feed_prompt(
    prompt: &str,
    session: &mut InferenceSession,
    model: &impl Model,
    output: &mut OutputRequest,
) -> Result<(), llm::InferenceError> {
    session.feed_prompt(model, &Default::default(), prompt, output, always_continue)
}

fn always_continue(_: &[u8]) -> Result<InferenceFeedback, Infallible> {
    Ok(InferenceFeedback::Continue)
}

#[derive(Serialize, Default)]
pub struct DeleteReport {
    output: usize,
}

impl DeleteReport {
    fn failure(self, msg: &str) -> TestCaseReport {
        TestCaseReport {
            meta: TestCaseReportMeta::Error {
                error: msg.to_owned(),
            },
            report: crate::TestCaseReportInner::Delete(self),
        }
    }

    fn success(self) -> TestCaseReport {
        TestCaseReport {
            meta: TestCaseReportMeta::Success,
            report: crate::TestCaseReportInner::Delete(self),
        }
    }
}
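The logit comparison at the end of `can_delete` is an absolute-epsilon check: it fails if any position satisfies |original - redone| > `f32::EPSILON`. The passing condition, restated as a hypothetical standalone helper (not part of the diff):

    /// Hypothetical helper equivalent to the inline check in `can_delete`.
    fn logits_match(original: &[f32], redone: &[f32]) -> bool {
        original
            .iter()
            .zip(redone)
            .all(|(a, b)| (a - b).abs() <= f32::EPSILON)
    }

Such a tight tolerance is only workable because rewinding and re-feeding the same token should repeat the exact same floating-point computation, not merely approximate it.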
@@ -0,0 +1,116 @@
//! Tests the model's inference APIs.
//!
//! See [crate::TestCase::Inference].

use std::{convert::Infallible, sync::Arc};

use llm::InferenceStats;

use crate::{ModelConfig, TestCaseReport, TestCaseReportInner, TestCaseReportMeta};

pub(crate) fn can_infer(
    model: &dyn llm::Model,
    model_config: &ModelConfig,
    input: &str,
    expected_output: Option<&str>,
    maximum_token_count: usize,
) -> anyhow::Result<TestCaseReport> {
    let mut session = model.start_session(Default::default());
    let (actual_output, res) = run_inference(
        model,
        model_config,
        &mut session,
        input,
        maximum_token_count,
    );

    // Process the results
    Ok(TestCaseReport {
        meta: match &res {
            Ok(_) => match expected_output {
                Some(expected_output) => {
                    if expected_output == actual_output {
                        log::info!("`can_infer` test passed!");
                        TestCaseReportMeta::Success
                    } else {
                        TestCaseReportMeta::Error {
                            error: "The output did not match the expected output.".to_string(),
                        }
                    }
                }
                None => {
                    log::info!("`can_infer` test passed (no expected output)!");
                    TestCaseReportMeta::Success
                }
            },
            Err(err) => TestCaseReportMeta::Error {
                error: err.to_string(),
            },
        },
        report: TestCaseReportInner::Inference {
            input: input.into(),
            expect_output: expected_output.map(|s| s.to_string()),
            actual_output,
            inference_stats: res.ok(),
        },
    })
}

fn run_inference(
    model: &dyn llm::Model,
    model_config: &ModelConfig,
    session: &mut llm::InferenceSession,
    input: &str,
    maximum_token_count: usize,
) -> (String, Result<InferenceStats, llm::InferenceError>) {
    let mut actual_output: String = String::new();
    let res = session.infer::<Infallible>(
        model,
        &mut rand::rngs::mock::StepRng::new(0, 1),
        &llm::InferenceRequest {
            prompt: input.into(),
            parameters: &llm::InferenceParameters {
                n_threads: model_config.threads,
                n_batch: 1,
                sampler: Arc::new(DeterministicSampler),
            },
            play_back_previous_tokens: false,
            maximum_token_count: Some(maximum_token_count),
        },
        &mut Default::default(),
        |r| match r {
            llm::InferenceResponse::PromptToken(t) | llm::InferenceResponse::InferredToken(t) => {
                actual_output += &t;
                Ok(llm::InferenceFeedback::Continue)
            }
            _ => Ok(llm::InferenceFeedback::Continue),
        },
    );

    (actual_output, res)
}

#[derive(Debug)]
struct DeterministicSampler;
impl llm::Sampler for DeterministicSampler {
    fn sample(
        &self,
        previous_tokens: &[llm::TokenId],
        logits: &[f32],
        _rng: &mut dyn rand::RngCore,
    ) -> llm::TokenId {
        // Takes the most likely element from the logits, except if they've
        // appeared in `previous_tokens` at all
        let mut logits = logits.to_vec();
        for &token in previous_tokens {
            logits[token as usize] = f32::NEG_INFINITY;
        }

        logits
            .iter()
            .enumerate()
            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
            .unwrap()
            .0 as llm::TokenId
    }
}
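`DeterministicSampler` is what makes `can_infer` reproducible: paired with the fixed-sequence `StepRng`, it removes all sampling randomness by taking the argmax over the logits while masking every previously produced token to negative infinity (a blunt repetition penalty). The core logic, restated as a hypothetical free function outside the `llm::Sampler` trait:

    fn greedy_masked(logits: &[f32], previous_tokens: &[usize]) -> usize {
        let mut logits = logits.to_vec();
        // Forbid anything that has already been emitted.
        for &token in previous_tokens {
            logits[token] = f32::NEG_INFINITY;
        }
        // Argmax over the remaining candidates.
        logits
            .iter()
            .enumerate()
            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
            .map(|(index, _)| index)
            .unwrap()
    }

For example, `greedy_masked(&[0.1, 0.9, 0.5], &[1])` returns `2`: index 1 is masked, so the next-highest logit wins.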