This repository has been archived by the owner on Jun 24, 2024. It is now read-only.

Commit 2badcd9

Address PR comments
steventrouble committed Jul 9, 2023
1 parent 2e35b46 commit 2badcd9
Showing 11 changed files with 30 additions and 39 deletions.
15 changes: 5 additions & 10 deletions binaries/llm-test/src/delete.rs
@@ -11,9 +11,6 @@ use serde::Serialize;
 
 use crate::{TestCaseReport, TestCaseReportMeta};
 
-/// Error tolerance for the float comparisons.
-const TOLERANCE: f32 = 1e-7;
-
 /// Tests that models can delete tokens without changing the model's behavior.
 pub(crate) fn can_delete(model: &impl Model) -> TestCaseReport {
     let report = DeleteReport::default();
@@ -36,8 +33,8 @@ pub(crate) fn can_delete(model: &impl Model) -> TestCaseReport {
         return report.failure("Model did not return logits.");
     };
 
-    // Delete, then re-add. Verify logits are the same.
-    if let Err(err) = session.delete_tokens(model, 1) {
+    // Rewind, then re-add. Verify logits are the same.
+    if let Err(err) = session.rewind(model, 1) {
         return report.failure(&err.to_string());
     }
     if let Err(err) = feed_prompt(" ", &mut session, model, &mut output) {
@@ -49,15 +46,15 @@ pub(crate) fn can_delete(model: &impl Model) -> TestCaseReport {
 
     // Compare the logits
     for (idx, (&original, redone)) in original_logits.iter().zip(redone_logits).enumerate() {
-        if original > redone + TOLERANCE || original < redone - TOLERANCE {
+        if original > redone + f32::EPSILON || original < redone - f32::EPSILON {
             return report.failure(&format!(
                 "Expected logits to be the same after delete, but differed at {idx}, \
                 expected {original}, but was {redone}."
             ));
         }
     }
 
-    log::info!("`can_delete` test passed (no expected output)!");
+    log::info!("`can_delete` test passed!");
     report.success()
 }
 
@@ -67,9 +64,7 @@ fn feed_prompt(
     model: &impl Model,
     output: &mut OutputRequest,
 ) -> Result<(), llm::InferenceError> {
-    session.feed_prompt(model, &Default::default(), prompt, output, |x| {
-        always_continue(x)
-    })
+    session.feed_prompt(model, &Default::default(), prompt, output, always_continue)
 }
 
 fn always_continue(_: &[u8]) -> Result<InferenceFeedback, Infallible> {
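Note on the comparison change above: it swaps the ad-hoc 1e-7 band for the standard f32::EPSILON (about 1.19e-7), removing the magic constant in favor of an absolute machine-epsilon bound. An equivalent form of the check, as a self-contained sketch (the logits_match helper is illustrative, not part of this commit):

    /// True when two logits differ by at most one f32 machine epsilon.
    /// f32::EPSILON is an absolute bound, so this is strict for values
    /// at or above 1.0 in magnitude.
    fn logits_match(original: f32, redone: f32) -> bool {
        (original - redone).abs() <= f32::EPSILON
    }

    fn main() {
        assert!(logits_match(0.25, 0.25));
        assert!(!logits_match(0.25, 0.2500004)); // off by ~4e-7, outside the band
    }

Separately, the `|x| always_continue(x)` closure in feed_prompt was replaced with the function item `always_continue`, which coerces to the same callback type without the redundant wrapper.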
2 changes: 1 addition & 1 deletion binaries/llm-test/src/tokens.rs
@@ -55,7 +55,7 @@ pub(crate) fn can_feed(model: &impl Model, input: &str, expected_output: usize)
         ));
     }
 
-    log::info!("`can_feed` test passed (no expected output)!");
+    log::info!("`can_feed` test passed!");
     report.success()
 }
 
14 changes: 5 additions & 9 deletions crates/llm-base/src/inference_session.rs
@@ -334,17 +334,13 @@ impl InferenceSession {
     }
 
     /// Removes `num` tokens from the end of the buffer. Roughly the inverse of `feed_prompt`.
-    pub fn delete_tokens(
-        &mut self,
-        model: &dyn Model,
-        num: usize,
-    ) -> Result<Vec<TokenId>, DeleteError> {
-        if !model.supports_delete() {
-            return Err(DeleteError::UnsupportedArchitecture);
+    pub fn rewind(&mut self, model: &dyn Model, num: usize) -> Result<Vec<TokenId>, RewindError> {
+        if !model.supports_rewind() {
+            return Err(RewindError::UnsupportedArchitecture);
         }
 
         if num >= self.n_past {
-            return Err(DeleteError::NotEnoughTokens);
+            return Err(RewindError::NotEnoughTokens);
         }
 
         // Remove the tokens from self.tokens.
@@ -670,7 +666,7 @@ pub enum InferenceError {
 
 #[derive(Error, Debug)]
 /// Errors encountered during the snapshot process.
-pub enum DeleteError {
+pub enum RewindError {
     /// Tried deleting more tokens than were available
    #[error("tried deleting more tokens than were available")]
     NotEnoughTokens,
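The rename keeps the signature: rewind still returns the IDs of the removed tokens on success. A hypothetical call site under the new API (model loading and session setup elided; `model` and `session` are assumed to already exist):

    // Rewind the last two tokens. Architectures that cannot rewind report
    // RewindError::UnsupportedArchitecture instead of corrupting the session.
    match session.rewind(model, 2) {
        Ok(deleted) => log::info!("rewound {} token(s)", deleted.len()),
        Err(err) => log::warn!("rewind failed: {err}"),
    }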
6 changes: 3 additions & 3 deletions crates/llm-base/src/lib.rs
@@ -23,9 +23,9 @@ pub use ggml;
 pub use ggml::Type as ElementType;
 
 pub use inference_session::{
-    feed_prompt_callback, DeleteError, GraphOutputs, InferenceError, InferenceFeedback,
-    InferenceRequest, InferenceResponse, InferenceSession, InferenceSessionConfig,
-    InferenceSnapshot, InferenceSnapshotRef, InferenceStats, ModelKVMemoryType, SnapshotError,
+    feed_prompt_callback, GraphOutputs, InferenceError, InferenceFeedback, InferenceRequest,
+    InferenceResponse, InferenceSession, InferenceSessionConfig, InferenceSnapshot,
+    InferenceSnapshotRef, InferenceStats, ModelKVMemoryType, RewindError, SnapshotError,
 };
 pub use loader::{
     load, load_progress_callback_stdout, ContainerType, FileType, FileTypeFormat, FormatMagic,
8 changes: 4 additions & 4 deletions crates/llm-base/src/model/mod.rs
@@ -88,7 +88,7 @@ pub trait KnownModel: Send + Sync {
     fn skip_quantize_tensors() -> Vec<Regex>;
 
     /// Returns whether the model supports deleting tokens.
-    fn supports_delete(&self) -> bool {
+    fn supports_rewind(&self) -> bool {
         // Assume we can't delete unless otherwise specified
         false
     }
@@ -126,7 +126,7 @@ pub trait Model: Send + Sync {
     fn eot_token_id(&self) -> TokenId;
 
     /// Returns whether the model supports deleting tokens.
-    fn supports_delete(&self) -> bool;
+    fn supports_rewind(&self) -> bool;
 }
 impl<H: Hyperparameters, M: KnownModel<Hyperparameters = H>> Model for M {
     fn start_session(&self, config: InferenceSessionConfig) -> InferenceSession {
@@ -159,8 +159,8 @@ impl<H: Hyperparameters, M: KnownModel<Hyperparameters = H>> Model for M {
         KnownModel::eot_token_id(self)
     }
 
-    fn supports_delete(&self) -> bool {
-        KnownModel::supports_delete(self)
+    fn supports_rewind(&self) -> bool {
+        KnownModel::supports_rewind(self)
     }
 }
 
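Because supports_rewind defaults to false on KnownModel, every architecture must opt in explicitly, which is what the per-model diffs below do. A simplified, self-contained sketch of that opt-in pattern (the trait and types here are illustrative, not the actual llm API):

    trait SupportsRewind {
        /// Conservative default: rewind is unsupported unless a model opts in.
        fn supports_rewind(&self) -> bool {
            false
        }
    }

    struct OptedIn;  // stands in for Bloom, GptJ, GptNeoX, Llama, and Mpt below
    struct OptedOut; // a model that keeps the default

    impl SupportsRewind for OptedIn {
        fn supports_rewind(&self) -> bool {
            true
        }
    }

    impl SupportsRewind for OptedOut {}

    fn main() {
        assert!(OptedIn.supports_rewind());
        assert!(!OptedOut.supports_rewind());
    }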
14 changes: 7 additions & 7 deletions crates/llm/src/lib.rs
@@ -78,13 +78,13 @@ use std::{
 // This is the "user-facing" API, and GGML may not always be our backend.
 pub use llm_base::{
     feed_prompt_callback, ggml::format as ggml_format, load, load_progress_callback_stdout,
-    quantize, samplers, DeleteError, ElementType, FileType, FileTypeFormat, FormatMagic,
-    Hyperparameters, InferenceError, InferenceFeedback, InferenceParameters, InferenceRequest,
-    InferenceResponse, InferenceSession, InferenceSessionConfig, InferenceSnapshot,
-    InferenceSnapshotRef, InferenceStats, InvalidTokenBias, KnownModel, LoadError, LoadProgress,
-    Loader, Model, ModelKVMemoryType, ModelParameters, OutputRequest, Prompt, QuantizeError,
-    QuantizeProgress, Sampler, SnapshotError, TokenBias, TokenId, TokenUtf8Buffer,
-    TokenizationError, Tokenizer, TokenizerSource,
+    quantize, samplers, ElementType, FileType, FileTypeFormat, FormatMagic, Hyperparameters,
+    InferenceError, InferenceFeedback, InferenceParameters, InferenceRequest, InferenceResponse,
+    InferenceSession, InferenceSessionConfig, InferenceSnapshot, InferenceSnapshotRef,
+    InferenceStats, InvalidTokenBias, KnownModel, LoadError, LoadProgress, Loader, Model,
+    ModelKVMemoryType, ModelParameters, OutputRequest, Prompt, QuantizeError, QuantizeProgress,
+    RewindError, Sampler, SnapshotError, TokenBias, TokenId, TokenUtf8Buffer, TokenizationError,
+    Tokenizer, TokenizerSource,
 };
 
 use serde::Serialize;
2 changes: 1 addition & 1 deletion crates/models/bloom/src/lib.rs
@@ -397,7 +397,7 @@ impl KnownModel for Bloom {
         vec![]
     }
 
-    fn supports_delete(&self) -> bool {
+    fn supports_rewind(&self) -> bool {
         true
     }
 }
2 changes: 1 addition & 1 deletion crates/models/gptj/src/lib.rs
@@ -319,7 +319,7 @@ impl KnownModel for GptJ {
         vec![]
     }
 
-    fn supports_delete(&self) -> bool {
+    fn supports_rewind(&self) -> bool {
         true
     }
 }
2 changes: 1 addition & 1 deletion crates/models/gptneox/src/lib.rs
@@ -365,7 +365,7 @@ impl KnownModel for GptNeoX {
         vec![]
     }
 
-    fn supports_delete(&self) -> bool {
+    fn supports_rewind(&self) -> bool {
         true
     }
 }
2 changes: 1 addition & 1 deletion crates/models/llama/src/lib.rs
@@ -349,7 +349,7 @@ impl KnownModel for Llama {
         vec![]
     }
 
-    fn supports_delete(&self) -> bool {
+    fn supports_rewind(&self) -> bool {
         true
     }
 }
2 changes: 1 addition & 1 deletion crates/models/mpt/src/lib.rs
@@ -299,7 +299,7 @@ impl KnownModel for Mpt {
         vec![]
     }
 
-    fn supports_delete(&self) -> bool {
+    fn supports_rewind(&self) -> bool {
         true
    }
 }
