Clippy
EricLBuehler committed Aug 20, 2024
1 parent a7995e1 commit 9629478
Showing 22 changed files with 210 additions and 266 deletions.
2 changes: 1 addition & 1 deletion src/backend/cache.rs
@@ -142,7 +142,7 @@ pub unsafe fn copy_blocks(
COPY_BLOCKS_KERNEL_NAME,
key_caches.first().unwrap().dtype(),
None,
&dev,
dev,
));

try_api!(unsafe {
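The `&dev` → `dev` change above reads like a fix for Clippy's `needless_borrow` lint (an inference; the commit message only says "Clippy"). A minimal, standalone sketch of the pattern, not candle-vllm code:

```rust
// Sketch of the redundant-borrow pattern that `needless_borrow` flags.
fn takes_ref(x: &u32) -> u32 {
    *x + 1
}

fn main() {
    let v = 41u32;
    let dev: &u32 = &v; // stand-in for the `dev` reference in the diff
    // Before: `takes_ref(&dev)` builds a `&&u32` that deref coercion
    // immediately collapses back down, so the extra `&` is noise.
    let before = takes_ref(&dev);
    // After: pass the existing reference through unchanged.
    let after = takes_ref(dev);
    assert_eq!(before, after);
    println!("{after}");
}
```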
4 changes: 2 additions & 2 deletions src/backend/paged_attention.rs
@@ -261,7 +261,7 @@ impl candle::CustomOp1 for PagedAttention {
///
/// * `q` - Query tensor with shape `(num_sequences, num_heads_q, head_size)`.
/// * `key_cache` - Key cache paged tensor of shape `(num_blocks, num_heads_kv, head_size / x, block_size, x)`
/// with `x` being the size of an element in bytes.
/// with `x` being the size of an element in bytes.
/// * `value_cache` - Value cache paged tensor of shape `(num_blocks, num_heads_kv, head_size, block_size)`.
/// * `block_tables` - Padded table associating blocks to each sequence of shape `(num_sequences, max_context_len // block_size)`
/// * `context_lens` - Tensor associating lengths to each sequence of shape `(num_sequences)`
@@ -440,7 +440,7 @@ fn update_cache<
/// * `key` - Key tensor of shape `(num_tokens, num_heads, head_size)`.
/// * `value` - Value tensor of shape `(num_tokens, num_heads, head_size)`.
/// * `key_cache` - Key cache paged tensor of shape `(num_blocks, num_heads, head_size / x, block_size, x)`
/// with `x` being the size of an element in bytes.
/// with `x` being the size of an element in bytes.
/// * `value_cache` - Value cache paged tensor of shape `(num_blocks, num_heads, head_size, block_size)`.
/// * `slot_mapping` - Mapping associating a slot to each token of shape `(num_tokens)`.
pub fn reshape_and_cache(
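The doc comments in this file describe the paged KV-cache layout (`block_tables`, `block_size`, `slot_mapping`). As a rough, illustrative sketch of the slot arithmetic those parameters imply — the names below are not candle-vllm's API:

```rust
// Illustrative only: the usual paged-KV-cache slot arithmetic.
fn slot_for_token(block_table: &[usize], token_pos: usize, block_size: usize) -> usize {
    // Which physical block holds this logical position...
    let block = block_table[token_pos / block_size];
    // ...and where inside that block the token lands.
    block * block_size + token_pos % block_size
}

fn main() {
    // One sequence whose logical blocks 0..=2 live in physical blocks 7, 2, 9.
    let block_table = [7usize, 2, 9];
    let block_size = 16;
    // Token 20 sits in logical block 1 (physical block 2) at offset 4.
    assert_eq!(slot_for_token(&block_table, 20, block_size), 2 * 16 + 4);
    println!("slot = {}", slot_for_token(&block_table, 20, block_size));
}
```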
102 changes: 31 additions & 71 deletions src/lib.rs
@@ -1,4 +1,6 @@
#![warn(clippy::cast_lossless)]
use std::fmt::Display;

use candle::Result;
use candle_core as candle;
use clap::Subcommand;
@@ -190,30 +192,12 @@ pub enum ModelSelected {
},
}

impl ToString for ModelSelected {
fn to_string(&self) -> String {
impl Display for ModelSelected {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ModelSelected::Llama {
repeat_last_n: _,
temperature: _,
penalty: _,
max_gen_tokens: _,
quant: _,
} => "llama".to_string(),
ModelSelected::Llama3 {
repeat_last_n: _,
temperature: _,
penalty: _,
max_gen_tokens: _,
quant: _,
} => "llama3".to_string(),
ModelSelected::Phi2 {
repeat_last_n: _,
temperature: _,
penalty: _,
max_gen_tokens: _,
quant: _,
} => "phi2".to_string(),
ModelSelected::Llama { .. } => write!(f, "llama"),
ModelSelected::Llama3 { .. } => write!(f, "llama3"),
ModelSelected::Phi2 { .. } => write!(f, "phi2"),
ModelSelected::Phi3 {
repeat_last_n: _,
temperature: _,
@@ -222,7 +206,7 @@ impl ToString for ModelSelected {
penalty: _,
max_gen_tokens: _,
quant: _,
} => "phi3".to_string(),
} => write!(f, "phi3"),
ModelSelected::Qwen2 {
repeat_last_n: _,
temperature: _,
@@ -231,35 +215,11 @@ impl ToString for ModelSelected {
penalty: _,
max_gen_tokens: _,
quant: _,
} => "qwen2".to_string(),
ModelSelected::Gemma {
repeat_last_n: _,
temperature: _,
penalty: _,
max_gen_tokens: _,
quant: _,
} => "gemma".to_string(),
ModelSelected::Mistral {
repeat_last_n: _,
temperature: _,
penalty: _,
max_gen_tokens: _,
quant: _,
} => "mistral".to_string(),
ModelSelected::Yi {
repeat_last_n: _,
temperature: _,
penalty: _,
max_gen_tokens: _,
quant: _,
} => "yi".to_string(),
ModelSelected::StableLM {
repeat_last_n: _,
temperature: _,
penalty: _,
max_gen_tokens: _,
quant: _,
} => "stablelm".to_string(),
} => write!(f, "qwen2"),
ModelSelected::Gemma { .. } => write!(f, "gemma"),
ModelSelected::Mistral { .. } => write!(f, "mistral"),
ModelSelected::Yi { .. } => write!(f, "yi"),
ModelSelected::StableLM { .. } => write!(f, "stablelm"),
}
}
}
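The hand-written `ToString` impl above becomes a `Display` impl, the rewrite Clippy's `to_string_trait_impl` lint suggests (lint name inferred). A minimal sketch of the same migration on a toy enum; the std blanket `impl<T: Display> ToString for T` keeps `.to_string()` working for existing callers:

```rust
use std::fmt;

// Toy version of the ToString -> Display migration shown above.
enum Model {
    Llama,
    Phi2,
}

impl fmt::Display for Model {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Model::Llama => write!(f, "llama"),
            Model::Phi2 => write!(f, "phi2"),
        }
    }
}

fn main() {
    // The blanket impl still supplies `.to_string()`, so old call sites work.
    assert_eq!(Model::Llama.to_string(), "llama");
    println!("{}", Model::Phi2);
}
```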
@@ -321,8 +281,8 @@ pub fn get_model_loader(
),
"llama".to_string(),
)),
if model_id.is_some() {
model_id.unwrap()
if let Some(model_id) = model_id {
model_id
} else {
"meta-llama/Llama-2-7b-chat-hf".to_string()
},
@@ -346,8 +306,8 @@ pub fn get_model_loader(
),
"llama3".to_string(),
)),
if model_id.is_some() {
model_id.unwrap()
if let Some(model_id) = model_id {
model_id
} else {
"meta-llama/Meta-Llama-3.1-8B-Instruct".to_string()
},
@@ -371,8 +331,8 @@ pub fn get_model_loader(
),
"phi2".to_string(),
)),
if model_id.is_some() {
model_id.unwrap()
if let Some(model_id) = model_id {
model_id
} else {
"microsoft/microsoft/phi-2".to_string()
},
@@ -398,8 +358,8 @@ pub fn get_model_loader(
),
"phi3".to_string(),
)),
if model_id.is_some() {
model_id.unwrap()
if let Some(model_id) = model_id {
model_id
} else {
"microsoft/Phi-3-mini-4k-instruct".to_string()
},
@@ -425,8 +385,8 @@ pub fn get_model_loader(
),
"qwen2".to_string(),
)),
if model_id.is_some() {
model_id.unwrap()
if let Some(model_id) = model_id {
model_id
} else {
"Qwen/Qwen1.5-1.8B-Chat".to_string()
},
@@ -450,8 +410,8 @@ pub fn get_model_loader(
),
"gemma".to_string(),
)),
if model_id.is_some() {
model_id.unwrap()
if let Some(model_id) = model_id {
model_id
} else {
"google/gemma-2b-it".to_string()
},
@@ -475,8 +435,8 @@ pub fn get_model_loader(
),
"mistral".to_string(),
)),
if model_id.is_some() {
model_id.unwrap()
if let Some(model_id) = model_id {
model_id
} else {
"mistralai/Mistral-7B-Instruct-v0.3".to_string()
},
@@ -501,8 +461,8 @@ pub fn get_model_loader(
),
"yi".to_string(),
)),
if model_id.is_some() {
model_id.unwrap()
if let Some(model_id) = model_id {
model_id
} else {
"01-ai/Yi-6B-Chat".to_string()
},
@@ -527,8 +487,8 @@ pub fn get_model_loader(
),
"stablelm".to_string(),
)),
if model_id.is_some() {
model_id.unwrap()
if let Some(model_id) = model_id {
model_id
} else {
"stabilityai/stablelm-zephyr-3b".to_string()
},
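Each `get_model_loader` arm above swaps an `is_some()`/`unwrap()` pair for an `if let` binding. A standalone sketch of that rewrite, with `unwrap_or_else` shown only as a further equivalent, not as what this commit applied:

```rust
// Sketch of the Option cleanup repeated in `get_model_loader`.
fn pick_model(model_id: Option<String>) -> String {
    // After the change: bind the inner value instead of is_some()/unwrap().
    if let Some(model_id) = model_id {
        model_id
    } else {
        "meta-llama/Llama-2-7b-chat-hf".to_string()
    }
}

// Further (equivalent) shortening, shown for comparison only.
fn pick_model_alt(model_id: Option<String>) -> String {
    model_id.unwrap_or_else(|| "meta-llama/Llama-2-7b-chat-hf".to_string())
}

fn main() {
    assert_eq!(pick_model(None), pick_model_alt(None));
    assert_eq!(pick_model(Some("custom/model".into())), "custom/model");
    println!("ok");
}
```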
1 change: 0 additions & 1 deletion src/main.rs
@@ -4,7 +4,6 @@ use axum::{
Router,
};
use candle_core::{DType, Device};
use candle_examples;
use candle_vllm::openai::openai_server::chat_completions;
use candle_vllm::openai::pipelines::llm_engine::LLMEngine;
use candle_vllm::openai::pipelines::pipeline::DefaultModelPaths;
13 changes: 6 additions & 7 deletions src/openai/conversation/default_conversation.rs
@@ -235,7 +235,7 @@ impl Conversation for DefaultConversation {
if let Some(message) = message {
accum += &format!("[INST] {message} [/INST]");
} else {
accum += &format!("[INST] [/INST]");
accum += "[INST] [/INST]";
}
} else if _role.clone() == self.roles.1 {
//assistant message
@@ -260,8 +260,7 @@
"<|start_header_id|>user<|end_header_id|>\n\n {message} <|eot_id|>"
);
} else {
accum +=
&format!("<|start_header_id|>user<|end_header_id|>\n\n <|eot_id|>");
accum += "<|start_header_id|>user<|end_header_id|>\n\n <|eot_id|>";
}
} else if _role.clone() == self.roles.1 {
//assistant message
@@ -284,7 +283,7 @@
if let Some(message) = message {
accum += &format!("<|user|> {message}<|end|>");
} else {
accum += &format!("<|user|> <|end|");
accum += "<|user|> <|end|";
}
} else if _role.clone() == self.roles.1 {
//assistant message
@@ -307,7 +306,7 @@
if let Some(message) = message {
accum += &format!("<|im_start|>user\n {message} <|im_end|>");
} else {
accum += &format!("<|im_start|> <|im_end|>");
accum += "<|im_start|> <|im_end|>";
}
} else if _role.clone() == self.roles.1 {
//assistant message
@@ -323,7 +322,7 @@

SeparatorStyle::Gemma => {
let mut accum = "".to_string();
for (_, message) in self.messages.iter().enumerate() {
for message in self.messages.iter() {
let Message((_role, message)) = message;
if let Some(message) = message {
accum +=
@@ -345,7 +344,7 @@
if let Some(message) = message {
accum += &format!("<|user|>user\n {message}<|endoftext|>");
} else {
accum += &format!("<|user|> <|endoftext|>");
accum += "<|user|> <|endoftext|>";
}
} else if _role.clone() == self.roles.1 {
//assistant message
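The conversation-template changes above drop `format!` calls that interpolate nothing and an `enumerate()` whose index is never read (presumably Clippy's `useless_format` and `unused_enumerate_index` lints). A self-contained sketch of both patterns, reusing the `[INST]` template from the diff:

```rust
fn main() {
    let messages: Vec<Option<String>> = vec![Some("hello".to_string()), None];
    let mut accum = String::new();

    // Before: `for (_, message) in messages.iter().enumerate()` — the index
    // was never read, so iterate directly.
    for message in messages.iter() {
        if let Some(message) = message {
            // format! earns its keep here: it interpolates `message`.
            accum += &format!("[INST] {message} [/INST]");
        } else {
            // Before: `accum += &format!("[INST] [/INST]");`
            // A plain literal appends the same text without an allocation.
            accum += "[INST] [/INST]";
        }
    }

    println!("{accum}");
}
```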
12 changes: 6 additions & 6 deletions src/openai/models/gemma.rs
@@ -61,7 +61,7 @@ impl GemmaConfig {
eos_token_id: super::TokenID(Either::Left(Some(self.eos_token_id as u32))),
max_seq_len: self.max_position_embeddings.unwrap_or(4096),
sliding_window: None,
hidden_act: hidden_act,
hidden_act,
tie_word_embeddings: false,
rope_scaling: None,
original_max_position_embeddings: None,
@@ -111,7 +111,7 @@ impl RotaryEmbedding {
&self,
q: &Tensor,
k: &Tensor,
input_positions: &Vec<Vec<usize>>,
input_positions: &[Vec<usize>],
) -> Result<(Tensor, Tensor)> {
let (b_sz, _h, seq_len, _n_embd) = q.dims4()?;
let mut q_embeds = Vec::new();
@@ -255,7 +255,7 @@ impl Attention {
&mut self,
xs: &Tensor,
attention_mask: Option<&Tensor>,
input_positions: &Vec<Vec<usize>>,
input_positions: &[Vec<usize>],
cache: Option<(&Tensor, &Tensor)>,
input_metadata: &mut InputMetadata,
) -> Result<Tensor> {
@@ -350,7 +350,7 @@ impl DecoderLayer {
&mut self,
xs: &Tensor,
attention_mask: Option<&Tensor>,
input_positions: &Vec<Vec<usize>>,
input_positions: &[Vec<usize>],
cache: Option<(&Tensor, &Tensor)>,
input_metadata: &mut InputMetadata,
) -> Result<Tensor> {
@@ -401,7 +401,7 @@ impl Gemma {
norm,
lm_head,
device: device.clone(),
dtype: dtype,
dtype,
hidden_size: cfg.hidden_size,
cfg: cfg.clone(),
})
@@ -419,7 +419,7 @@
pub fn forward(
&mut self,
input_ids: &Tensor,
input_positions: &Vec<Vec<usize>>,
input_positions: &[Vec<usize>],
kv_caches: Option<&Vec<(Tensor, Tensor)>>,
input_metadata: &mut InputMetadata,
) -> Result<Tensor> {
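The gemma.rs changes swap `&Vec<Vec<usize>>` parameters for `&[Vec<usize>]` slices (Clippy's `ptr_arg`) and use field-init shorthand for `hidden_act` and `dtype`. A sketch of why the slice signature is preferred — callers holding a `Vec` keep compiling, and arrays or sub-slices now work too; the function name is illustrative:

```rust
// Illustrative function; `&[Vec<usize>]` accepts anything slice-shaped.
fn total_positions(input_positions: &[Vec<usize>]) -> usize {
    input_positions.iter().map(|p| p.len()).sum()
}

fn main() {
    let from_vec: Vec<Vec<usize>> = vec![vec![0, 1, 2], vec![0, 1]];
    let from_array: [Vec<usize>; 1] = [vec![3, 4]];

    // A `&Vec<Vec<usize>>` still coerces to `&[Vec<usize>]`, so existing
    // call sites compile unchanged; arrays (and sub-slices) now work too.
    assert_eq!(total_positions(&from_vec), 5);
    assert_eq!(total_positions(&from_array), 2);
    println!("ok");
}
```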
7 changes: 2 additions & 5 deletions src/openai/models/linear.rs
@@ -202,7 +202,7 @@ impl QLinear {
Self {
inner: QMatMul::QTensor(Arc::new(w)),
bias: bx,
dtype: dtype,
dtype,
}
}

@@ -225,10 +225,7 @@
_ => panic!("Unsupported GGML data type!"),
};
let qtensor = QTensor::quantize(weight, ggml_dtype).unwrap();
let qbias = match linear.bias() {
Some(b) => Some(b.clone()),
_ => None,
};
let qbias = linear.bias().cloned();

QLinear::from_qparts_x(qtensor, qbias, dtype)
}
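The linear.rs change collapses a `match` that cloned an optional bias into `linear.bias().cloned()`; mapping `Option<&T>` to `Option<T>` by cloning is exactly what `Option::cloned` does. A standalone sketch with an illustrative `Bias` type standing in for the bias tensor:

```rust
// `Bias` is an illustrative stand-in for the bias tensor in the diff.
#[derive(Clone, Debug, PartialEq)]
struct Bias(Vec<f32>);

// Before: the hand-rolled match over Option<&Bias>.
fn clone_bias_manual(bias: Option<&Bias>) -> Option<Bias> {
    match bias {
        Some(b) => Some(b.clone()),
        _ => None,
    }
}

// After: Option::cloned performs the same Option<&T> -> Option<T> conversion.
fn clone_bias_cloned(bias: Option<&Bias>) -> Option<Bias> {
    bias.cloned()
}

fn main() {
    let b = Bias(vec![0.1, 0.2]);
    assert_eq!(clone_bias_manual(Some(&b)), clone_bias_cloned(Some(&b)));
    assert_eq!(clone_bias_manual(None), clone_bias_cloned(None));
    println!("equivalent");
}
```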
(Diffs for the remaining 15 changed files in this commit are not shown.)
