Skip to content

Commit

Permalink
Cuda quantization padding fix.
Browse files (browse the repository at this point in the history)
  • Loading branch information
LaurentMazare committed Sep 25, 2024
1 parent fd3b53f commit 5221146
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 3 deletions.
7 changes: 5 additions & 2 deletions candle-core/src/quantized/cuda.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,10 @@ fn ceil_div(p: usize, q: usize) -> usize {
}

// Computes a padded element count from `p` using the padding unit `q`.
// NOTE(review): this is a rendered diff hunk whose +/- markers were lost; going by the
// "5 additions & 2 deletions" tally for this file, the first body line below is presumably
// the removed pre-commit implementation and the lines after it are its replacement.
fn pad(p: usize, q: usize) -> usize {
ceil_div(p, q) * q
// Overallocate by a full q elements instead of rounding p up to the next multiple of q:
// the padding is meant to apply to the last row, and we don't have enough information
// here to know how many elements that would require :(
// Previous implementation, kept for reference: ceil_div(p, q) * q
p + q
}

fn quantize_q8_1(
Expand Down Expand Up @@ -439,7 +442,7 @@ impl QCudaStorage {
}
_ => crate::bail!("only f32 can be quantized"),
};
let src_len = src.len();
let src_len = pad(src.len(), MATRIX_ROW_PADDING);
let src = crate::Storage::Cpu(crate::CpuStorage::F32(src));
let mut qcpu_storage = crate::Device::Cpu.qzeros(src_len, self.dtype)?;
qcpu_storage.quantize(&src)?;
Expand Down
2 changes: 1 addition & 1 deletion candle-core/src/quantized/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ pub(super) fn group_for_quantization<'a, 'b, T: super::k_quants::GgmlType>(
let actual_blocks = ys.len();

// Validate that the input is the right size
if expected_blocks != actual_blocks {
if actual_blocks < expected_blocks {
crate::bail!("quantize {dtype:?}: expected {expected_blocks} blocks but only {actual_blocks} were provided!")
}

Expand Down

0 comments on commit 5221146

Please sign in to comment.