diff --git a/backend-comparison/benches/reduce.rs b/backend-comparison/benches/reduce.rs
index 94d298d8a1..df365f2306 100644
--- a/backend-comparison/benches/reduce.rs
+++ b/backend-comparison/benches/reduce.rs
@@ -18,7 +18,6 @@ struct ReduceBenchmark {
 impl ReduceBenchmark {
     pub fn new(instruction: Instruction, device: B::Device) -> Self {
         let shape = Shape::new([4096, 512, 64]);
-        // let shape = Shape::new([128, 128, 64]);
         let tensor = Tensor::random(shape.clone(), Distribution::Default, &device);
         Self {
             instruction,
diff --git a/crates/burn-jit/src/kernel/reduce/base.rs b/crates/burn-jit/src/kernel/reduce/base.rs
index 8b6279efac..9ec677ee93 100644
--- a/crates/burn-jit/src/kernel/reduce/base.rs
+++ b/crates/burn-jit/src/kernel/reduce/base.rs
@@ -9,7 +9,7 @@ use burn_tensor::{Shape, TensorData};
 pub use cubecl::reduce::instructions::{ArgMax, ArgMin, Mean, Prod, Sum};
 use cubecl::reduce::shared_sum;
 
-/// Specialize reduce function to computhe the sum of all elements of the `input` tensor and return
+/// Specialized reduce function to compute the sum of all elements of the `input` tensor and return
 /// the value into a single-element tensor of shape `1 x 1 x 1 x ...` with the same rank as `input`.
 ///
 /// This is expected to be faster for larger tensors than calling [reduce] with the `Sum` instruction.
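
For context, here is a minimal sketch of exercising the full-tensor sum that the corrected doc comment describes, using burn's public tensor API (`Tensor::random`, `Tensor::sum`). The function name `sum_all` is illustrative, and whether a given call dispatches to the `shared_sum` kernel imported in `base.rs` depends on the backend and tensor size; the internal function under the doc comment is not shown in this hunk.

```rust
use burn::tensor::{backend::Backend, Distribution, Shape, Tensor};

// Minimal sketch (assumption: public tensor API, not the internal kernel).
// Sums every element of a tensor shaped like the one ReduceBenchmark uses.
fn sum_all<B: Backend>(device: &B::Device) -> Tensor<B, 1> {
    let shape = Shape::new([4096, 512, 64]); // same shape as the benchmark
    let input = Tensor::<B, 3>::random(shape, Distribution::Default, device);
    // Reduces over every dimension; large inputs like this are the case the
    // specialized sum path is expected to accelerate over a plain `Sum` reduce.
    input.sum()
}
```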