[Feat] Add new burn-vision crate with one initial op #2753

Status: Open

wingertge wants to merge 25 commits into base `main` from `feat/burn-vision`.

Commits (25)
e7a84b9 Update cubecl (wingertge, Jan 25, 2025)
267ec63 Update to scope merge (wingertge, Jan 26, 2025)
71584e0 Fix bitwise shift (wingertge, Jan 26, 2025)
5838364 Initial JIT implementation (wingertge, Jan 28, 2025)
d292f85 Merge branch 'main' into feat/burn-vision (wingertge, Jan 28, 2025)
9e65150 Move testgen to burn-jit (wingertge, Jan 28, 2025)
0484f51 Improve HA4/8 algo (wingertge, Jan 28, 2025)
f62a9ee Terminate units past the predefined 32 plane size (wingertge, Jan 28, 2025)
8edac2b move jit backend back into `burn-vision` and make tests work (wingertge, Jan 30, 2025)
05b40e3 Add initial CPU implementation without stats (wingertge, Jan 30, 2025)
7708993 Implement stats (wingertge, Jan 31, 2025)
aeea3a8 Implement all backends except fusion (wingertge, Jan 31, 2025)
a994ca7 Fix autodiff to use GPU when available (wingertge, Jan 31, 2025)
866307b Fixes and cleanup (wingertge, Jan 31, 2025)
a8e3994 Add docs (wingertge, Jan 31, 2025)
021360b Update cubecl (wingertge, Jan 31, 2025)
a1d727f Merge branch 'main' into feat/burn-vision (wingertge, Jan 31, 2025)
01ff01b Compact labels for JIT (wingertge, Feb 1, 2025)
d790113 Improve JIT backend implementation by adding label compaction (wingertge, Feb 2, 2025)
15c431c Use GPU reduction for max label (wingertge, Feb 2, 2025)
e3ec085 Manually fuse presence and prefix sum (wingertge, Feb 2, 2025)
11c8f1f Make prefix sum more generic over line size (wingertge, Feb 2, 2025)
ee5ad73 Merge branch 'main' into feat/burn-vision (wingertge, Feb 3, 2025)
e6126c8 Add vision tests to xtask (wingertge, Feb 3, 2025)
1bbf50a Fix CPU and other review stuff (wingertge, Feb 3, 2025)
Files changed (showing changes from 8 commits)
140 changes: 68 additions & 72 deletions Cargo.lock

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions Cargo.toml
@@ -153,11 +153,11 @@ ahash = { version = "0.8.11", default-features = false }
 portable-atomic-util = { version = "0.2.4", features = ["alloc"] }
 
 ### For the main burn branch. ###
-cubecl = { git = "https://github.com/tracel-ai/cubecl", default-features = false, rev = "ff34667accfe077d4a1cd48ae419868e142acfd6" }
-cubecl-common = { git = "https://github.com/tracel-ai/cubecl", default-features = false, rev = "ff34667accfe077d4a1cd48ae419868e142acfd6" }
+# cubecl = { git = "https://github.com/tracel-ai/cubecl", default-features = false, rev = "ff34667accfe077d4a1cd48ae419868e142acfd6" }
+# cubecl-common = { git = "https://github.com/tracel-ai/cubecl", default-features = false, rev = "ff34667accfe077d4a1cd48ae419868e142acfd6" }
 ### For local development. ###
-# cubecl = { path = "../cubecl/crates/cubecl", default-features = false }
-# cubecl-common = { path = "../cubecl/crates/cubecl-common", default-features = false }
+cubecl = { path = "../cubecl/crates/cubecl", default-features = false }
+cubecl-common = { path = "../cubecl/crates/cubecl-common", default-features = false }
 ### For the release. ###
 # cubecl = { version = "0.4.0", default-features = false }
 # cubecl-common = { version = "0.4.0", default-features = false }
2 changes: 1 addition & 1 deletion crates/burn-cuda/Cargo.toml
@@ -12,8 +12,8 @@ repository = "https://github.com/tracel-ai/burn/tree/main/crates/burn-cuda"
 version.workspace = true
 
 [features]
-default = ["fusion", "autotune", "burn-jit/default", "cubecl/default"]
 autotune = ["burn-jit/autotune"]
+default = ["fusion", "autotune", "burn-jit/default", "cubecl/default"]
 doc = ["burn-jit/doc"]
 fusion = ["burn-fusion", "burn-jit/fusion"]
 std = ["burn-jit/std", "cubecl/std"]
6 changes: 5 additions & 1 deletion crates/burn-jit/Cargo.toml
@@ -13,20 +13,23 @@ version.workspace = true
 
 [features]
 autotune = []
-default = ["autotune", "std", "fusion", "cubecl/default"]
+default = ["autotune", "std", "fusion", "vision", "cubecl/default"]
 doc = ["default"]
 export_tests = [
     "burn-tensor-testgen",
     "serial_test",
     "burn-autodiff/export_tests",
     "burn-tensor/export_tests",
+    "burn-vision/export_tests",
     "burn-ndarray",
     "fusion",
+    "vision",
     "paste",
 ]
 fusion = ["burn-fusion"]
 fusion-experimental = ["fusion"]
 std = ["cubecl/std", "burn-tensor/std"]
+vision = ["burn-vision"]
 
 template = []
@@ -37,6 +40,7 @@ burn-tensor = { path = "../burn-tensor", version = "0.17.0", default-features =
     "cubecl",
     "repr",
 ] }
+burn-vision = { path = "../burn-vision", version = "0.17.0", optional = true }
 cubecl = { workspace = true, features = ["linalg", "reduce"] }
 
 bytemuck = { workspace = true }
2 changes: 1 addition & 1 deletion crates/burn-jit/src/fusion/matmul/args.rs
@@ -247,7 +247,7 @@ impl CubeType for FusedMatmulState {
 }
 
 impl Init for FusedMatmulStateExpand {
-    fn init(self, _context: &mut CubeContext) -> Self {
+    fn init(self, _context: &mut Scope) -> Self {
         self
     }
 }
4 changes: 2 additions & 2 deletions crates/burn-jit/src/fusion/on_write/ir.rs
@@ -45,13 +45,13 @@ impl CubeType for Arg {
 }
 
 impl Init for Arg {
-    fn init(self, _context: &mut CubeContext) -> Self {
+    fn init(self, _context: &mut Scope) -> Self {
         self
     }
 }
 
 impl IntoRuntime for Arg {
-    fn __expand_runtime_method(self, _context: &mut CubeContext) -> Self::ExpandType {
+    fn __expand_runtime_method(self, _context: &mut Scope) -> Self::ExpandType {
         self
     }
 }
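Both fusion diffs above apply the same mechanical migration: the cubecl update (the "Update to scope merge" commit in the list) replaces `&mut CubeContext` with `&mut Scope` in expansion-trait methods. A minimal, self-contained sketch of the pattern, where `Scope` and `Init` are local stand-ins for illustration rather than cubecl's real definitions:

// Stand-in for cubecl's `Scope` (illustrative only).
pub struct Scope;

// Stand-in for the `Init` trait after the scope merge: expand types are
// initialized against a `Scope` instead of a `CubeContext`.
pub trait Init {
    fn init(self, scope: &mut Scope) -> Self;
}

pub struct Arg;

impl Init for Arg {
    // Plain data carries no launch-time state, so `init` returns `self`
    // unchanged; that is why the impls in the diffs are one-liners.
    fn init(self, _scope: &mut Scope) -> Self {
        self
    }
}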
12 changes: 4 additions & 8 deletions crates/burn-jit/src/kernel/conv/conv2d/gemm/launch.rs
@@ -7,7 +7,7 @@ use burn_tensor::{
 use cubecl::{
     flex32,
     ir::{Elem, FloatKind},
-    linalg::matmul::{self},
+    linalg::matmul::{self, kernels::MatmulLaunchError},
     tensor_line_size, tf32, Feature,
 };
 use half::{bf16, f16};
@@ -195,18 +195,14 @@ where
     let cube_count = Alg::cube_count(&selection, &problem);
 
     let advanced_config = Default::default();
-    let config = match Alg::make_config(
+    let config = Alg::make_config(
         config_input,
         &problem,
         &cube_dim,
         &cube_count,
         &advanced_config,
-    ) {
-        Ok(val) => val,
-        Err(err) => {
-            panic!("Can't launch conv kernel because of an invalid config: {err}")
-        }
-    };
+    )
+    .map_err(MatmulLaunchError::InvalidConfig)?;
 
     let bias = bias.unwrap_or_else(|| {
         empty_device::<R, SP::EG>(input.client.clone(), input.device.clone(), Shape::new([1]))
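The change above swaps a panicking `match` for `?`-based propagation. A self-contained sketch of the pattern, using hypothetical stand-in types (`TileConfigError`, `LaunchError`, and `make_config` are not the real burn/cubecl names):

#[derive(Debug)]
pub struct TileConfigError(String);

#[derive(Debug)]
pub enum LaunchError {
    InvalidConfig(TileConfigError),
}

fn make_config(tile: u32) -> Result<u32, TileConfigError> {
    if tile.is_power_of_two() {
        Ok(tile)
    } else {
        Err(TileConfigError(format!("unsupported tile size {tile}")))
    }
}

fn launch(tile: u32) -> Result<(), LaunchError> {
    // `map_err` wraps the config error in the launch-error enum; `?`
    // returns it to the caller instead of panicking at the call site.
    let _config = make_config(tile).map_err(LaunchError::InvalidConfig)?;
    Ok(())
}

The caller now decides how to surface an invalid config, which, given the `Into<AutotuneError>` impl in error.rs below, presumably also lets autotune treat it as recoverable rather than aborting the process.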
38 changes: 25 additions & 13 deletions crates/burn-jit/src/kernel/conv/conv2d/im2col.rs
@@ -98,25 +98,38 @@ fn im2col_kernel<F: Float>(
 }
 
 #[cfg(not(test))]
-pub(crate) fn batches_per_run(batch_size: usize, out_h: usize, out_w: usize) -> Option<usize> {
-    let cube_count_per_batch = (out_h * out_w).div_ceil(burn_common::PLANE_DIM_APPROX);
+pub(crate) fn batches_per_run(
+    batch_size: usize,
+    out_h: usize,
+    out_w: usize,
+) -> Result<usize, ConvLaunchError> {
+    use cubecl::linalg::matmul::kernels::MatmulAvailabilityError;
+
+    let cube_count_per_batch = (out_h * out_w).div_ceil(cubecl::PLANE_DIM_APPROX);
     let max_cube_count = u16::MAX as usize;
     let max_simultaneous = (max_cube_count / cube_count_per_batch).min(batch_size);
     if max_simultaneous == 0 {
-        return None;
+        return Err(MatmulAvailabilityError::CubeCountTooBig(CubeCount::Static(
+            cube_count_per_batch as u32,
+            1,
+            1,
+        ))
+        .into());
     }
-    Some(
-        (0..=max_simultaneous)
-            .rev()
-            .find(|per_run| batch_size % per_run == 0)
-            .expect("Logically not possible"),
-    )
+    Ok((0..=max_simultaneous)
+        .rev()
+        .find(|per_run| batch_size % per_run == 0)
+        .expect("Logically not possible"))
 }
 
 #[cfg(test)]
 #[allow(unused)]
-pub(crate) fn batches_per_run(batch_size: usize, out_h: usize, out_w: usize) -> Option<usize> {
-    Some(1)
+pub(crate) fn batches_per_run(
+    batch_size: usize,
+    out_h: usize,
+    out_w: usize,
+) -> Result<usize, ConvLaunchError> {
+    Ok(1)
 }
 
 fn im2col<R: JitRuntime, E: FloatElement>(
@@ -214,8 +227,7 @@ pub fn conv2d_im2col<R: JitRuntime, E: FloatElement>(
         return execute_1x1_kernel::<R, E>(input, weight, bias, options);
     }
 
-    let batches_per_run = batches_per_run(batch_size, out_h, out_w)
-        .expect("Image too large to run even one batch at once");
+    let batches_per_run = batches_per_run(batch_size, out_h, out_w)?;
     let matmul_shape = Shape::new([groups, out_c_per_group, batches_per_run * out_h * out_w]);
 
     let mut out = if batches_per_run != batch_size {
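For reference, the logic of `batches_per_run` in isolation: estimate how many cubes one batch needs, cap the number of simultaneous batches by the dispatch limit, then take the largest exact divisor of `batch_size` that fits. A runnable sketch; the plane-size constant is an assumed illustrative value, not cubecl's actual `PLANE_DIM_APPROX`:

fn batches_per_run_sketch(batch_size: usize, out_h: usize, out_w: usize) -> Option<usize> {
    const PLANE_DIM_APPROX: usize = 16; // assumed value for illustration
    let cube_count_per_batch = (out_h * out_w).div_ceil(PLANE_DIM_APPROX);
    let max_cube_count = u16::MAX as usize; // dispatch limit per dimension
    let max_simultaneous = (max_cube_count / cube_count_per_batch).min(batch_size);
    if max_simultaneous == 0 {
        return None; // a single batch already exceeds the cube-count limit
    }
    // Search downward from the cap; 1 divides everything, so this always
    // succeeds once max_simultaneous >= 1.
    (1..=max_simultaneous)
        .rev()
        .find(|per_run| batch_size % per_run == 0)
}

fn main() {
    // 8x8 output => 4 cubes per batch, far under the limit, so all 12
    // batches can run at once.
    assert_eq!(batches_per_run_sketch(12, 8, 8), Some(12));
}

Requiring an exact divisor keeps every run the same size, presumably so the im2col buffers and matmul shape can be reused across runs without a ragged final batch.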
11 changes: 10 additions & 1 deletion crates/burn-jit/src/kernel/conv/error.rs
@@ -1,5 +1,8 @@
 use core::fmt::Debug;
-use cubecl::{linalg::matmul::kernels::MatmulLaunchError, tune::AutotuneError};
+use cubecl::{
+    linalg::matmul::kernels::{MatmulAvailabilityError, MatmulLaunchError},
+    tune::AutotuneError,
+};
 
 pub enum ConvLaunchError {
     Matmul(MatmulLaunchError),
@@ -30,6 +33,12 @@ impl From<MatmulLaunchError> for ConvLaunchError {
     }
 }
 
+impl From<MatmulAvailabilityError> for ConvLaunchError {
+    fn from(value: MatmulAvailabilityError) -> Self {
+        Self::Matmul(MatmulLaunchError::Unavailable(value))
+    }
+}
+
 #[allow(clippy::from_over_into)]
 impl Into<AutotuneError> for ConvLaunchError {
     fn into(self) -> AutotuneError {
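This new `From<MatmulAvailabilityError>` impl is what makes the `.into()` in `batches_per_run` and the `?` in `conv2d_im2col` compile. A self-contained sketch of the same two-level wrapping, with stand-in enums rather than the real cubecl types:

#[derive(Debug)]
pub enum AvailabilityError {
    CubeCountTooBig(u32),
}

#[derive(Debug)]
pub enum MatmulError {
    Unavailable(AvailabilityError),
}

#[derive(Debug)]
pub enum ConvError {
    Matmul(MatmulError),
}

impl From<AvailabilityError> for ConvError {
    // Lift the availability error through the intermediate matmul enum,
    // mirroring the impl in the diff above.
    fn from(value: AvailabilityError) -> Self {
        Self::Matmul(MatmulError::Unavailable(value))
    }
}

fn check_cube_count(cubes: u32) -> Result<(), ConvError> {
    if cubes as usize > u16::MAX as usize {
        // `.into()` performs the two-level wrap in one step.
        return Err(AvailabilityError::CubeCountTooBig(cubes).into());
    }
    Ok(())
}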
4 changes: 4 additions & 0 deletions crates/burn-jit/src/kernel/mod.rs
@@ -37,6 +37,10 @@ pub mod quantization;
 /// Reduction algorithms
 pub mod reduce;
 
+/// Vision algorithms
+#[cfg(feature = "vision")]
+pub mod vision;
+
 pub(crate) use clamp::*;
 pub(crate) use comparison::*;
 pub(crate) use index::*;
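The module is gated so burn-jit still builds without the new dependency; the `vision` feature added in the burn-jit Cargo.toml diff above pulls in `burn-vision` and compiles this module. A minimal sketch of the gating pattern, with hypothetical names:

#[cfg(feature = "vision")]
pub mod vision {
    // Compiled only with `cargo build --features vision`.
    pub fn kernels_available() -> bool {
        true
    }
}

#[cfg(feature = "vision")]
pub fn vision_enabled() -> bool {
    vision::kernels_available()
}

#[cfg(not(feature = "vision"))]
pub fn vision_enabled() -> bool {
    false
}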