Add cumsum #2099

Closed · wants to merge 7 commits
1 change: 1 addition & 0 deletions burn-book/src/building-blocks/tensor.md
@@ -237,6 +237,7 @@ Those operations are available for numeric tensor kinds: `Float` and `Int`.
| `tensor.sub_scalar(scalar)` or `tensor - scalar` | `tensor - scalar` |
| `tensor.sum()` | `tensor.sum()` |
| `tensor.sum_dim(dim)` | `tensor.sum(dim, keepdim=True)` |
| `tensor.cumsum(dim)` | `tensor.cumsum(dim)` |
| `tensor.topk(k, dim)` | `tensor.topk(k, dim).values` |
| `tensor.topk_with_indices(k, dim)` | `tensor.topk(k, dim)` |
| `tensor.tril(diagonal)` | `torch.tril(tensor, diagonal)` |
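For reference, a minimal usage sketch of the new op documented in the row above (backend choice and device setup are illustrative):

```rust
use burn::tensor::{backend::Backend, Tensor};

fn cumsum_demo<B: Backend>(device: &B::Device) {
    let t = Tensor::<B, 2>::from_floats([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]], device);
    // Cumulative sum along dim 1: [[0.0, 1.0, 3.0], [3.0, 7.0, 12.0]]
    let c = t.cumsum(1);
    println!("{c}");
}
```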
4 changes: 4 additions & 0 deletions crates/burn-autodiff/src/ops/int_tensor.rs
@@ -154,6 +154,10 @@ impl<B: Backend, C: CheckpointStrategy> IntTensorOps<Self> for Autodiff<B, C> {
B::int_sum_dim(tensor, dim)
}

fn int_cumsum<const D: usize>(tensor: IntTensor<B, D>, dim: usize) -> IntTensor<B, D> {
B::int_cumsum(tensor, dim)
}

fn int_mean<const D: usize>(tensor: IntTensor<B, D>) -> IntTensor<B, 1> {
B::int_mean(tensor)
}
38 changes: 38 additions & 0 deletions crates/burn-autodiff/src/ops/tensor.rs
@@ -1578,6 +1578,44 @@ impl<B: Backend, C: CheckpointStrategy> FloatTensorOps<Self> for Autodiff<B, C>
}
}

fn float_cumsum<const D: usize>(
tensor: FloatTensor<Self, D>,
dim: usize,
) -> FloatTensor<Self, D> {
#[derive(Debug)]
struct CumSum;

impl<B: Backend, const D: usize> Backward<B, D, 1> for CumSum {
type State = usize;

fn backward(
self,
ops: Ops<Self::State, 1>,
grads: &mut Gradients,
_checkpointer: &mut Checkpointer,
) {
let dim = ops.state;

unary::<B, D, D, _>(ops.parents, ops.node, grads, |grad| {
// The gradient of cumsum is the reversed cumulative sum of the incoming
// gradient along the same dim: flip, cumsum, then flip back.
let flipped = B::float_flip(grad, &[dim]);
let cumsum = B::float_cumsum(flipped, dim);
B::float_flip(cumsum, &[dim])
});
}
}

match CumSum
.prepare::<C>([tensor.node])
.compute_bound()
.stateful()
{
OpsKind::Tracked(prep) => prep.finish(
dim,
B::float_cumsum(tensor.primitive, dim),
),
OpsKind::UnTracked(prep) => prep.finish(B::float_cumsum(tensor.primitive, dim)),
}
}

fn float_argmax<const D: usize>(tensor: FloatTensor<Self, D>, dim: usize) -> IntTensor<B, D> {
B::float_argmax(tensor.primitive, dim)
}
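For context on the `float_cumsum` backward above: with $y_j = \sum_{k \le j} x_k$, the chain rule gives

$$\frac{\partial L}{\partial x_i} = \sum_{j \ge i} \frac{\partial L}{\partial y_j},$$

i.e. the reversed cumulative sum of the upstream gradient along `dim`, which is exactly what the flip → cumsum → flip sequence in the closure computes.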
20 changes: 20 additions & 0 deletions crates/burn-autodiff/src/tests/cumsum.rs
@@ -0,0 +1,20 @@
#[burn_tensor_testgen::testgen(ad_cumsum)]
mod tests {
use super::*;
use burn_tensor::TensorData;

#[test]
fn should_diff_cumsum() {
let device = Default::default();
let tensor_0 = TestAutodiffTensor::<2>::from_data([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]], &device).require_grad();

let dim = 1;
let tensor_1 = tensor_0.clone().cumsum(dim);

let grads = tensor_1.backward();

let grad_0 = tensor_0.grad(&grads).unwrap();
let grad_0_expected = TensorData::from([[3., 2., 1.], [3., 2., 1.]]);
grad_0.into_data().assert_approx_eq(&grad_0_expected, 2);
}
}
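As a standalone sanity check of `grad_0_expected` (plain Rust, no burn dependency): the gradient of cumsum is the reversed cumulative sum of the upstream gradient, and `backward()` here is seeded with a gradient of ones.

```rust
/// Reversed cumulative sum of one row, used to hand-check the expected gradient.
fn reversed_cumsum(row: &[f64]) -> Vec<f64> {
    let mut out = vec![0.0; row.len()];
    let mut acc = 0.0;
    for i in (0..row.len()).rev() {
        acc += row[i];
        out[i] = acc;
    }
    out
}

fn main() {
    // Upstream gradient of ones for each length-3 row of the test tensor.
    assert_eq!(reversed_cumsum(&[1.0, 1.0, 1.0]), vec![3.0, 2.0, 1.0]);
}
```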
3 changes: 3 additions & 0 deletions crates/burn-autodiff/src/tests/mod.rs
@@ -21,6 +21,8 @@ mod conv_transpose2d;
mod conv_transpose3d;
mod cos;
mod cross_entropy;

mod cumsum;
mod div;
mod erf;
mod exp;
@@ -103,6 +105,7 @@ macro_rules! testgen_all {
burn_autodiff::testgen_ad_cat!();
burn_autodiff::testgen_ad_cos!();
burn_autodiff::testgen_ad_cross_entropy_loss!();
burn_autodiff::testgen_ad_cumsum!();
burn_autodiff::testgen_ad_div!();
burn_autodiff::testgen_ad_erf!();
burn_autodiff::testgen_ad_exp!();
4 changes: 4 additions & 0 deletions crates/burn-candle/src/ops/int_tensor.rs
@@ -321,6 +321,10 @@ impl<F: FloatCandleElement, I: IntCandleElement> IntTensorOps<Self> for Candle<F
CandleTensor::new(tensor.tensor.sum_keepdim(dim).unwrap())
}

fn int_cumsum<const D: usize>(tensor: IntTensor<Self, D>, dim: usize) -> IntTensor<Self, D> {
CandleTensor::new(tensor.tensor.cumsum(dim).unwrap())
}

fn int_prod<const D: usize>(tensor: IntTensor<Self, D>) -> IntTensor<Self, 1> {
todo!("prod is not implemented for Candle IntTensor (see https://github.com/tracel-ai/burn/issues/1454)")
}
7 changes: 7 additions & 0 deletions crates/burn-candle/src/ops/tensor.rs
@@ -373,6 +373,13 @@ impl<F: FloatCandleElement, I: IntCandleElement> FloatTensorOps<Self> for Candle
CandleTensor::new(tensor.tensor.sum_keepdim(dim).unwrap())
}

fn float_cumsum<const D: usize>(
tensor: FloatTensor<Self, D>,
dim: usize,
) -> FloatTensor<Self, D> {
CandleTensor::new(tensor.tensor.cumsum(dim).unwrap())
}

fn float_mean_dim<const D: usize>(
tensor: FloatTensor<Self, D>,
dim: usize,
26 changes: 26 additions & 0 deletions crates/burn-fusion/src/ops/float.rs
@@ -1335,6 +1335,32 @@ impl<B: FusionBackend> FloatTensorOps<Self> for Fusion<B> {
out
}

fn float_cumsum<const D: usize>(
tensor: FloatTensor<Self, D>,
dim: usize,
) -> FloatTensor<Self, D> {
scalar_float_ops!(CumSumOps, B::float_cumsum, usize, noconvert);

let stream = tensor.stream;
let shape = tensor.shape.clone();
let out = tensor
.client
.tensor_uninitialized(shape, B::FloatElem::dtype());

let desc = ScalarOperationDescription {
lhs: tensor.into_description(),
rhs: dim,
out: out.to_description_out(),
};
out.client.register(
vec![stream],
OperationDescription::NumericFloat(NumericOperationDescription::CumSum(desc.clone())),
CumSumOps::<B, D>::new(desc),
);

out
}

fn float_mean<const D: usize>(tensor: FloatTensor<Self, D>) -> FloatTensor<Self, 1> {
unary_float_ops!(MeanOps, B::float_mean, reduce);

7 changes: 7 additions & 0 deletions crates/burn-fusion/src/stream/context.rs
@@ -628,6 +628,13 @@ impl<E: Element> RelativeOpsScalar<E> for NumericOperationDescription<E> {
out: desc.out.to_relative(converter),
})
}
NumericOperationDescription::CumSum(desc) => {
NumericOperationDescription::CumSum(ScalarOperationDescription {
lhs: desc.lhs.to_relative(converter),
rhs: desc.rhs, // Dim should stay the same.
out: desc.out.to_relative(converter),
})
}
NumericOperationDescription::Prod(desc) => {
NumericOperationDescription::Prod(UnaryOperationDescription {
input: desc.input.to_relative(converter),
91 changes: 91 additions & 0 deletions crates/burn-jit/src/kernel/accumulate/base.rs
@@ -0,0 +1,91 @@
#[cfg(feature = "autotune")]
use crate::kernel::accumulate::accumulate_dim_autotune;
use crate::{element::JitElement, tensor::JitTensor, JitRuntime};

use super::{
naive::{base::AccumulateDimNaive, shader::accumulate_dim_naive},
shared::{base::AccumulateDimShared, shader::accumulate_dim_shared},
};

#[allow(dead_code)]
pub(crate) trait AccumulateDimAlgorithm<E: JitElement>:
AccumulateDimNaive<E> + AccumulateDimShared<E>
{
}

/// Creates an empty output tensor with the same shape as the input (accumulate preserves the shape)
pub fn init_accumulate_output<R: JitRuntime, EI: JitElement, EO: JitElement, const D: usize>(
input: &JitTensor<R, EI, D>,
) -> JitTensor<R, EO, D> {
let shape_out = input.shape.clone();

// Create output handle
let num_elems_output = shape_out.num_elements();
let handle = input
.client
.empty(num_elems_output * core::mem::size_of::<EO>());
JitTensor::new_contiguous(
input.client.clone(),
input.device.clone(),
shape_out,
handle,
)
}

#[derive(Copy, Clone, Debug)]
#[allow(missing_docs)]
pub enum AccumulateStrategy {
Naive,
SharedMemory,
#[cfg(feature = "autotune")]
Autotune,
}

impl Default for AccumulateStrategy {
fn default() -> Self {
// If autotune is enabled, default to autotune.
#[cfg(feature = "autotune")]
return AccumulateStrategy::Autotune;

#[cfg(not(feature = "autotune"))]
AccumulateStrategy::Naive
}
}

macro_rules! accumulate_operation {
($name:ident, $ops:ident) => {
pub(crate) struct $ops;
impl<E: JitElement> AccumulateDimAlgorithm<E> for $ops {}

/// Executes the accumulate operation with the given strategy.
pub fn $name<R: JitRuntime, EI: JitElement, EO: JitElement, const D: usize>(
tensor: JitTensor<R, EI, D>,
dim: usize,
strategy: AccumulateStrategy,
) -> JitTensor<R, EO, D> {
match strategy {
AccumulateStrategy::Naive => {
let output = init_accumulate_output(&tensor);
accumulate_dim_naive::<$ops, R, EI, EO, D>(tensor, output, dim)
}
AccumulateStrategy::SharedMemory => {
let output = init_accumulate_output(&tensor);
accumulate_dim_shared::<$ops, R, EI, EO, D>(tensor, output, dim)
}
#[cfg(feature = "autotune")]
AccumulateStrategy::Autotune => accumulate_dim_autotune::<$ops, R, EI, EO, D>(tensor, dim),
}
}
};
}

// Autotunable accumulate operation variants
accumulate_operation!(cumsum, CumSum);
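The invocation above generates the `CumSum` marker type plus a `pub fn cumsum<R, EI, EO, D>(tensor, dim, strategy)` entry point. A hedged sketch of a call site (illustrative only; the actual backend wiring is not part of this hunk):

```rust
// Illustrative only: dispatch the generated entry point with the default strategy,
// keeping the output element type equal to the input element type.
let output = cumsum::<R, E, E, D>(tensor, dim, AccumulateStrategy::default());
```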
7 changes: 7 additions & 0 deletions crates/burn-jit/src/kernel/accumulate/mod.rs
@@ -0,0 +1,7 @@
//! Code for accumulate kernels
//!
//! Accumulate is similar to reduce but the output shape is the same as the input shape.
//! Each element in the output contains the accumulated value up to that point.
mod base;
mod naive;
mod shared;
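A small, backend-independent illustration of the shape contrast described in the module doc above:

```rust
fn main() {
    let x = [1.0_f64, 2.0, 3.0];

    // Reduce: collapses the dimension to a single value.
    let sum: f64 = x.iter().sum(); // 6.0

    // Accumulate: keeps the dimension; each element is the running total up to that point.
    let mut acc = 0.0;
    let cumsum: Vec<f64> = x.iter().map(|v| { acc += v; acc }).collect(); // [1.0, 3.0, 6.0]

    assert_eq!(sum, 6.0);
    assert_eq!(cumsum, vec![1.0, 3.0, 6.0]);
}
```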
32 changes: 32 additions & 0 deletions crates/burn-jit/src/kernel/accumulate/naive/base.rs
@@ -0,0 +1,32 @@
use cubecl::ir::{Item, Scope, Variable};

use crate::JitElement;

/// Specifies the accumulate dim algorithm in use
pub trait AccumulateDimNaive<E: JitElement>: Send + Sync + 'static {
/// The accumulator
type Accumulator: Copy;

/// Initialization for naive algorithm
fn initialize_naive(
scope: &mut Scope,
input_item: Item,
output_item: Item,
) -> Self::Accumulator;

/// Inner loop for naive algorithm
fn inner_loop_naive(
scope: &mut Scope,
accumulator: Self::Accumulator,
current_value: Variable,
i: Variable,
);

/// Assignment for naive algorithm
fn assign_naive(
scope: &mut Scope,
output: Variable,
accumulator: Self::Accumulator,
shape_accumulate_dim: Variable,
);
}
2 changes: 2 additions & 0 deletions crates/burn-jit/src/kernel/accumulate/naive/mod.rs
@@ -0,0 +1,2 @@
pub(crate) mod base;
pub(crate) mod shader;