Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] C++ layer to implement Triu #3631

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions csrc/ops/arith.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,51 @@ TensorView* eye(Val* size, DataType dtype) {
return eye(size, size, dtype);
}

TensorView* triu(TensorView* tv, Val* offset) {
  NVF_CHECK(
      offset->getDataType() == DataType::Int, "offset must have type Int");

  NVF_CHECK(
      tv->nDims() >= 2,
      "triu is only supported for 2+D tensors, but got ",
      tv->nDims(),
      "D tensor");

  // Strategy: build a boolean mask over the trailing two dimensions and use
  // it to select between the input and zero.
  //
  // For a [2, 4] input with offset 0:
  //   row ramp [0, 1]        broadcast to  [[0, 0, 0, 0],
  //                                         [1, 1, 1, 1]]
  //   col ramp [0, 1, 2, 3]  broadcast to  [[0, 1, 2, 3],
  //                                         [0, 1, 2, 3]]
  //   mask = rows LE cols    gives         [[1, 1, 1, 1],
  //                                         [0, 1, 1, 1]]
  // A nonzero offset k is folded in by starting the column ramp at -k, so
  // the comparison becomes mask(i, j) == (i <= j - k).
  const auto ndims = tv->domain()->logical().size();

  // Column-index ramp over the innermost dimension, shifted by -offset.
  auto col_index = iota(
      tv->domain()->logical()[ndims - 1]->extent(),
      SimplifyingIrBuilder::mulExpr(
          offset, IrBuilder::create<Val>(-1, DataType::Index)),
      IrBuilder::create<Val>(1, DataType::Index),
      DataType::Int);

  // Row-index ramp over the second-to-last dimension.
  auto row_index = iota(
      tv->domain()->logical()[ndims - 2]->extent(),
      IrBuilder::create<Val>(0, DataType::Index),
      IrBuilder::create<Val>(1, DataType::Index),
      DataType::Int);

  // Lift each ramp to 2D, compare, and zero out everything below the
  // (offset-shifted) diagonal. `where` implicitly broadcasts the 2D mask
  // against inputs with extra leading batch dimensions.
  auto row_index_2d = broadcast(row_index, {false, true});
  auto col_index_2d = broadcast(col_index, {true, false});
  auto upper_mask = le(row_index_2d, col_index_2d);
  return where(upper_mask, tv, IrBuilder::create<Val>(0, tv->dtype()));
}

// UNARY OPERATIONS

#define NVFUSER_DEFINE_UNARY_OP(operator_name, operator_type) \
Expand Down
1 change: 1 addition & 0 deletions csrc/ops/arith.h
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ NVF_API TensorView* arange(
DataType dtype = DataType::Int);
NVF_API TensorView* eye(Val* size, DataType dtype);
NVF_API TensorView* eye(Val* rows, Val* cols, DataType dtype);
NVF_API TensorView* triu(TensorView* tv, Val* offset);

// UNARY OPERATIONS
// abs
Expand Down
29 changes: 29 additions & 0 deletions tests/cpp/test_tensor_factories.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,35 @@ TEST_F(TensorFactoryTest, StandaloneIota) {
}
}

TEST_F(TensorFactoryTest, SimpleTriu) {
  // Validate triu against ATen's reference implementation across several
  // shapes (including a batched 3D case) and a spread of diagonal offsets,
  // including offsets whose magnitude exceeds both dimensions.
  const std::vector<std::vector<int64_t>> shapes = {
      {64, 64}, {4, 16}, {16, 4}, {16, 8, 32}};
  const std::vector<int64_t> diagonals = {0, 1, 2, -1, -2, 200, -200};

  for (const auto& shape : shapes) {
    for (const auto diagonal : diagonals) {
      auto fusion_ptr = std::make_unique<Fusion>();
      FusionGuard guard(fusion_ptr.get());

      auto in_tv = makeSymbolicTensor(shape.size(), DataType::Half);
      fusion_ptr->addInput(in_tv);

      auto out_tv =
          triu(in_tv, IrBuilder::create<Val>(diagonal, DataType::Int));
      fusion_ptr->addOutput(out_tv);

      const auto options = at::TensorOptions().dtype(at::kHalf).device(at::kCUDA);
      auto input = at::randn(shape, options);

      FusionExecutorCache executor_cache(std::move(fusion_ptr));
      auto outputs = executor_cache.runFusionWithInputs({input});

      EXPECT_TRUE(at::equal(outputs[0], at::triu(input, diagonal)));
    }
  }
}

TEST_F(TensorFactoryTest, StandaloneARange) {
auto starts_ends = {-1., 0., 10.3, 1024. * 256};
auto steps = {-1.5, 1., 2.};
Expand Down
Loading