From e72f9e55b8e5f339ed03c3b7fe8e69aad60a741a Mon Sep 17 00:00:00 2001
From: Naoya Maruyama
Date: Thu, 19 Dec 2024 20:19:09 -0800
Subject: [PATCH] Allow fusion of multiple exclusive resize ops (#3611)

Followup to #3556. Currently, the resize scheduler only accepts a single
slice or pad. This PR allows fusing multiple resize-based ops as long as
they don't conflict. Please see the
[comment](https://github.com/NVIDIA/Fuser/pull/3611/files#diff-b066c49d399243d3be36a44f1221490b9a2f50e41074feab836bc9bb6ee71180R25-R100)
for `getNonExclusiveResizeInfo`. In this PR, if there's a conflict, the
fusion is simply rejected. A followup PR will address this limitation by
replicating computations.
---
 csrc/scheduler/resize.cpp             |  41 +--
 csrc/scheduler/tools/resize_utils.cpp | 106 ++++++++
 csrc/scheduler/tools/resize_utils.h   |  79 ++++++
 tests/cpp/test_gpu3.cpp               |   2 -
 tests/cpp/test_resize.cpp             | 361 +++++++++++++++++++++++++-
 5 files changed, 543 insertions(+), 46 deletions(-)

diff --git a/csrc/scheduler/resize.cpp b/csrc/scheduler/resize.cpp
index fc96bd3db67..194087b90e8 100644
--- a/csrc/scheduler/resize.cpp
+++ b/csrc/scheduler/resize.cpp
@@ -71,40 +71,19 @@ bool ResizeScheduler::canScheduleCompileTime(Fusion* fusion) {
   IdModel id_model(fusion, /*build_graphs=*/false);
   const auto& broadcast_graph = id_model.buildBroadcastGraph();

-  // For now, only a single resize op is allowed to exist.
   auto resize_based_tensor_ops = ir_utils::getOpsOfType<SliceOp, PadOp>(fusion);
-  if (resize_based_tensor_ops.size() != 1) {
-    scheduler_debug_utils::canScheduleRejectReason(
-        schedulerType(), "Only a single resize op is allowed.");
-    return false;
-  }
-  auto resize_out_tv =
-      resize_based_tensor_ops.at(0)->output(0)->as<TensorView>();
-
-  auto all_dep_vals = DependencyCheck::getAllValsBetween(
-      {fusion->inputs().begin(), fusion->inputs().end()}, {resize_out_tv});
-  for (auto tv : ir_utils::filterByType<TensorView>(all_dep_vals)) {
-    if (tv == resize_out_tv) {
-      continue;
-    }
-    if (tv->isFusionOutput()) {
-      scheduler_debug_utils::canScheduleRejectReason(
-          schedulerType(),
-          "Dependency to fusion output not allowed: ",
-          tv->toString());
-      return false;
-    }
-    for (auto consumer_of_tv : ir_utils::consumerTvsOf(tv)) {
-      if (std::find(all_dep_vals.begin(), all_dep_vals.end(), consumer_of_tv) ==
-          all_dep_vals.end()) {
-        scheduler_debug_utils::canScheduleRejectReason(
-            schedulerType(),
-            "Resize inputs must be exclusively consumed by resize: ",
-            consumer_of_tv->toString());
-        return false;
-      }
+  if (auto non_exclusive_resizes = scheduler_tools::getNonExclusiveResizeInfo(
+          resize_based_tensor_ops, id_model.idGraph(IdMappingMode::EXACT));
+      !non_exclusive_resizes.empty()) {
+    std::stringstream msg;
+    msg << "Propagation of resizes would affect fusion outputs.";
+    for (const auto& [tv, resize_ids] : non_exclusive_resizes) {
+      msg << " Resize input tv: " << tv->toString()
+          << ", resize input ID groups: " << nvfuser::toString(resize_ids);
     }
+    scheduler_debug_utils::canScheduleRejectReason(schedulerType(), msg.str());
+    return false;
   }

   // Slicing of or to a broadcast ID is not allowed yet.
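To make the new rejection path concrete, here is a minimal sketch of a fusion the check above now rejects (condensed from the `PropagateMultipleSlicesToInputs2` test added in this patch; only the `tv*` names and shapes are illustrative): two horizontal slices share the producer `tv1`, so propagating either resize to `tv1` would corrupt the other branch.

```cpp
// Sketch only; mirrors the test code added below in this patch.
auto fusion_ptr = std::make_unique<Fusion>();
Fusion& fusion = *fusion_ptr;
FusionGuard fg(fusion_ptr.get());

auto tv0 = makeConcreteTensor({-1, 100});
fusion.addInput(tv0);
auto tv1 = sin(tv0);
// First slice of tv1 along axis 1.
auto tv2 = slice(
    tv1,
    {{fusion.zeroVal(), tv1->getLogicalDomain().at(0)->extent()},
     {IrBuilder::create<Val>(1L), tv1->getLogicalDomain().at(1)->extent()}});
// Second, conflicting slice of the same tv1 axis with a different offset.
auto tv3 = slice(
    tv1,
    {{fusion.zeroVal(), tv1->getLogicalDomain().at(0)->extent()},
     {IrBuilder::create<Val>(2L), tv1->getLogicalDomain().at(1)->extent()}});
fusion.addOutput(sin(tv2));
fusion.addOutput(sin(tv3));

// getNonExclusiveResizeInfo flags tv1 for both slices, so
// canScheduleCompileTime rejects the fusion with the message above
// instead of asserting on the op count as before.
```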
diff --git a/csrc/scheduler/tools/resize_utils.cpp b/csrc/scheduler/tools/resize_utils.cpp
index cc914e5684b..c812812b905 100644
--- a/csrc/scheduler/tools/resize_utils.cpp
+++ b/csrc/scheduler/tools/resize_utils.cpp
@@ -66,5 +66,111 @@ void propagateResizeToInputs(Expr* resize_tensor_op) {
   }
 }

+std::unordered_map<TensorView*, ValGroups> getNonExclusiveResizeInfo(
+    const std::vector<Expr*>& ordered_resize_tensor_ops,
+    const ValGraph& exact_graph) {
+  NVF_ERROR(!ordered_resize_tensor_ops.empty());
+  Fusion* fusion = ordered_resize_tensor_ops[0]->fusion();
+
+  std::unordered_map<TensorView*, ValGroups> non_exclusive_resizes;
+
+  std::unordered_set<Val*> inputs{
+      fusion->inputs().begin(), fusion->inputs().end()};
+
+  auto get_root_to_logical_resizes =
+      [&exact_graph](TensorView* tv) -> ValGroups {
+    // This should only be used for outputs of resize-based ops,
+    // so it should always have a root domain.
+    NVF_ERROR(tv->hasRoot());
+    auto out_tv_root_to_logical_exprs = DependencyCheck::getAllExprsBetween(
+        {tv->getRootDomain().begin(), tv->getRootDomain().end()},
+        {tv->getLogicalDomain().begin(), tv->getLogicalDomain().end()});
+    ValGroups resize_inp_ids;
+    for (auto resize :
+         ir_utils::filterByType<Resize>(out_tv_root_to_logical_exprs)) {
+      resize_inp_ids.pushBack(exact_graph.toGroup(resize->in()));
+    }
+    return resize_inp_ids;
+  };
+
+  // Traverse the ops in a topological order
+  for (Expr* resize_tensor_op : ordered_resize_tensor_ops) {
+    auto inp_tv = dynamic_cast<TensorView*>(resize_tensor_op->inputs().at(0));
+    auto out_tv = dynamic_cast<TensorView*>(resize_tensor_op->outputs().at(0));
+
+    ValGroups resize_inp_ids = get_root_to_logical_resizes(out_tv);
+    NVF_ERROR(!resize_inp_ids.empty());
+
+    auto dep_vals =
+        DependencyCheck::getAllValsBetween(inputs, std::vector<Val*>{inp_tv});
+
+    // For each tensor that inp_tv depends on, check if the resize op
+    // is considered non-exclusive with respect to the tensor. That
+    // is, if propagation of the resize may result in externally
+    // visible changes through the tensor, the resize is considered
+    // non-exclusive.
+    for (auto dep_tv : ir_utils::filterByType<TensorView>(dep_vals)) {
+      bool maybe_non_exclusive = dep_tv->isFusionOutput();
+
+      if (!maybe_non_exclusive) {
+        // If a dependent tv has a consumer that inp_tv does not
+        // depend on, propagation of the resize would escape to
+        // outputs, which needs to be avoided.
+        for (auto consumer_tv : ir_utils::consumerTvsOf(dep_tv)) {
+          // We are interested in whether resized IDs are used by
+          // tensors other than out_tv
+          if (consumer_tv != out_tv &&
+              std::find(dep_vals.begin(), dep_vals.end(), consumer_tv) ==
+                  dep_vals.end()) {
+            maybe_non_exclusive = true;
+            break;
+          }
+        }
+      }
+
+      if (!maybe_non_exclusive) {
+        continue;
+      }
+
+      // dep_tv is either a fusion output or has a consumer outside
+      // of the dependency set of the resized tensor. Propagating the
+      // resize to dep_tv should be avoided. However, if the dep_tv
+      // iter domain that corresponds to the resized ID is a broadcast
+      // or there's no such ID, it should still be safe to consider
+      // the resize op exclusive as there's no iter domain to resize.
+      // For a concrete example, see
+      // ResizeSchedulerTest.PropagateMultipleSlicesToInputs4.
+      const auto inp_tv_logical_groups =
+          exact_graph.toGroups(inp_tv->getLogicalDomain());
+      const auto dep_tv_logical_groups =
+          exact_graph.toGroups(dep_tv->getLogicalDomain());
+      auto vals_between = getValsBetween(
+          {inp_tv_logical_groups.begin(), inp_tv_logical_groups.end()},
+          {dep_tv_logical_groups.begin(), dep_tv_logical_groups.end()},
+          exact_graph);
+
+      for (const ValGroup& resize_inp_id : resize_inp_ids) {
+        if (std::find(
+                vals_between.begin(), vals_between.end(), resize_inp_id) ==
+            vals_between.end()) {
+          // This resize can be ignored as there's no corresponding ID
+          // in the dep tv
+          continue;
+        }
+
+        // This resize input ID is not exclusively used
+        non_exclusive_resizes[inp_tv].pushBack(resize_inp_id);
+      }
+    }
+
+    // Analysis of exclusiveness up to inp_tv is now done. Subsequent
+    // resize-based tensor ops do not need to check the same section
+    // of the fusion and can start from out_tv.
+    inputs.insert(out_tv);
+  }
+
+  return non_exclusive_resizes;
+}
+
 } // namespace scheduler_tools
 } // namespace nvfuser
diff --git a/csrc/scheduler/tools/resize_utils.h b/csrc/scheduler/tools/resize_utils.h
index cf03083ad4f..7b19062d6de 100644
--- a/csrc/scheduler/tools/resize_utils.h
+++ b/csrc/scheduler/tools/resize_utils.h
@@ -7,9 +7,12 @@
 // clang-format on
 #pragma once

+#include <unordered_map>
+
 namespace nvfuser {

 class Expr;
+class TensorView;

 namespace scheduler_tools {

@@ -19,5 +22,81 @@ namespace scheduler_tools {
 // fusion inputs are skipped as their loop domains don't matter.
 void propagateResizeToInputs(Expr* resize_op);

+// Given a topologically ordered list of resize-based tensor ops such
+// as slice and pad, check if they can be propagated to fusion inputs
+// exclusively without causing any visible side effect. For example,
+// if a tensor is sliced and also is used to produce an output without
+// the slicing, the slice is considered non-exclusive as the slice
+// input has another visible consumer. Propagating the resize of the
+// slice to the slice input is invalid since the output computed from
+// the slice input depends on the full iteration space.
+//
+// For example, consider the following case:
+//
+// t0 = makeSymbolicTensor(1)
+// fusion.addInput(t0)
+// t1 = t0 + 1
+// t2 = t1[1:10]
+// t3 = t1 + 1
+// fusion.addOutput(t2)
+// fusion.addOutput(t3)
+//
+// In this case, propagating the resize op of the slice would alter
+// t1, which would in turn affect t3, which is a fusion output. Since
+// the change would be visible due to the change of t3, this resize op
+// is considered non-exclusive.
+//
+// Consider a slightly different case as shown below:
+//
+// t0 = makeSymbolicTensor(1)
+// fusion.addInput(t0)
+// t1 = t0[1:10]
+// t2 = t0 + 1
+// fusion.addOutput(t1)
+// fusion.addOutput(t2)
+//
+// Note that the slice is done directly with the fusion input. Since
+// we do not propagate resize ops to fusion inputs, this might seem
+// exclusive. However, it is still considered non-exclusive because
+// the actual scheduling inserts a cache after t0, which is indeed
+// scheduled and could cause a visible side effect if the resize were
+// propagated.
+//
+// Another source of non-exclusiveness is dependent fusion outputs.
+// For example, if a slice input depends on a fusion output,
+// propagation would alter the fusion output. 
Consider a case like:
+//
+// t0 = makeSymbolicTensor(1)
+// fusion.addInput(t0)
+// t1 = t0 + 1
+// t2 = t1[1:10] // slice
+// fusion.addOutput(t1)
+// fusion.addOutput(t2)
+//
+// If the resize op for the slice is propagated to t1, only the
+// section of [1:10] would be computed. Since that would change a
+// fusion output, the resize op is considered non-exclusive.
+//
+// When there's a chain of resize-based ops, for example:
+//
+// t0 = makeSymbolicTensor(1)
+// fusion.addInput(t0)
+// t1 = t0 + 1
+// t2 = t1[1:10]
+// t3 = t2[2:5]
+// t4 = t1 + 1
+// fusion.addOutput(t3)
+// fusion.addOutput(t4)
+//
+// Here, only the first slice is flagged: the second slice is not
+// reported as non-exclusive as long as the first slice is already
+// considered non-exclusive. This will be important when resolving
+// the non-exclusiveness by replication.
+//
+// The function returns a map from tensors that are input to
+// non-exclusive ops to their resize input ID groups. This map will be
+// used to resolve the non-exclusiveness by replication.
+std::unordered_map<TensorView*, ValGroups> getNonExclusiveResizeInfo(
+    const std::vector<Expr*>& ordered_resize_tensor_ops,
+    const ValGraph& exact_graph);
+
 } // namespace scheduler_tools
 } // namespace nvfuser
diff --git a/tests/cpp/test_gpu3.cpp b/tests/cpp/test_gpu3.cpp
index 66087eab2f5..76d45f6de4c 100644
--- a/tests/cpp/test_gpu3.cpp
+++ b/tests/cpp/test_gpu3.cpp
@@ -9249,8 +9249,6 @@ TEST_F(NVFuserTest, AllIdsMultipleDependencies) {
   tv1->split(0, 4);
   tv1->split(0, 8);

-  fusion.print();
-
   auto all_ids = tv1->domain()->allIDs();

   auto split2 = tv1->axis(0)->definition()->as<Split>();
diff --git a/tests/cpp/test_resize.cpp b/tests/cpp/test_resize.cpp
index 4db2c141dd7..beffa0fcf98 100644
--- a/tests/cpp/test_resize.cpp
+++ b/tests/cpp/test_resize.cpp
@@ -4282,7 +4282,7 @@ TEST_P(ResizeSchedulerTest, PropagateSliceToInputsWithReshape2) {
   }
 }

-TEST_P(ResizeSchedulerTest, PropagateMultipleSlicesToInputs) {
+TEST_P(ResizeSchedulerTest, PropagateMultipleSlicesToInputs1) {
   auto fusion_ptr = std::make_unique<Fusion>();
   Fusion& fusion = *fusion_ptr;
   FusionGuard fg(fusion_ptr.get());
@@ -4368,7 +4368,12 @@
     auto outputs = ke.run(inputs);
     testValidate(&fusion, outputs, inputs, __LINE__, __FILE__);
   } else {
-    GTEST_SKIP() << "Scheduling not yet supported";
+    // Make sure all slices are detected as exclusive
+    IdModel id_model(&fusion, /*build_graphs=*/false);
+    const auto& exact_graph = id_model.buildExactGraph();
+    auto non_exclusive_resize_info = scheduler_tools::getNonExclusiveResizeInfo(
+        ir_utils::getOpsOfType<SliceOp, PadOp>(&fusion), exact_graph);
+    EXPECT_TRUE(non_exclusive_resize_info.empty());

     FusionExecutorCache executor_cache(std::move(fusion_ptr));
     auto out_tensors = executor_cache.runFusionWithInputs(inputs);
@@ -4387,6 +4392,296 @@
   }
 }

+// Two horizontal slices, both of which slice the same iter domain.
+TEST_F(ResizeSchedulerTest, PropagateMultipleSlicesToInputs2) {
+  auto fusion_ptr = std::make_unique<Fusion>();
+  Fusion& fusion = *fusion_ptr;
+  FusionGuard fg(fusion_ptr.get());
+
+  std::vector<int64_t> shape({-1, 100});
+
+  EnableOptionsGuard enable_options_guard;
+  EnableOptionsGuard::getCurOptions().set(EnableOption::IdModel, {"all"});
+
+  auto tv0 = makeConcreteTensor(shape);
+  fusion.addInput(tv0);
+
+  auto tv1 = sin(tv0);
+
+  auto tv2 = slice(
+      tv1,
+      {{fusion.zeroVal(), tv1->getLogicalDomain().at(0)->extent()},
+       {IrBuilder::create<Val>(1L), tv1->getLogicalDomain().at(1)->extent()}});
+
+  auto tv3 = sin(tv2);
+
+  auto tv4 = sin(tv1);
+
+  auto tv5 = slice(
+      tv4,
+      {{fusion.zeroVal(), tv1->getLogicalDomain().at(0)->extent()},
+       {IrBuilder::create<Val>(2L), tv1->getLogicalDomain().at(1)->extent()}});
+
+  auto tv6 = sin(tv5);
+
+  fusion.addOutput(tv3);
+  fusion.addOutput(tv6);
+
+  IdModel id_model(&fusion, /*build_graphs=*/false);
+  const auto& exact_graph = id_model.buildExactGraph();
+
+  auto non_exclusive_resize_info = scheduler_tools::getNonExclusiveResizeInfo(
+      ir_utils::getOpsOfType<SliceOp, PadOp>(&fusion), exact_graph);
+
+  // tv1 is the input of the first slice, which is not exclusive as
+  // tv1 is also a producer of tv4.
+  EXPECT_EQ(non_exclusive_resize_info.count(tv1), 1);
+  EXPECT_EQ(
+      non_exclusive_resize_info.at(tv1),
+      exact_graph.toGroups(std::vector<IterDomain*>{tv1->axis(1)}));
+
+  // Similarly, tv4 is the input of the second slice, which is not
+  // exclusive as tv1 is also a producer of tv2.
+  EXPECT_EQ(non_exclusive_resize_info.count(tv4), 1);
+  EXPECT_EQ(
+      non_exclusive_resize_info.at(tv4),
+      exact_graph.toGroups(std::vector<IterDomain*>{tv4->axis(1)}));
+}
+
+// Non-exclusive slice due to a dependency to a fusion output
+TEST_F(ResizeSchedulerTest, PropagateMultipleSlicesToInputs3) {
+  auto fusion_ptr = std::make_unique<Fusion>();
+  Fusion& fusion = *fusion_ptr;
+  FusionGuard fg(fusion_ptr.get());
+
+  std::vector<int64_t> shape({-1, 100});
+
+  EnableOptionsGuard enable_options_guard;
+  EnableOptionsGuard::getCurOptions().set(EnableOption::IdModel, {"all"});
+
+  auto tv0 = makeConcreteTensor(shape);
+  fusion.addInput(tv0);
+
+  auto tv1 = makeConcreteTensor({-1});
+  fusion.addInput(tv1);
+
+  auto tv2 = sin(tv0);
+
+  fusion.addOutput(tv2);
+
+  auto tv3 = add(tv2, broadcast(tv1, {false, true}));
+
+  auto tv4 = slice(
+      tv3,
+      {{fusion.zeroVal(), tv3->getLogicalDomain().at(0)->extent()},
+       {IrBuilder::create<Val>(1L), tv3->getLogicalDomain().at(1)->extent()}});
+
+  auto tv5 = sin(tv4);
+
+  fusion.addOutput(tv5);
+
+  IdModel id_model(&fusion, /*build_graphs=*/false);
+  const auto& exact_graph = id_model.buildExactGraph();
+
+  auto non_exclusive_resize_info = scheduler_tools::getNonExclusiveResizeInfo(
+      ir_utils::getOpsOfType<SliceOp, PadOp>(&fusion), exact_graph);
+
+  // tv3 is the input of the slice, which is not exclusive as tv3
+  // depends on tv2, which is a fusion output.
+  EXPECT_EQ(non_exclusive_resize_info.count(tv3), 1);
+  EXPECT_EQ(
+      non_exclusive_resize_info.at(tv3),
+      exact_graph.toGroups(std::vector<IterDomain*>{tv3->axis(1)}));
+}
+
+// Slice input tensor depends on a fusion output, but the slice is
+// still considered exclusive as the fusion output has no
+// corresponding ID for the sliced ID. More specifically, tv2 is a
+// fusion output that the input of the slice depends on. However, the
+// resize is done for the second axis of tv3, for which tv2 has no
+// corresponding ID. In this case, it should be safe to do the
+// propagation of the resize.
+//
+// Note that scheduling is not yet supported due to the dependency
+// from the slice input ID to the broadcast ID.
+TEST_F(ResizeSchedulerTest, PropagateMultipleSlicesToInputs4) {
+  auto fusion_ptr = std::make_unique<Fusion>();
+  Fusion& fusion = *fusion_ptr;
+  FusionGuard fg(fusion_ptr.get());
+
+  std::vector<int64_t> shape({-1, 100});
+
+  auto tv0 = makeConcreteTensor(shape);
+  fusion.addInput(tv0);
+
+  auto tv1 = makeConcreteTensor({shape[0]});
+  fusion.addInput(tv1);
+
+  auto tv2 = sin(tv1);
+
+  fusion.addOutput(tv2);
+
+  auto tv3 = add(tv0, broadcast(tv2, {false, true}));
+
+  auto tv4 = slice(
+      tv3,
+      {{fusion.zeroVal(), tv3->getLogicalDomain().at(0)->extent()},
+       {IrBuilder::create<Val>(1L), tv3->getLogicalDomain().at(1)->extent()}});
+
+  auto tv5 = sin(tv4);
+
+  fusion.addOutput(tv5);
+
+  IdModel id_model(&fusion, /*build_graphs=*/false);
+  const auto& exact_graph = id_model.buildExactGraph();
+
+  auto non_exclusive_resize_info = scheduler_tools::getNonExclusiveResizeInfo(
+      ir_utils::getOpsOfType<SliceOp, PadOp>(&fusion), exact_graph);
+
+  EXPECT_TRUE(non_exclusive_resize_info.empty());
+}
+
+// Testing chained slices, which should be considered exclusive
+TEST_P(ResizeSchedulerTest, PropagateMultipleSlicesToInputs5) {
+  auto fusion_ptr = std::make_unique<Fusion>();
+  Fusion& fusion = *fusion_ptr;
+  FusionGuard fg(fusion_ptr.get());
+
+  std::vector<int64_t> shape({-1, 100});
+
+  auto tv0 = makeConcreteTensor(shape);
+  fusion.addInput(tv0);
+
+  auto tv1 = sin(tv0);
+
+  auto tv2 = slice(
+      tv1,
+      {{fusion.zeroVal(), tv1->getLogicalDomain().at(0)->extent()},
+       {IrBuilder::create<Val>(1L), tv1->getLogicalDomain().at(1)->extent()}});
+
+  auto tv3 = slice(
+      tv2,
+      {{fusion.zeroVal(), tv2->getLogicalDomain().at(0)->extent()},
+       {IrBuilder::create<Val>(3L), tv2->getLogicalDomain().at(1)->extent()}});
+
+  auto tv4 = sin(tv3);
+
+  fusion.addOutput(tv4);
+
+  auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
+  auto t0 = at::randn({16, 100}, options);
+  std::vector<c10::IValue> inputs({t0});
+
+  const bool use_scheduler = GetParam();
+
+  if (!use_scheduler) {
+    scheduler_tools::propagateResizeToInputs(tv2->definition());
+    scheduler_tools::propagateResizeToInputs(tv3->definition());
+    auto ref_tv = tv4;
+
+    // Fusion should have a uniform loop domain
+    checkLoopDomainEquivalence(ref_tv);
+
+    // Schedule the reference
+    ref_tv->flatten();
+    // For TIDx
+    ref_tv->split(0, 128);
+    // For BIDx
+    ref_tv->split(0, 4);
+
+    scheduler_tools::scheduleLoopDomainsLike(
+        fusion.allTvs(), ref_tv->getLoopDomain());
+
+    // Fusion should still have a uniform loop domain
+    checkLoopDomainEquivalence(ref_tv);
+
+    inlineMost();
+
+    // All tensors, except for fusion inputs, should be fully inlined
+    for (auto tv : fusion.allTvs()) {
+      if (tv->isFusionInput()) {
+        continue;
+      }
+      EXPECT_EQ(tv->getComputeAtPosition(), tv->nDims());
+    }
+
+    ref_tv->axis(-1)->parallelize(ParallelType::TIDx);
+    ref_tv->axis(-2)->parallelize(ParallelType::BIDx);
+
+    KernelExecutor ke;
+    ke.compile(&fusion, inputs);
+    auto outputs = ke.run(inputs);
+    testValidate(&fusion, outputs, inputs, __LINE__, __FILE__);
+  } else {
+    // The two slices do not conflict
+    IdModel id_model(&fusion, /*build_graphs=*/false);
+    const auto& exact_graph = id_model.buildExactGraph();
+    auto non_exclusive_resize_info = scheduler_tools::getNonExclusiveResizeInfo(
+        ir_utils::getOpsOfType<SliceOp, PadOp>(&fusion), exact_graph);
+    EXPECT_TRUE(non_exclusive_resize_info.empty());
+
+    FusionExecutorCache executor_cache(std::move(fusion_ptr));
+    auto out_tensors = executor_cache.runFusionWithInputs(inputs);
+    
testValidate(
+        executor_cache.fusion(), out_tensors, inputs, __LINE__, __FILE__);
+    FusionKernelRuntime* runtime = executor_cache.getMostRecentKernelRuntime();
+    EXPECT_FALSE(runtime->isSegmented());
+    const auto& heuristic_param =
+        runtime->schedulerHeuristics()->heuristicsList().front();
+    EXPECT_EQ(heuristic_param->scheduler_type, SchedulerType::Resize);
+    Fusion* scheduled_fusion =
+        dynamic_cast<KernelExecutor*>(runtime->executors().at(0).get())
+            ->fusion();
+    checkLoopDomainEquivalence(
+        scheduled_fusion->outputs().at(0)->as<TensorView>());
+  }
+}
+
+// Testing chained slices. The first slice is considered
+// non-exclusive, but the following slice should not be.
+TEST_F(ResizeSchedulerTest, PropagateMultipleSlicesToInputs6) {
+  auto fusion_ptr = std::make_unique<Fusion>();
+  Fusion& fusion = *fusion_ptr;
+  FusionGuard fg(fusion_ptr.get());
+
+  std::vector<int64_t> shape({-1, 100});
+
+  auto tv0 = makeConcreteTensor(shape);
+  fusion.addInput(tv0);
+
+  auto tv1 = sin(tv0);
+
+  auto tv2 = slice(
+      tv1,
+      {{fusion.zeroVal(), tv1->getLogicalDomain().at(0)->extent()},
+       {IrBuilder::create<Val>(1L), tv1->getLogicalDomain().at(1)->extent()}});
+
+  auto tv3 = slice(
+      tv2,
+      {{fusion.zeroVal(), tv2->getLogicalDomain().at(0)->extent()},
+       {IrBuilder::create<Val>(3L), tv2->getLogicalDomain().at(1)->extent()}});
+
+  auto tv4 = sin(tv3);
+  fusion.addOutput(tv4);
+
+  auto tv5 = sin(tv1);
+  fusion.addOutput(tv5);
+
+  auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
+  auto t0 = at::randn({16, 100}, options);
+  std::vector<c10::IValue> inputs({t0});
+
+  // Only the first slice should be flagged as non-exclusive
+  IdModel id_model(&fusion, /*build_graphs=*/false);
+  const auto& exact_graph = id_model.buildExactGraph();
+  auto non_exclusive_resize_info = scheduler_tools::getNonExclusiveResizeInfo(
+      ir_utils::getOpsOfType<SliceOp, PadOp>(&fusion), exact_graph);
+  EXPECT_EQ(non_exclusive_resize_info.size(), 1);
+  EXPECT_EQ(non_exclusive_resize_info.count(tv1), 1);
+  EXPECT_EQ(
+      non_exclusive_resize_info.at(tv1),
+      exact_graph.toGroups(std::vector<IterDomain*>{tv1->axis(1)}));
+}
+
 // RoPE-like rotation pattern
 TEST_P(ResizeSchedulerTest, SliceRotateCat) {
   auto fusion_ptr = std::make_unique<Fusion>();
   Fusion& fusion = *fusion_ptr;
   FusionGuard fg(fusion_ptr.get());
@@ -4451,16 +4746,6 @@ TEST_P(ResizeSchedulerTest, SliceRotateCat) {
   // For BIDx
   ref_tv->split(0, 4);

-  {
-    IdModel id_model(&fusion, false);
-    id_model.buildExactGraph();
-    std::ofstream ofs("exact_graph.dot", std::ofstream::trunc);
-    auto dot_string =
-        id_model.idGraph(IdMappingMode::EXACT).toGraphvizDotGraph();
-    ofs << dot_string;
-    ofs.close();
-  }
-
   scheduler_tools::scheduleLoopDomainsLike(
       fusion.allTvs(), ref_tv->getLoopDomain(), /*update_mode=*/true);

@@ -4485,6 +4770,26 @@
     auto outputs = ke.run(inputs);
     testValidate(&fusion, outputs, inputs, __LINE__, __FILE__);
   } else {
+    // tv1 is not considered exclusive as tv0 is also a producer of
+    // tv3, and similarly tv3 is not exclusive. While the common
+    // producer, tv0, is a fusion input and thus isn't itself
+    // scheduled, a cache is inserted for it, which is indeed
+    // scheduled, so the two slices do conflict.
+    IdModel id_model(&fusion, /*build_graphs=*/false);
+    const auto& exact_graph = id_model.buildExactGraph();
+    auto non_exclusive_resize_info = scheduler_tools::getNonExclusiveResizeInfo(
+        ir_utils::getOpsOfType<SliceOp, PadOp>(&fusion), exact_graph);
+    EXPECT_EQ(non_exclusive_resize_info.count(tv1), 1);
+    EXPECT_EQ(
+        non_exclusive_resize_info.at(tv1),
+        exact_graph.toGroups(std::vector<IterDomain*>{tv1->axis(1)}));
+    EXPECT_EQ(non_exclusive_resize_info.count(tv3), 1);
+    EXPECT_EQ(
+        non_exclusive_resize_info.at(tv3),
+        exact_graph.toGroups(std::vector<IterDomain*>{tv3->axis(1)}));
+    // These two entries should be all the info map has.
+    EXPECT_EQ(non_exclusive_resize_info.size(), 2);
+
     GTEST_SKIP() << "Scheduling not yet supported";

     FusionExecutorCache executor_cache(std::move(fusion_ptr));
@@ -4605,6 +4910,26 @@ TEST_P(ResizeSchedulerTest, SliceRotateCatResidual) {
     auto outputs = ke.run(inputs);
     testValidate(&fusion, outputs, inputs, __LINE__, __FILE__);
   } else {
+    // tv1 is not considered exclusive as tv0 is also a producer of
+    // tv3, and similarly tv3 is not exclusive. While the common
+    // producer, tv0, is a fusion input and thus isn't itself
+    // scheduled, a cache is inserted for it, which is indeed
+    // scheduled, so the two slices do conflict.
+    IdModel id_model(&fusion, /*build_graphs=*/false);
+    const auto& exact_graph = id_model.buildExactGraph();
+    auto non_exclusive_resize_info = scheduler_tools::getNonExclusiveResizeInfo(
+        ir_utils::getOpsOfType<SliceOp, PadOp>(&fusion), exact_graph);
+    EXPECT_EQ(non_exclusive_resize_info.count(tv1), 1);
+    EXPECT_EQ(
+        non_exclusive_resize_info.at(tv1),
+        exact_graph.toGroups(std::vector<IterDomain*>{tv1->axis(1)}));
+    EXPECT_EQ(non_exclusive_resize_info.count(tv3), 1);
+    EXPECT_EQ(
+        non_exclusive_resize_info.at(tv3),
+        exact_graph.toGroups(std::vector<IterDomain*>{tv3->axis(1)}));
+    // These two entries should be all the info map has.
+    EXPECT_EQ(non_exclusive_resize_info.size(), 2);
+
     GTEST_SKIP() << "Scheduling not yet supported";

     FusionExecutorCache executor_cache(std::move(fusion_ptr));
@@ -4691,6 +5016,12 @@ TEST_P(ResizeSchedulerTest, PropagatePadToInputs) {
     auto outputs = ke.run(inputs);
     testValidate(&fusion, outputs, inputs, __LINE__, __FILE__);
   } else {
+    IdModel id_model(&fusion, /*build_graphs=*/false);
+    const auto& exact_graph = id_model.buildExactGraph();
+    auto non_exclusive_resize_info = scheduler_tools::getNonExclusiveResizeInfo(
+        ir_utils::getOpsOfType<SliceOp, PadOp>(&fusion), exact_graph);
+    EXPECT_TRUE(non_exclusive_resize_info.empty());
+
     FusionExecutorCache executor_cache(std::move(fusion_ptr));
     auto out_tensors = executor_cache.runFusionWithInputs(inputs);
     testValidate(
@@ -4787,7 +5118,11 @@ TEST_P(ResizeSchedulerTest, PropagateCatToInputs) {
     auto outputs = ke.run(inputs);
     testValidate(&fusion, outputs, inputs, __LINE__, __FILE__);
   } else {
-    GTEST_SKIP() << "Scheduling not yet supported";
+    IdModel id_model(&fusion, /*build_graphs=*/false);
+    const auto& exact_graph = id_model.buildExactGraph();
+    auto non_exclusive_resize_info = scheduler_tools::getNonExclusiveResizeInfo(
+        ir_utils::getOpsOfType<SliceOp, PadOp>(&fusion), exact_graph);
+    EXPECT_TRUE(non_exclusive_resize_info.empty());

     FusionExecutorCache executor_cache(std::move(fusion_ptr));
     auto out_tensors = executor_cache.runFusionWithInputs(inputs);
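For reference, the manual-scheduling (`use_scheduler == false`) branches of the tests above all follow the same recipe. The following condensed sketch shows how the new utility fits together with `propagateResizeToInputs`; it assumes a fusion whose resize ops are all exclusive, and the choice of the first output as the reference tensor is illustrative, not part of the patch.

```cpp
// Condensed sketch of the manual scheduling flow used in the tests above.
IdModel id_model(&fusion, /*build_graphs=*/false);
const auto& exact_graph = id_model.buildExactGraph();

// Bail out (or, per the followup PR, replicate) if any resize is
// non-exclusive.
auto info = scheduler_tools::getNonExclusiveResizeInfo(
    ir_utils::getOpsOfType<SliceOp, PadOp>(&fusion), exact_graph);
NVF_ERROR(info.empty(), "Conflicting resize ops");

// Propagate each resize op to the fusion inputs, in topological order.
for (Expr* op : ir_utils::getOpsOfType<SliceOp, PadOp>(&fusion)) {
  scheduler_tools::propagateResizeToInputs(op);
}

// Schedule a reference tensor (assumed here to be the first output)
// and mirror its loop domain across all tensors.
TensorView* ref_tv = fusion.outputs().at(0)->as<TensorView>();
ref_tv->flatten();
ref_tv->split(0, 128); // For TIDx
ref_tv->split(0, 4); // For BIDx
scheduler_tools::scheduleLoopDomainsLike(
    fusion.allTvs(), ref_tv->getLoopDomain());
inlineMost();
ref_tv->axis(-1)->parallelize(ParallelType::TIDx);
ref_tv->axis(-2)->parallelize(ParallelType::BIDx);
```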