diff --git a/csrc/scheduler/resize.cpp b/csrc/scheduler/resize.cpp index 194087b90e8..43a7195fe20 100644 --- a/csrc/scheduler/resize.cpp +++ b/csrc/scheduler/resize.cpp @@ -71,24 +71,11 @@ bool ResizeScheduler::canScheduleCompileTime(Fusion* fusion) { IdModel id_model(fusion, /*build_graphs=*/false); const auto& broadcast_graph = id_model.buildBroadcastGraph(); - auto resize_based_tensor_ops = ir_utils::getOpsOfType(fusion); - - if (auto non_exclusive_resizes = scheduler_tools::getNonExclusiveResizeInfo( - resize_based_tensor_ops, id_model.idGraph(IdMappingMode::EXACT)); - !non_exclusive_resizes.empty()) { - std::stringstream msg; - msg << "Propagation of resizes would affect fusion outputs."; - for (const auto& [tv, resize_ids] : non_exclusive_resizes) { - msg << " Resize input tv: " << tv->toString() - << ", resize input ID groups: " << nvfuser::toString(resize_ids); - } - scheduler_debug_utils::canScheduleRejectReason(schedulerType(), msg.str()); - return false; - } + auto resize_tensor_ops = ir_utils::getOpsOfType(fusion); // Slicing of or to a broadcast ID is not allowed yet. - for (auto tensor_op : resize_based_tensor_ops) { - TensorView* out_tv = tensor_op->output(0)->as(); + for (auto resize_tensor_op : resize_tensor_ops) { + TensorView* out_tv = resize_tensor_op->output(0)->as(); for (auto logical_id : out_tv->getLogicalDomain()) { Resize* resize = dynamic_cast(logical_id->definition()); if (resize == nullptr) { @@ -133,6 +120,44 @@ bool ResizeScheduler::canScheduleCompileTime(Fusion* fusion) { return false; } + // Having different resizes between outputs is not allowed at this + // moment. 
For example, consider a fusion like: + // + // t0 = [i0] + // fusion.addInput(t0) + // t1 = t0[:i0/2] + // t2 = t0[i0/2:] + // fusion.addOutput(t1) + // fusion.addOutput(t2) + // + // For now, this is not going to be fused since t1 and t2 have + // different resize ops, although in this case, since the extents of t1 and + // t2 are the same, it should be relatively straightforward to fuse them + // together. + for (auto out_tv : ir_utils::filterByType(fusion->outputs())) { + if (out_tv == ref_tv) { + continue; + } + auto exprs = ValGraphBFS::getExprGroupsBetween( + broadcast_graph, + broadcast_graph.toGroups(ref_tv->getLogicalDomain()), + broadcast_graph.toGroups(out_tv->getLogicalDomain()), + /*require_all_to_visited=*/false) + .first; + for (const auto& [expr_g, dir] : exprs) { + if (expr_g->front()->isA()) { + std::stringstream msg; + msg << "Resize between reference and output not allowed."; + msg << " Reference: " << ref_tv->toString() + << ". Output: " << out_tv->toString() + << ". Resize: " << expr_g->front()->toString(); + scheduler_debug_utils::canScheduleRejectReason( + schedulerType(), msg.str()); + return false; + } + } + } + // Disable the scheduler if there's a squeeze op. The loop option // may also need to be enabled in that case, but that option is not // turned on automatically yet. 
@@ -163,6 +188,31 @@ void ResizeScheduler::schedule(Fusion* fusion, const HeuristicParams* params) { scheduler_utils::cacheInputs(fusion, true); scheduler_utils::cacheAndForkOutputs(fusion, true); + auto resize_tensor_ops = ir_utils::getOpsOfType(fusion); + + IdModel id_model(fusion, /*build_graphs=*/false); + const auto& exact_graph = id_model.buildExactGraph(); + + // Replicate resize inputs if necessary to avoid conflicting + // propagations + const auto exclusivity_info_map = scheduler_tools::getNonExclusiveResizeInfo( + resize_tensor_ops, exact_graph); + for (auto resize_tensor_op : resize_tensor_ops) { + auto out_tv = resize_tensor_op->output(0)->as(); + if (exclusivity_info_map.count(out_tv) == 0) { + continue; + } + auto inp_tv = resize_tensor_op->input(0)->as(); + // Since cacheInput may skip caching if an input is used by + // slice/pad, inp_tv may be a fusion input, in which case it is + // not necessary to recompute the tensor. + if (inp_tv->isFusionInput()) { + continue; + } + auto inp_tv_copy = RecomputeTv::recompute(inp_tv); + ir_utils::replaceValInExprInputs(resize_tensor_op, inp_tv, inp_tv_copy); + } + for (auto expr : fusion->exprs()) { if (!expr->isOneOf()) { continue; @@ -186,9 +236,14 @@ void ResizeScheduler::schedule(Fusion* fusion, const HeuristicParams* params) { ref_tv->axis(-1)->parallelize(ParallelType::TIDx); ref_tv->axis(-2)->parallelize(ParallelType::BIDx); - // Propagate the reference to the other tensors + // Propagate the reference to the other tensors. Note that the + // update flag is enabled so as to work around the resize propagation + // issue. This may not work if there's a tensor that is reshaped + // from the reference tensor, but that should not be the case as the + // reference is picked by the same routine used for the pointwise + // scheduler. 
scheduler_tools::scheduleLoopDomainsLike( - fusion->allTvs(), ref_tv->getLoopDomain()); + fusion->allTvs(), ref_tv->getLoopDomain(), true); inlineMost(); diff --git a/csrc/scheduler/tools/resize_utils.cpp b/csrc/scheduler/tools/resize_utils.cpp index c812812b905..fc8c0cc5f09 100644 --- a/csrc/scheduler/tools/resize_utils.cpp +++ b/csrc/scheduler/tools/resize_utils.cpp @@ -66,13 +66,13 @@ void propagateResizeToInputs(Expr* resize_tensor_op) { } } -std::unordered_map getNonExclusiveResizeInfo( +std::unordered_map getNonExclusiveResizeInfo( const std::vector& ordered_resize_tensor_ops, const ValGraph& exact_graph) { NVF_ERROR(!ordered_resize_tensor_ops.empty()); Fusion* fusion = ordered_resize_tensor_ops[0]->fusion(); - std::unordered_map non_exclusive_resizes; + std::unordered_map non_exclusive_resizes; std::unordered_set inputs{ fusion->inputs().begin(), fusion->inputs().end()}; @@ -98,6 +98,8 @@ std::unordered_map getNonExclusiveResizeInfo( auto inp_tv = dynamic_cast(resize_tensor_op->inputs().at(0)); auto out_tv = dynamic_cast(resize_tensor_op->outputs().at(0)); + ResizeExclusivityInfo info; + ValGroups resize_inp_ids = get_root_to_logical_resizes(out_tv); NVF_ERROR(!resize_inp_ids.empty()); @@ -159,10 +161,15 @@ std::unordered_map getNonExclusiveResizeInfo( } // This resize input ID is not exclusively used - non_exclusive_resizes[inp_tv].pushBack(resize_inp_id); + info.non_exclusive_dep_tvs.push_back(dep_tv); + info.resized_ids.pushBack(resize_inp_id); } } + if (!info.non_exclusive_dep_tvs.empty()) { + NVF_ERROR(non_exclusive_resizes.emplace(out_tv, info).second); + } + // Analysis of exclusiveness until in_tv is done. Following // resize-based tensor ops do not need to check the same section // of the fusion and can start from out_tv. 
diff --git a/csrc/scheduler/tools/resize_utils.h b/csrc/scheduler/tools/resize_utils.h index 7b19062d6de..b9afed5effa 100644 --- a/csrc/scheduler/tools/resize_utils.h +++ b/csrc/scheduler/tools/resize_utils.h @@ -91,10 +91,26 @@ void propagateResizeToInputs(Expr* resize_op); // long as the first slice is considered non-exclusive. This will be // important when resolving the non-exclusiveness by replication. // -// The function returns a map from tensors that are input to -// non-exclusive ops to their resize input ID groups. This map will be +// The function returns a map from tensors that are outputs of +// non-exclusive ops to ResizeExclusivityInfo. This map will be // used to resolve the non-exclusiveness by replication. -std::unordered_map getNonExclusiveResizeInfo( +struct ResizeExclusivityInfo { + // Dependent tensors that should not be resized + std::vector non_exclusive_dep_tvs; + // ID groups of resize input IDs + ValGroups resized_ids; + + bool operator==(const ResizeExclusivityInfo& other) const { + return non_exclusive_dep_tvs == other.non_exclusive_dep_tvs && + resized_ids == other.resized_ids; + } + + bool operator!=(const ResizeExclusivityInfo& other) const { + return !(*this == other); + } +}; + +std::unordered_map getNonExclusiveResizeInfo( const std::vector& ordered_resize_tensor_ops, const ValGraph& exact_graph); diff --git a/tests/cpp/test_resize.cpp b/tests/cpp/test_resize.cpp index 9c959a2e6a4..587f72143a4 100644 --- a/tests/cpp/test_resize.cpp +++ b/tests/cpp/test_resize.cpp @@ -4483,25 +4483,83 @@ TEST_F(ResizeSchedulerTest, PropagateMultipleSlicesToInputs2) { fusion.addOutput(tv3); fusion.addOutput(tv6); - IdModel id_model(&fusion, /*build_graphs=*/false); - const auto& exact_graph = id_model.buildExactGraph(); + { + IdModel id_model(&fusion, /*build_graphs=*/false); + const auto& exact_graph = id_model.buildExactGraph(); + auto non_exclusive_resize_info = scheduler_tools::getNonExclusiveResizeInfo( + ir_utils::getOpsOfType(&fusion), 
exact_graph); - auto non_exclusive_resize_info = scheduler_tools::getNonExclusiveResizeInfo( - ir_utils::getOpsOfType(&fusion), exact_graph); + EXPECT_EQ(non_exclusive_resize_info.size(), 2); - // tv1 is the input of the first slice, which is not exclusive as - // tv1 is also a producer of tv4. - EXPECT_EQ(non_exclusive_resize_info.count(tv1), 1); - EXPECT_EQ( - non_exclusive_resize_info.at(tv1), - exact_graph.toGroups(std::vector{tv1->axis(1)})); + // tv2 is the output of the first slice, which is not exclusive as + // tv1 is also a producer of tv4. + EXPECT_EQ(non_exclusive_resize_info.count(tv2), 1); + scheduler_tools::ResizeExclusivityInfo tv2_info{ + {tv1}, exact_graph.toGroups(std::vector{tv1->axis(1)})}; + EXPECT_EQ(non_exclusive_resize_info.at(tv2), tv2_info); + + // Similarly, tv5 is the output of the second slice, which is not exclusive + // as tv1 is also a producer of tv2. + EXPECT_EQ(non_exclusive_resize_info.count(tv5), 1); + scheduler_tools::ResizeExclusivityInfo tv5_info{ + {tv1}, exact_graph.toGroups(std::vector{tv4->axis(1)})}; + EXPECT_EQ(non_exclusive_resize_info.at(tv5), tv5_info); + } - // Similary, tv4 is the input of the second slice, which is not exclusive as - // tv1 is also a producer of tv2. 
- EXPECT_EQ(non_exclusive_resize_info.count(tv4), 1); - EXPECT_EQ( - non_exclusive_resize_info.at(tv4), - exact_graph.toGroups(std::vector{tv4->axis(1)})); + // Test replication-based mitigation of conflicts + { + Fusion fusion_copy = fusion; + FusionGuard fg(&fusion_copy); + + auto tv0 = fusion_copy.inputs().at(0)->as(); + auto tv2 = + fusion_copy.outputs().at(0)->definition()->input(0)->as(); + auto slice = dynamic_cast(tv2->definition()); + ASSERT_NE(slice, nullptr); + auto tv1 = slice->input(0)->as(); + auto tv5 = + fusion_copy.outputs().at(1)->definition()->input(0)->as(); + auto tv4 = tv5->definition()->input(0)->as(); + + // Replicate tv1 for tv2 + auto private_copy = RecomputeTv::recompute(tv1); + ir_utils::replaceValInExprInputs(slice, tv1, private_copy); + + // The two slices should still be reported as non-exclusive but they + // both are shared at the fusion input. + IdModel id_model(&fusion_copy, /*build_graphs=*/false); + const auto& exact_graph = id_model.buildExactGraph(); + auto non_exclusive_resize_info = scheduler_tools::getNonExclusiveResizeInfo( + ir_utils::getOpsOfType(&fusion_copy), exact_graph); + EXPECT_EQ(non_exclusive_resize_info.size(), 2); + EXPECT_EQ(non_exclusive_resize_info.count(tv2), 1); + scheduler_tools::ResizeExclusivityInfo tv2_info{ + {tv0}, exact_graph.toGroups(std::vector{tv0->axis(1)})}; + EXPECT_EQ(non_exclusive_resize_info.at(tv2), tv2_info); + + EXPECT_EQ(non_exclusive_resize_info.count(tv5), 1); + scheduler_tools::ResizeExclusivityInfo tv5_info{ + {tv0}, exact_graph.toGroups(std::vector{tv4->axis(1)})}; + EXPECT_EQ(non_exclusive_resize_info.at(tv5), tv5_info); + } + + auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0); + auto t0 = at::randn({16, 100}, options); + std::vector inputs({t0}); + + FusionExecutorCache executor_cache(std::move(fusion_ptr)); + auto out_tensors = executor_cache.runFusionWithInputs(inputs); + testValidate( + executor_cache.fusion(), out_tensors, inputs, __LINE__, __FILE__); 
+ + // While the slices can be transformed to be all exclusive, it is + // currently segmented as the outputs have different shapes. Both + // segments should be scheduled as resize segments. + FusionKernelRuntime* runtime = executor_cache.getMostRecentKernelRuntime(); + const auto& heuristic_list = runtime->schedulerHeuristics()->heuristicsList(); + EXPECT_EQ(heuristic_list.size(), 2); + EXPECT_EQ(heuristic_list[0]->scheduler_type, SchedulerType::Resize); + EXPECT_EQ(heuristic_list[1]->scheduler_type, SchedulerType::Resize); } // Non-exclusive slice due to a dependency to a fusion output @@ -4542,12 +4600,57 @@ TEST_F(ResizeSchedulerTest, PropagateMultipleSlicesToInputs3) { auto non_exclusive_resize_info = scheduler_tools::getNonExclusiveResizeInfo( ir_utils::getOpsOfType(&fusion), exact_graph); - // tv3 is the input of the slice, which is not exclusive as + // tv4 is the output of the slice, which is not exclusive as // tv3 depends on tv2, which is a fusion output - EXPECT_EQ(non_exclusive_resize_info.count(tv3), 1); - EXPECT_EQ( - non_exclusive_resize_info.at(tv3), - exact_graph.toGroups(std::vector{tv3->axis(1)})); + EXPECT_EQ(non_exclusive_resize_info.count(tv4), 1); + scheduler_tools::ResizeExclusivityInfo tv4_info{ + {tv2}, exact_graph.toGroups(std::vector{tv3->axis(1)})}; + EXPECT_EQ(non_exclusive_resize_info.at(tv4), tv4_info); + + // Test replication-based mitigation of conflicts + { + Fusion fusion_copy = fusion; + FusionGuard fg(&fusion_copy); + + auto tv0 = fusion_copy.inputs().at(0)->as(); + auto tv5 = fusion_copy.outputs().at(1)->as(); + auto tv4 = tv5->definition()->input(0)->as(); + auto tv3 = tv4->definition()->input(0)->as(); + + auto private_copy = RecomputeTv::recompute(tv3); + ir_utils::replaceValInExprInputs(tv4->definition(), tv3, private_copy); + + IdModel id_model(&fusion_copy, /*build_graphs=*/false); + const auto& exact_graph = id_model.buildExactGraph(); + auto non_exclusive_resize_info = scheduler_tools::getNonExclusiveResizeInfo( + 
ir_utils::getOpsOfType(&fusion_copy), exact_graph); + EXPECT_EQ(non_exclusive_resize_info.size(), 1); + EXPECT_EQ(non_exclusive_resize_info.count(tv4), 1); + scheduler_tools::ResizeExclusivityInfo tv4_info{ + {tv0}, exact_graph.toGroups(std::vector{tv0->axis(1)})}; + EXPECT_EQ(non_exclusive_resize_info.at(tv4), tv4_info); + } + + GTEST_SKIP() << "Scheduling not yet supported due to broadcast"; + + auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0); + auto t0 = at::randn({16, 100}, options); + auto t1 = at::randn({16}, options); + std::vector inputs({t0, t1}); + + FusionExecutorCache executor_cache(std::move(fusion_ptr)); + auto out_tensors = executor_cache.runFusionWithInputs(inputs); + testValidate( + executor_cache.fusion(), out_tensors, inputs, __LINE__, __FILE__); + FusionKernelRuntime* runtime = executor_cache.getMostRecentKernelRuntime(); + EXPECT_FALSE(runtime->isSegmented()); + const auto& heuristic_param = + runtime->schedulerHeuristics()->heuristicsList().front(); + EXPECT_EQ(heuristic_param->scheduler_type, SchedulerType::Resize); + Fusion* scheduled_fusion = + dynamic_cast(runtime->executors().at(0).get())->fusion(); + checkLoopDomainEquivalence( + scheduled_fusion->outputs().at(0)->as()); } // Slice input tensor depends on a fusion output, but the slice is @@ -4732,10 +4835,29 @@ TEST_F(ResizeSchedulerTest, PropagateMultipleSlicesToInputs6) { auto non_exclusive_resize_info = scheduler_tools::getNonExclusiveResizeInfo( ir_utils::getOpsOfType(&fusion), exact_graph); EXPECT_EQ(non_exclusive_resize_info.size(), 1); - EXPECT_EQ(non_exclusive_resize_info.count(tv1), 1); - EXPECT_EQ( - non_exclusive_resize_info.at(tv1), - exact_graph.toGroups(std::vector{tv1->axis(1)})); + EXPECT_EQ(non_exclusive_resize_info.count(tv2), 1); + scheduler_tools::ResizeExclusivityInfo tv2_info{ + {tv1}, exact_graph.toGroups(std::vector{tv1->axis(1)})}; + EXPECT_EQ(non_exclusive_resize_info.at(tv2), tv2_info); + + // When scheduled, since the shape of the 
tv4 is different from the + // shape of tv5, this fusion is segmented. One segment is a resize + // segment consisting of tv2 and tv3 slices. Another is a pointwise + // segment for tv5. + FusionExecutorCache executor_cache(std::move(fusion_ptr)); + auto out_tensors = executor_cache.runFusionWithInputs(inputs); + testValidate( + executor_cache.fusion(), out_tensors, inputs, __LINE__, __FILE__); + FusionKernelRuntime* runtime = executor_cache.getMostRecentKernelRuntime(); + const auto& heuristic_list = runtime->schedulerHeuristics()->heuristicsList(); + EXPECT_EQ(heuristic_list.size(), 2); + // They should be a combination of a resize scheduler and a pointwise + // scheduler + EXPECT_TRUE( + (heuristic_list[0]->scheduler_type == SchedulerType::PointWise && + heuristic_list[1]->scheduler_type == SchedulerType::Resize) || + (heuristic_list[0]->scheduler_type == SchedulerType::Resize && + heuristic_list[1]->scheduler_type == SchedulerType::PointWise)); } // RoPE-like rotation patten @@ -4835,19 +4957,17 @@ TEST_P(ResizeSchedulerTest, SliceRotateCat) { const auto& exact_graph = id_model.buildExactGraph(); auto non_exclusive_resize_info = scheduler_tools::getNonExclusiveResizeInfo( ir_utils::getOpsOfType(&fusion), exact_graph); - EXPECT_EQ(non_exclusive_resize_info.count(tv1), 1); - EXPECT_EQ( - non_exclusive_resize_info.at(tv1), - exact_graph.toGroups(std::vector{tv1->axis(1)})); - EXPECT_EQ(non_exclusive_resize_info.count(tv3), 1); - EXPECT_EQ( - non_exclusive_resize_info.at(tv3), - exact_graph.toGroups(std::vector{tv3->axis(1)})); + EXPECT_EQ(non_exclusive_resize_info.count(tv2), 1); + scheduler_tools::ResizeExclusivityInfo tv2_info{ + {tv0}, exact_graph.toGroups(std::vector{tv1->axis(1)})}; + EXPECT_EQ(non_exclusive_resize_info.at(tv2), tv2_info); + EXPECT_EQ(non_exclusive_resize_info.count(tv4), 1); + scheduler_tools::ResizeExclusivityInfo tv4_info{ + {tv0}, exact_graph.toGroups(std::vector{tv3->axis(1)})}; + EXPECT_EQ(non_exclusive_resize_info.at(tv4), tv4_info); 
// These two entries should be all the info map has. EXPECT_EQ(non_exclusive_resize_info.size(), 2); - GTEST_SKIP() << "Scheduling not yet supported"; - FusionExecutorCache executor_cache(std::move(fusion_ptr)); auto out_tensors = executor_cache.runFusionWithInputs(inputs); testValidate( @@ -4975,19 +5095,17 @@ TEST_P(ResizeSchedulerTest, SliceRotateCatResidual) { const auto& exact_graph = id_model.buildExactGraph(); auto non_exclusive_resize_info = scheduler_tools::getNonExclusiveResizeInfo( ir_utils::getOpsOfType(&fusion), exact_graph); - EXPECT_EQ(non_exclusive_resize_info.count(tv1), 1); - EXPECT_EQ( - non_exclusive_resize_info.at(tv1), - exact_graph.toGroups(std::vector{tv1->axis(1)})); - EXPECT_EQ(non_exclusive_resize_info.count(tv3), 1); - EXPECT_EQ( - non_exclusive_resize_info.at(tv3), - exact_graph.toGroups(std::vector{tv3->axis(1)})); + EXPECT_EQ(non_exclusive_resize_info.count(tv2), 1); + scheduler_tools::ResizeExclusivityInfo tv2_info{ + {tv0}, exact_graph.toGroups(std::vector{tv1->axis(1)})}; + EXPECT_EQ(non_exclusive_resize_info.at(tv2), tv2_info); + EXPECT_EQ(non_exclusive_resize_info.count(tv4), 1); + scheduler_tools::ResizeExclusivityInfo tv4_info{ + {tv0}, exact_graph.toGroups(std::vector{tv3->axis(1)})}; + EXPECT_EQ(non_exclusive_resize_info.at(tv4), tv4_info); // These two entries should be all the info map has. EXPECT_EQ(non_exclusive_resize_info.size(), 2); - GTEST_SKIP() << "Scheduling not yet supported"; - FusionExecutorCache executor_cache(std::move(fusion_ptr)); auto out_tensors = executor_cache.runFusionWithInputs(inputs); testValidate( @@ -5005,6 +5123,104 @@ TEST_P(ResizeSchedulerTest, SliceRotateCatResidual) { } } +// Rotate twice. Resolving the non-exclusivity must be done in a +// topological order. 
+TEST_F(ResizeSchedulerTest, SliceRotateCatTwice) { + auto fusion_ptr = std::make_unique(); + Fusion& fusion = *fusion_ptr; + FusionGuard fg(fusion_ptr.get()); + + std::vector shape({-1, 100}); + + EnableOptionsGuard enable_options_guard; + EnableOptionsGuard::getCurOptions().set(EnableOption::IdModel, {"all"}); + + auto tv0 = makeConcreteTensor(shape); + fusion.addInput(tv0); + + auto tv1 = sin(tv0); + + auto tv2 = slice( + tv1, + {{fusion.zeroVal(), tv1->getLogicalDomain().at(0)->extent()}, + {fusion.zeroVal(), IrBuilder::create(shape[1] / 2)}}); + + auto tv3 = slice( + tv1, + {{fusion.zeroVal(), tv1->getLogicalDomain().at(0)->extent()}, + {IrBuilder::create(shape[1] / 2), + IrBuilder::create(shape[1])}}); + + auto tv4 = cat({tv3, tv2}, -1); + + auto tv5 = slice( + tv4, + {{fusion.zeroVal(), tv4->getLogicalDomain().at(0)->extent()}, + {fusion.zeroVal(), IrBuilder::create(shape[1] / 2)}}); + + auto tv6 = slice( + tv4, + {{fusion.zeroVal(), tv4->getLogicalDomain().at(0)->extent()}, + {IrBuilder::create(shape[1] / 2), + IrBuilder::create(shape[1])}}); + + auto tv7 = cat({tv6, tv5}, -1); + + fusion.addOutput(tv7); + + auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0); + auto t0 = at::randn({16, 100}, options); + std::vector inputs({t0}); + + // tv1 is not considered exclusive as tv0 is also a consumer of + // tv3. Same for tv3. While the common input, tv0, is a fusion + // input, so it isn't actually scheduled, since a cache is + // inserted, which is indeed scheduled, the two slices do + // conflict. 
+ IdModel id_model(&fusion, /*build_graphs=*/false); + const auto& exact_graph = id_model.buildExactGraph(); + auto non_exclusive_resize_info = scheduler_tools::getNonExclusiveResizeInfo( + ir_utils::getOpsOfType(&fusion), exact_graph); + + // tv2 + EXPECT_EQ(non_exclusive_resize_info.count(tv2), 1); + scheduler_tools::ResizeExclusivityInfo tv2_info{ + {tv1}, exact_graph.toGroups(std::vector{tv1->axis(1)})}; + EXPECT_EQ(non_exclusive_resize_info.at(tv2), tv2_info); + + // tv3 + EXPECT_EQ(non_exclusive_resize_info.count(tv3), 1); + scheduler_tools::ResizeExclusivityInfo tv3_info{ + {tv1}, exact_graph.toGroups(std::vector{tv1->axis(1)})}; + EXPECT_EQ(non_exclusive_resize_info.at(tv3), tv3_info); + + // tv5 + EXPECT_EQ(non_exclusive_resize_info.count(tv5), 1); + scheduler_tools::ResizeExclusivityInfo tv5_info{ + {tv4}, exact_graph.toGroups(std::vector{tv4->axis(1)})}; + EXPECT_EQ(non_exclusive_resize_info.at(tv5), tv5_info); + + // tv6 + EXPECT_EQ(non_exclusive_resize_info.count(tv6), 1); + scheduler_tools::ResizeExclusivityInfo tv6_info{ + {tv4}, exact_graph.toGroups(std::vector{tv4->axis(1)})}; + EXPECT_EQ(non_exclusive_resize_info.at(tv6), tv6_info); + + // These should be all the info the map has. + EXPECT_EQ(non_exclusive_resize_info.size(), 4); + + FusionExecutorCache executor_cache(std::move(fusion_ptr)); + auto out_tensors = executor_cache.runFusionWithInputs(inputs); + testValidate( + executor_cache.fusion(), out_tensors, inputs, __LINE__, __FILE__); + + FusionKernelRuntime* runtime = executor_cache.getMostRecentKernelRuntime(); + EXPECT_FALSE(runtime->isSegmented()); + const auto& heuristic_param = + runtime->schedulerHeuristics()->heuristicsList().front(); + EXPECT_EQ(heuristic_param->scheduler_type, SchedulerType::Resize); +} + // Consumer-based scheduling of pad TEST_P(ResizeSchedulerTest, PropagatePadToInputs) { auto fusion_ptr = std::make_unique();