Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions include/cutlass/gemm/kernel/sm100_tile_scheduler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,11 @@ class PersistentTileSchedulerSm100 {
return clc_pipe_producer_state;
}

CUTLASS_DEVICE
bool is_last_tile(WorkTileInfo& work_tile_info, uint32_t advance_count = 1) const {
return false;
}

// Kernel helper function to get next work tile
template <class TileSchedulerPipeline, class TileSchedulerPipelineState>
CUTLASS_HOST_DEVICE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,12 @@ class PersistentTileSchedulerSm100StreamK {
PipelineState<Stages>
advance_to_next_work(Pipeline& clc_pipeline, PipelineState<Stages> clc_pipe_producer_state) const {
return sm100_scheduler_.advance_to_next_work(clc_pipeline, clc_pipe_producer_state);
}
}

CUTLASS_DEVICE
bool is_last_tile(WorkTileInfo work_tile_info, uint32_t advance_count = 1) const {
return false;
}

// Given the inputs, computes the total number of output blocks this problem will compute over
template<class ProblemShape>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -804,15 +804,14 @@ class GemmUniversal<
// Update starting mainloop pipeline state for the next tile
mainloop_pipe_consumer_state.advance(work_k_tile_count);
}
#ifdef CUTLASS_ENABLE_GDC_FOR_SM90

if (scheduler.is_last_tile(work_tile_info)) {
// Hint on an early release of global memory resources.
// The timing of calling this function only influences performance,
// not functional correctness.
cutlass::arch::launch_dependent_grids();

}
#endif

// Index of warp group within consumer warp groups
int consumer_warp_group_idx = canonical_warp_group_idx() - NumLoadWarpGroups;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -868,15 +868,13 @@ class GemmUniversal<
// Update starting mainloop pipeline state for the next tile
mainloop_pipe_consumer_state.advance(k_tile_count * NumMmaWarpGroups);

#ifdef CUTLASS_ENABLE_GDC_FOR_SM90
if (scheduler.is_last_tile(work_tile_info, NumMmaWarpGroups)) {
// Hint on an early release of global memory resources.
// The timing of calling this function only influences performance,
// not functional correctness.
cutlass::arch::launch_dependent_grids();

}
#endif

// Order two Math WG's Epilogue one after the other
math_wg_order_barrier.wait();
Expand Down