File tree: 3 files changed, +6 -4 lines changed
Original file line number | Diff line number | Diff line change
@@ -601,7 +601,7 @@ class AllocationInserter : public kir::ExprMutator {
601
601
// generic-async proxy fence and wgmma fence before each mma
602
602
// instruction. For this case, we need to insert these fences
603
603
// after the initialization of the accumulator, so that the
604
- // inilization is visible to the async proxy.
604
+ // initialization is visible to the async proxy.
605
605
// When all inputs are guarded by mbarrier, we will insert these
606
606
// fences before each mma instruction, so there is no need to
607
607
// insert them after the initialization of the accumulator here.
Original file line number | Diff line number | Diff line change
@@ -782,7 +782,7 @@ class ReadAfterWriteSyncs : public kir::ExprMutator {
782
782
}
783
783
};
784
784
785
- // Insert wait expressions for WAR harzard for async operations such as wgmma
785
+ // Insert wait expressions for WAR hazard for async operations such as wgmma
786
786
// and tma store. To do so, we find the structure like the following example:
787
787
// for 1
788
788
// for 2
Original file line number | Diff line number | Diff line change
@@ -633,15 +633,17 @@ void HopperMultipleMatmulScheduler::setUpCircularBuffering() {
633
633
/* prefetch_distance=*/
634
634
params_->circular_buffer_options .smem_circular_buffer_stage -
635
635
params_->circular_buffer_options
636
- .smem_circular_buffer_prefetch_gap );
636
+ .smem_circular_buffer_prefetch_gap ,
637
+ WarpSpecialized (ParallelType::TIDy));
637
638
}
638
639
for (TensorView* bcw_smem : bcw_smems_) {
639
640
bcw_smem->circularBuffer (
640
641
params_->circular_buffer_options .smem_circular_buffer_stage ,
641
642
/* prefetch_distance=*/
642
643
params_->circular_buffer_options .smem_circular_buffer_stage -
643
644
params_->circular_buffer_options
644
- .smem_circular_buffer_prefetch_gap );
645
+ .smem_circular_buffer_prefetch_gap ,
646
+ WarpSpecialized (ParallelType::TIDy));
645
647
}
646
648
}
647
649
You can’t perform that action at this time.
0 commit comments