Skip to content

Commit

Permalink
make blocksync compatible with warp specialization
Browse files: browse the repository at this point in the history
  • Loading branch information
rdspring1 committed Dec 20, 2024
1 parent 99f492a commit 0c784e0
Showing 1 changed file with 9 additions and 3 deletions.
12 changes: 9 additions & 3 deletions csrc/codegen.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -3325,10 +3325,16 @@ class CudaKernelGenerator : private kir::ConstIrVisitor {
 // Use a custom synchronization method if enabled
 if (getNvFuserEnv("USE_BLOCK_SYNC_ATOMIC")) {
 indent() << "block_sync::sync();\n";
-} else if (isAligned()) {
-indent() << "__syncthreads();\n";
 } else {
-indent() << "__barrier_sync(0);\n";
+ArgumentBuilder sync_call_template_parms;
+sync_call_template_parms.arg(isAligned());
+
+ArgumentBuilder sync_call_args;
+sync_call_args.arg(genComputeBlockDim());
+
+auto sync_call =
+genCall("block_sync::sync", sync_call_template_parms, sync_call_args);
+indent() << sync_call << ";\n";
 }
 }

Expand Down

0 comments on commit 0c784e0

Please sign in to comment.