Skip to content

Commit 39aec16

Browse files
authored
Use tcgen05 as namespace for TMem ld/st (NVIDIA#4279)
For better consistency
1 parent bb5b38c commit 39aec16

File tree

2 files changed

+4
-4
lines changed

2 files changed

+4
-4
lines changed

csrc/kernel_ir.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -457,7 +457,7 @@ std::string Asm::utility() const {
457457
std::regex ld_pattern(R"(tcgen05\.ld\.sync\.aligned\.([^.]+)\.x\d+\.b32)");
458458
std::smatch match;
459459
if (std::regex_match(code, match, ld_pattern)) {
460-
std::string result = "tmem::load";
460+
std::string result = "tcgen05::load";
461461
result.append(match[1]);
462462
return result;
463463
}
@@ -466,7 +466,7 @@ std::string Asm::utility() const {
466466
std::regex st_pattern(R"(tcgen05\.st\.sync\.aligned\.([^.]+)\.x\d+\.b32)");
467467
std::smatch match;
468468
if (std::regex_match(code, match, st_pattern)) {
469-
std::string result = "tmem::store";
469+
std::string result = "tcgen05::store";
470470
result.append(match[1]);
471471
return result;
472472
}

tests/cpp/test_tmem.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,7 @@ TEST_F(TMemTestCompileOnly, SetTMemDimSepPosNonTMem) {
290290
// But in the TMem load/store's loop domain, Ix (the ID parallelized on TIDx)
291291
// has extent 32. Then we will generate code like:
292292
// if (threadIdx.x < 32) {
293-
// tmem::load
293+
// tcgen05::load
294294
// }
295295
// For threadIdx.y == 0, it is correct. But for threadIdx.y == 1, it is wrong
296296
// because we are using thread IDs 33-65 for the load, which do not form a warp.
@@ -342,7 +342,7 @@ TEST_F(TMemTestCompileOnly, WrongStride) {
342342
// map is [TIDy, TIDx] = [2, 33], but in the TMem load/store's loop domain,
343343
// we have Iy{1}, Ix{32}. The generated code will be like:
344344
// if (threadIdx.x < 32 && threadIdx.y < 1) {
345-
// tmem::load
345+
// tcgen05::load
346346
// }
347347
// This is valid because we are using a whole warp for the load.
348348
TEST_F(TMemTest, InexactParallelType) {

0 commit comments

Comments
 (0)