diff --git a/taichi/runtime/llvm/llvm_context.cpp b/taichi/runtime/llvm/llvm_context.cpp
index beb0e6309feaa..8cd5cf4e785dd 100644
--- a/taichi/runtime/llvm/llvm_context.cpp
+++ b/taichi/runtime/llvm/llvm_context.cpp
@@ -841,9 +841,11 @@ void TaichiLLVMContext::update_runtime_jit_module(
   }
 
   if (arch_ == Arch::amdgpu) {
+#ifdef TI_WITH_AMDGPU
     llvm::legacy::PassManager module_pass_manager;
     module_pass_manager.add(new AMDGPUConvertFuncParamAddressSpacePass());
     module_pass_manager.run(*module);
+#endif
   }
 
   eliminate_unused_functions(module.get(), [](std::string func_name) {
diff --git a/taichi/runtime/llvm/llvm_context_pass.h b/taichi/runtime/llvm/llvm_context_pass.h
index c6d2cd7a64ada..b1a09325c4ea5 100644
--- a/taichi/runtime/llvm/llvm_context_pass.h
+++ b/taichi/runtime/llvm/llvm_context_pass.h
@@ -66,18 +66,23 @@ struct AMDGPUConvertFuncParamAddressSpacePass : public ModulePass {
       const std::string func_name = f.getName().str();
       if (starts_with(func_name, "runtime_")) {
         f.setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
-        f.addFnAttr("amdgpu-flat-work-group-size", "1, 256");
+        // ref https://llvm.org/docs/AMDGPUUsage.html
+        // "amdgpu-flat-work-group-size"="min,max"
+        // Specify the minimum and maximum flat work group sizes that will be specified when the kernel is dispatched.
+        // Generated by the amdgpu_flat_work_group_size CLANG attribute [CLANG-ATTR].
+        // The implied default value is 1,1024.
+        f.addFnAttr("amdgpu-flat-work-group-size", "1, 1024");
         is_kernel = true;
       }
       if (!is_kernel && !f.isDeclaration())
         f.setLinkage(llvm::Function::PrivateLinkage);
     }
-    std::vector<llvm::Function *> global_func;
+    std::vector<llvm::Function *> kernel_function;
     for (auto &f : M) {
      if (f.getCallingConv() == llvm::CallingConv::AMDGPU_KERNEL)
-        global_func.push_back(&f);
+        kernel_function.push_back(&f);
     }
-    for (auto &f : global_func) {
+    for (auto &f : kernel_function) {
      llvm::FunctionType *func_type = f->getFunctionType();
      std::vector<llvm::Type *> new_func_params;
      for (auto &arg : f->args()) {