Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

can not compile candle with cuda on win11 #2458

Open
NoNameCanUse9 opened this issue Aug 29, 2024 · 2 comments
Open

can not compile candle with cuda on win11 #2458

NoNameCanUse9 opened this issue Aug 29, 2024 · 2 comments

Comments

@NoNameCanUse9
Copy link

Did I do something wrong?
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.1\include\sm_32_intrinsics.hpp(112): error: asm operand type size(8) does not match type/size implied by constraint 'r'
static __declspec(device) __inline int __ldg(const int *ptr) { unsigned int ret; asm volatile ("ld.global.nc.s32 %0, [%1];" : "=r"(ret) : "r" (ptr)); return (int)ret; }

                       ^

C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.1\include\sm_32_intrinsics.hpp(113): error: asm operand type size(8) does not match type/size implied by constraint 'r'
static __declspec(device) __inline long long __ldg(const long long *ptr) { unsigned long long ret; asm volatile ("ld.global.nc.s64 %0, [%1];" : "=l"(ret) : "r" (ptr)); return (long long)ret; }

                                         ^

C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.1\include\sm_32_intrinsics.hpp(114): error: asm operand type size(8) does not match type/size implied by constraint 'r'
static __declspec(device) __inline char2 __ldg(const char2 *ptr) { char2 ret; int2 tmp; asm volatile ("ld.global.nc.v2.s8 {%0,%1}, [%2];" : "=r"(tmp.x), "=r"(tmp.y) : "r" (ptr)); ret.x = (char)tmp.x; ret.y = (char)tmp.y; return ret; }

                                                    ^

C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.1\include\sm_32_intrinsics.hpp(115): error: asm operand type size(8) does not match type/size implied by constraint 'r'
static __declspec(device) __inline char4 __ldg(const char4 *ptr) { char4 ret; int4 tmp; asm volatile ("ld.global.nc.v4.s8 {%0,%1,%2,%3}, [%4];" : "=r"(tmp.x), "=r"(tmp.y), "=r"(tmp.z), "=r"(tmp.w) : "r" (ptr)); ret.x = (char)tmp.x; ret.y = (char)tmp.y; ret.z = (char)tmp.z; ret.w = (char)tmp.w; return ret; }

                                                                                    ^

C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.1\include\sm_32_intrinsics.hpp(116): error: asm operand type size(8) does not match type/size implied by constraint 'r'
static __declspec(device) __inline short2 __ldg(const short2 *ptr) { short2 ret; asm volatile ("ld.global.nc.v2.s16 {%0,%1}, [%2];" : "=h"(ret.x), "=h"(ret.y) : "r" (ptr)); return ret; }

                                              ^

C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.1\include\sm_32_intrinsics.hpp(117): error: asm operand type size(8) does not match type/size implied by constraint 'r'
static __declspec(device) __inline short4 __ldg(const short4 *ptr) { short4 ret; asm volatile ("ld.global.nc.v4.s16 {%0,%1,%2,%3}, [%4];" : "=h"(ret.x), "=h"(ret.y), "=h"(ret.z), "=h"(ret.w) : "r" (ptr)); return ret; }

                                                                              ^

C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.1\include\sm_32_intrinsics.hpp(118): error: asm operand type size(8) does not match type/size implied by constraint 'r'
static __declspec(device) __inline int2 __ldg(const int2 *ptr) { int2 ret; asm volatile ("ld.global.nc.v2.s32 {%0,%1}, [%2];" : "=r"(ret.x), "=r"(ret.y) : "r" (ptr)); return ret; }

                                        ^

C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.1\include\sm_32_intrinsics.hpp(119): error: asm operand type size(8) does not match type/size implied by constraint 'r'
static __declspec(device) __inline int4 __ldg(const int4 *ptr) { int4 ret; asm volatile ("ld.global.nc.v4.s32 {%0,%1,%2,%3}, [%4];" : "=r"(ret.x), "=r"(ret.y), "=r"(ret.z), "=r"(ret.w) : "r" (ptr)); return ret; }

                                                                        ^

C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.1\include\sm_32_intrinsics.hpp(120): error: asm operand type size(8) does not match type/size implied by constraint 'r'
static __declspec(device) __inline longlong2 __ldg(const longlong2 *ptr) { longlong2 ret; asm volatile ("ld.global.nc.v2.s64 {%0,%1}, [%2];" : "=l"(ret.x), "=l"(ret.y) : "r" (ptr)); return ret; }

@EndlessReform
Copy link

https://stackoverflow.com/questions/12843846/problems-when-running-nvcc-from-command-line

I had the same issue. Apparently, this happens because Visual Studio uses the wrong cl.exe by default. Search the Start Menu for "x64 Native Tools Command Prompt for VS 2022" and run the build from there. Compiling Candle worked after that.

@Treasure1201
Copy link

"x64 Native Tools Command Prompt for VS 2022"

Thank you, thank you, thank you. You have saved me so much time.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

3 participants