@@ -225,15 +225,26 @@ bool cuda_supports_virtual_memory(int device) {
225
225
return supports_vmm != 0 ;
226
226
}
227
227
228
+ std::unordered_map<int , cudaDeviceProp>& cuda_device_properties () {
229
+ static auto * cuda_device_props = new std::unordered_map<int , cudaDeviceProp>{};
230
+ return *cuda_device_props;
231
+ }
232
+
233
+ const cudaDeviceProp& cuda_get_device_properties (int device) {
234
+ if (cuda_device_properties ().count (device) == 0 ) {
235
+ auto & props = cuda_device_properties ()[device];
236
+ CUDA_CHECK_THROW (cudaGetDeviceProperties (&props, device));
237
+ }
238
+
239
+ return cuda_device_properties ().at (device);
240
+ }
241
+
228
242
std::string cuda_device_name (int device) {
229
- cudaDeviceProp props;
230
- CUDA_CHECK_THROW (cudaGetDeviceProperties (&props, device));
231
- return props.name ;
243
+ return cuda_get_device_properties (device).name ;
232
244
}
233
245
234
246
uint32_t cuda_compute_capability (int device) {
235
- cudaDeviceProp props;
236
- CUDA_CHECK_THROW (cudaGetDeviceProperties (&props, device));
247
+ const auto & props = cuda_get_device_properties (device);
237
248
return props.major * 10 + props.minor ;
238
249
}
239
250
@@ -255,15 +266,11 @@ uint32_t cuda_supported_compute_capability(int device) {
255
266
}
256
267
257
268
size_t cuda_max_shmem (int device) {
258
- cudaDeviceProp props;
259
- CUDA_CHECK_THROW (cudaGetDeviceProperties (&props, device));
260
- return props.sharedMemPerBlockOptin ;
269
+ return cuda_get_device_properties (device).sharedMemPerBlockOptin ;
261
270
}
262
271
263
272
uint32_t cuda_max_registers (int device) {
264
- cudaDeviceProp props;
265
- CUDA_CHECK_THROW (cudaGetDeviceProperties (&props, device));
266
- return (uint32_t )props.regsPerBlock ;
273
+ return (uint32_t )cuda_get_device_properties (device).regsPerBlock ;
267
274
}
268
275
269
276
size_t cuda_memory_granularity (int device) {
0 commit comments