You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/torch/cuda/__init__.py", line 306, in _lazy_init
    queued_call()
  File "/opt/conda/lib/python3.10/site-packages/torch/cuda/__init__.py", line 174, in _check_capability
    capability = get_device_capability(d)
  File "/opt/conda/lib/python3.10/site-packages/torch/cuda/__init__.py", line 430, in get_device_capability
    prop = get_device_properties(device)
  File "/opt/conda/lib/python3.10/site-packages/torch/cuda/__init__.py", line 448, in get_device_properties
    return _get_device_properties(device)  # type: ignore[name-defined]
RuntimeError: device >= 0 && device < num_gpus INTERNAL ASSERT FAILED at "/opt/conda/conda-bld/pytorch_1716905971132/work/aten/src/ATen/cuda/CUDAContext.cpp":50, please report a bug to PyTorch. device=, num_gpus=
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
  File "/data1/zhangwenkk/project/yolov7/train_aux.py", line 591, in <module>
    device = select_device(opt.device, batch_size=opt.batch_size)
  File "/data1/zhangwenkk/project/yolov7/utils/torch_utils.py", line 80, in select_device
    p = torch.cuda.get_device_properties(i)
  File "/opt/conda/lib/python3.10/site-packages/torch/cuda/__init__.py", line 444, in get_device_properties
    _lazy_init()  # will define _get_device_properties
  File "/opt/conda/lib/python3.10/site-packages/torch/cuda/__init__.py", line 312, in _lazy_init
    raise DeferredCudaCallError(msg) from e
torch.cuda.DeferredCudaCallError: CUDA call failed lazily at initialization with error: device >= 0 && device < num_gpus INTERNAL ASSERT FAILED at "/opt/conda/conda-bld/pytorch_1716905971132/work/aten/src/ATen/cuda/CUDAContext.cpp":50, please report a bug to PyTorch. device=, num_gpus=
CUDA call was originally invoked at:
  File "/data1/zhangwenkk/project/yolov7/train_aux.py", line 12, in <module>
    import torch.distributed as dist
  File "<frozen importlib._bootstrap>", line 1027, in _find_and_load
  File "<frozen importlib._bootstrap>", line 992, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
  File "<frozen importlib._bootstrap>", line 1027, in _find_and_load
  File "<frozen importlib._bootstrap>", line 1006, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 688, in _load_unlocked
  File "<frozen importlib._bootstrap_external>", line 883, in exec_module
  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
  File "/opt/conda/lib/python3.10/site-packages/torch/__init__.py", line 1480, in <module>
    _C._initExtension(manager_path())
  File "<frozen importlib._bootstrap>", line 1027, in _find_and_load
  File "<frozen importlib._bootstrap>", line 1006, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 688, in _load_unlocked
  File "<frozen importlib._bootstrap_external>", line 883, in exec_module
  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
  File "/opt/conda/lib/python3.10/site-packages/torch/cuda/__init__.py", line 238, in <module>
    _lazy_call(_check_capability)
  File "/opt/conda/lib/python3.10/site-packages/torch/cuda/__init__.py", line 235, in _lazy_call
    _queued_calls.append((callable, traceback.format_stack()))
root@training-98:/data1/zhangwenkk/project/yolov7# chmod -R 777 /data1/zhangwenkk/project/yolov7-main
root@training-98:/data1/zhangwenkk/project/yolov7# cd /data1/zhangwenkk/project/yolov7-main
root@training-98:/data1/zhangwenkk/project/yolov7-main# bash run_test_.sh
Traceback (most recent call last):
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 306, in _lazy_init
queued_call()
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 174, in _check_capability
capability = get_device_capability(d)
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 430, in get_device_capability
prop = get_device_properties(device)
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 448, in get_device_properties
return _get_device_properties(device) # type: ignore[name-defined]
RuntimeError: device >= 0 && device < num_gpus INTERNAL ASSERT FAILED at "/opt/conda/conda-bld/pytorch_1716905971132/work/aten/src/ATen/cuda/CUDAContext.cpp":50, please report a bug to PyTorch. device=, num_gpus=
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/data1/zhangwenkk/project/yolov7-main/train_aux.py", line 591, in
device = select_device(opt.device, batch_size=opt.batch_size)
File "/data1/zhangwenkk/project/yolov7-main/utils/torch_utils.py", line 80, in select_device
p = torch.cuda.get_device_properties(i)
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 444, in get_device_properties
_lazy_init() # will define _get_device_properties
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 312, in _lazy_init
raise DeferredCudaCallError(msg) from e
torch.cuda.DeferredCudaCallError: CUDA call failed lazily at initialization with error: device >= 0 && device < num_gpus INTERNAL ASSERT FAILED at "/opt/conda/conda-bld/pytorch_1716905971132/work/aten/src/ATen/cuda/CUDAContext.cpp":50, please report a bug to PyTorch. device=, num_gpus=
CUDA call was originally invoked at:
File "/data1/zhangwenkk/project/yolov7-main/train_aux.py", line 12, in
import torch.distributed as dist
File "", line 1027, in _find_and_load
File "", line 992, in _find_and_load_unlocked
File "", line 241, in _call_with_frames_removed
File "", line 1027, in _find_and_load
File "", line 1006, in _find_and_load_unlocked
File "", line 688, in _load_unlocked
File "", line 883, in exec_module
File "", line 241, in _call_with_frames_removed
File "/opt/conda/lib/python3.10/site-packages/torch/init.py", line 1480, in
_C._initExtension(manager_path())
File "", line 1027, in _find_and_load
File "", line 1006, in _find_and_load_unlocked
File "", line 688, in _load_unlocked
File "", line 883, in exec_module
File "", line 241, in _call_with_frames_removed
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 238, in
_lazy_call(_check_capability)
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 235, in _lazy_call
_queued_calls.append((callable, traceback.format_stack()))
root@training-98:/data1/zhangwenkk/project/yolov7-main# bash run_test_.sh
Traceback (most recent call last):
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 306, in _lazy_init
queued_call()
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 174, in _check_capability
capability = get_device_capability(d)
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 430, in get_device_capability
prop = get_device_properties(device)
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 448, in get_device_properties
return _get_device_properties(device) # type: ignore[name-defined]
RuntimeError: device >= 0 && device < num_gpus INTERNAL ASSERT FAILED at "/opt/conda/conda-bld/pytorch_1716905971132/work/aten/src/ATen/cuda/CUDAContext.cpp":50, please report a bug to PyTorch. device=, num_gpus=
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/data1/zhangwenkk/project/yolov7-main/train.py", line 595, in
device = select_device(opt.device, batch_size=opt.batch_size)
File "/data1/zhangwenkk/project/yolov7-main/utils/torch_utils.py", line 80, in select_device
p = torch.cuda.get_device_properties(i)
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 444, in get_device_properties
_lazy_init() # will define _get_device_properties
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 312, in _lazy_init
raise DeferredCudaCallError(msg) from e
torch.cuda.DeferredCudaCallError: CUDA call failed lazily at initialization with error: device >= 0 && device < num_gpus INTERNAL ASSERT FAILED at "/opt/conda/conda-bld/pytorch_1716905971132/work/aten/src/ATen/cuda/CUDAContext.cpp":50, please report a bug to PyTorch. device=, num_gpus=
CUDA call was originally invoked at:
File "/data1/zhangwenkk/project/yolov7-main/train.py", line 12, in
import torch.distributed as dist
File "", line 1027, in _find_and_load
File "", line 992, in _find_and_load_unlocked
File "", line 241, in _call_with_frames_removed
File "", line 1027, in _find_and_load
File "", line 1006, in _find_and_load_unlocked
File "", line 688, in _load_unlocked
File "", line 883, in exec_module
File "", line 241, in _call_with_frames_removed
File "/opt/conda/lib/python3.10/site-packages/torch/init.py", line 1480, in
_C._initExtension(manager_path())
File "", line 1027, in _find_and_load
File "", line 1006, in _find_and_load_unlocked
File "", line 688, in _load_unlocked
File "", line 883, in exec_module
File "", line 241, in _call_with_frames_removed
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 238, in
_lazy_call(_check_capability)
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 235, in _lazy_call
Traceback (most recent call last):
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 306, in _lazy_init
queued_call()
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 174, in _check_capability
capability = get_device_capability(d)
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 430, in get_device_capability
prop = get_device_properties(device)
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 448, in get_device_properties
return _get_device_properties(device) # type: ignore[name-defined]
RuntimeError: device >= 0 && device < num_gpus INTERNAL ASSERT FAILED at "/opt/conda/conda-bld/pytorch_1716905971132/work/aten/src/ATen/cuda/CUDAContext.cpp":50, please report a bug to PyTorch. device=, num_gpus=
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/data1/zhangwenkk/project/yolov7/train_aux.py", line 591, in
device = select_device(opt.device, batch_size=opt.batch_size)
File "/data1/zhangwenkk/project/yolov7/utils/torch_utils.py", line 80, in select_device
p = torch.cuda.get_device_properties(i)
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 444, in get_device_properties
_lazy_init() # will define _get_device_properties
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 312, in _lazy_init
raise DeferredCudaCallError(msg) from e
torch.cuda.DeferredCudaCallError: CUDA call failed lazily at initialization with error: device >= 0 && device < num_gpus INTERNAL ASSERT FAILED at "/opt/conda/conda-bld/pytorch_1716905971132/work/aten/src/ATen/cuda/CUDAContext.cpp":50, please report a bug to PyTorch. device=, num_gpus=
CUDA call was originally invoked at:
File "/data1/zhangwenkk/project/yolov7/train_aux.py", line 12, in
import torch.distributed as dist
File "", line 1027, in _find_and_load
File "", line 992, in _find_and_load_unlocked
File "", line 241, in _call_with_frames_removed
File "", line 1027, in _find_and_load
File "", line 1006, in _find_and_load_unlocked
File "", line 688, in _load_unlocked
File "", line 883, in exec_module
File "", line 241, in _call_with_frames_removed
File "/opt/conda/lib/python3.10/site-packages/torch/init.py", line 1480, in
_C._initExtension(manager_path())
File "", line 1027, in _find_and_load
File "", line 1006, in _find_and_load_unlocked
File "", line 688, in _load_unlocked
File "", line 883, in exec_module
File "", line 241, in _call_with_frames_removed
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 238, in
_lazy_call(_check_capability)
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 235, in _lazy_call
_queued_calls.append((callable, traceback.format_stack()))
root@training-98:/data1/zhangwenkk/project/yolov7# chmod -R 777 /data1/zhangwenkk/project/yolov7-main
root@training-98:/data1/zhangwenkk/project/yolov7# cd /data1/zhangwenkk/project/yolov7-main
root@training-98:/data1/zhangwenkk/project/yolov7-main# bash run_test_.sh
Traceback (most recent call last):
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 306, in _lazy_init
queued_call()
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 174, in _check_capability
capability = get_device_capability(d)
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 430, in get_device_capability
prop = get_device_properties(device)
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 448, in get_device_properties
return _get_device_properties(device) # type: ignore[name-defined]
RuntimeError: device >= 0 && device < num_gpus INTERNAL ASSERT FAILED at "/opt/conda/conda-bld/pytorch_1716905971132/work/aten/src/ATen/cuda/CUDAContext.cpp":50, please report a bug to PyTorch. device=, num_gpus=
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/data1/zhangwenkk/project/yolov7-main/train_aux.py", line 591, in
device = select_device(opt.device, batch_size=opt.batch_size)
File "/data1/zhangwenkk/project/yolov7-main/utils/torch_utils.py", line 80, in select_device
p = torch.cuda.get_device_properties(i)
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 444, in get_device_properties
_lazy_init() # will define _get_device_properties
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 312, in _lazy_init
raise DeferredCudaCallError(msg) from e
torch.cuda.DeferredCudaCallError: CUDA call failed lazily at initialization with error: device >= 0 && device < num_gpus INTERNAL ASSERT FAILED at "/opt/conda/conda-bld/pytorch_1716905971132/work/aten/src/ATen/cuda/CUDAContext.cpp":50, please report a bug to PyTorch. device=, num_gpus=
CUDA call was originally invoked at:
File "/data1/zhangwenkk/project/yolov7-main/train_aux.py", line 12, in
import torch.distributed as dist
File "", line 1027, in _find_and_load
File "", line 992, in _find_and_load_unlocked
File "", line 241, in _call_with_frames_removed
File "", line 1027, in _find_and_load
File "", line 1006, in _find_and_load_unlocked
File "", line 688, in _load_unlocked
File "", line 883, in exec_module
File "", line 241, in _call_with_frames_removed
File "/opt/conda/lib/python3.10/site-packages/torch/init.py", line 1480, in
_C._initExtension(manager_path())
File "", line 1027, in _find_and_load
File "", line 1006, in _find_and_load_unlocked
File "", line 688, in _load_unlocked
File "", line 883, in exec_module
File "", line 241, in _call_with_frames_removed
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 238, in
_lazy_call(_check_capability)
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 235, in _lazy_call
_queued_calls.append((callable, traceback.format_stack()))
root@training-98:/data1/zhangwenkk/project/yolov7-main# bash run_test_.sh
Traceback (most recent call last):
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 306, in _lazy_init
queued_call()
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 174, in _check_capability
capability = get_device_capability(d)
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 430, in get_device_capability
prop = get_device_properties(device)
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 448, in get_device_properties
return _get_device_properties(device) # type: ignore[name-defined]
RuntimeError: device >= 0 && device < num_gpus INTERNAL ASSERT FAILED at "/opt/conda/conda-bld/pytorch_1716905971132/work/aten/src/ATen/cuda/CUDAContext.cpp":50, please report a bug to PyTorch. device=, num_gpus=
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/data1/zhangwenkk/project/yolov7-main/train.py", line 595, in
device = select_device(opt.device, batch_size=opt.batch_size)
File "/data1/zhangwenkk/project/yolov7-main/utils/torch_utils.py", line 80, in select_device
p = torch.cuda.get_device_properties(i)
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 444, in get_device_properties
_lazy_init() # will define _get_device_properties
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 312, in _lazy_init
raise DeferredCudaCallError(msg) from e
torch.cuda.DeferredCudaCallError: CUDA call failed lazily at initialization with error: device >= 0 && device < num_gpus INTERNAL ASSERT FAILED at "/opt/conda/conda-bld/pytorch_1716905971132/work/aten/src/ATen/cuda/CUDAContext.cpp":50, please report a bug to PyTorch. device=, num_gpus=
CUDA call was originally invoked at:
File "/data1/zhangwenkk/project/yolov7-main/train.py", line 12, in
import torch.distributed as dist
File "", line 1027, in _find_and_load
File "", line 992, in _find_and_load_unlocked
File "", line 241, in _call_with_frames_removed
File "", line 1027, in _find_and_load
File "", line 1006, in _find_and_load_unlocked
File "", line 688, in _load_unlocked
File "", line 883, in exec_module
File "", line 241, in _call_with_frames_removed
File "/opt/conda/lib/python3.10/site-packages/torch/init.py", line 1480, in
_C._initExtension(manager_path())
File "", line 1027, in _find_and_load
File "", line 1006, in _find_and_load_unlocked
File "", line 688, in _load_unlocked
File "", line 883, in exec_module
File "", line 241, in _call_with_frames_removed
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 238, in
_lazy_call(_check_capability)
File "/opt/conda/lib/python3.10/site-packages/torch/cuda/init.py", line 235, in _lazy_call
My training script:
python train_aux.py --device 0 --weights ./yolov7-e6e.pt --cfg ./cfg/training/yolov7-e6e.yaml --data ./data/test_xiancun.yaml --hyp ./data/hyp.scratch.p6.yaml --batch-size 4 --img-size 896 --epochs 300 --name test_xiancun --exist-ok --image-weights --workers 8
Training with multiple GPUs succeeds, but training with a single GPU (`--device 0`) fails with the error above.
The text was updated successfully, but these errors were encountered: