在执行下面函数中的 `jittor_target = jittor.array(target, dtype=jittor.int64)` 这一行时,
出现如下报错:
def _mask_transform(self, mask):
    """Convert a label mask into an int64 Jittor array of class indices.

    The mask is loaded into an int32 NumPy array, remapped through
    ``self._class_to_index``, cast to int64 and finally wrapped with
    ``jittor.array``. Type, dtype, shape and value range are printed after
    each step as a debugging aid.

    Args:
        mask: The label image (a PIL image, per the original comment);
            it is read with ``np.array(mask)``.

    Returns:
        The value returned by ``jittor.array(target, dtype=jittor.int64)``.

    Raises:
        Exception: Any error raised by ``jittor.array`` is reported and then
            re-raised unchanged. Previously the error was swallowed and the
            function failed afterwards with an unrelated ``UnboundLocalError``
            because ``jittor_target`` had never been assigned.
    """
    # Convert the PIL image into a NumPy array.
    mask_np = np.array(mask).astype('int32')
    print(f"mask_np type: {type(mask_np)}, mask_np dtype: {mask_np.dtype}, mask_np shape: {mask_np.shape}")
    print(f"mask_np min value: {mask_np.min()}, mask_np max value: {mask_np.max()}")

    # Remap the raw label values with _class_to_index.
    target = self._class_to_index(mask_np)
    print(
        f"target after _class_to_index type: {type(target)}, target dtype: {target.dtype}, target shape: {target.shape}")
    print(f"target min value: {target.min()}, target max value: {target.max()}")

    # Normalise the result to an int64 NumPy array. An integer array cannot
    # hold NaN or infinity, so no NaN/inf check is needed after this cast.
    target = np.array(target).astype('int64')
    print(f"target type: {type(target)}, target dtype: {target.dtype}, target shape: {target.shape}")
    print(f"target min value: {target.min()}, target max value: {target.max()}")

    # NOTE(review): the reported cudaErrorInitializationError was raised while
    # this ran inside a dataset worker process (_worker_main). Presumably CUDA
    # cannot be initialised there; if so, returning the NumPy array and doing
    # the Jittor conversion in the main process may be the real remedy --
    # TODO confirm against the Jittor Dataset documentation.
    try:
        jittor_target = jittor.array(target, dtype=jittor.int64)
    except Exception as e:
        print(f"Error converting to Jittor array: {e}")
        # Propagate the real failure (with its traceback) instead of falling
        # through to a `return` of an unbound local.
        raise
    print(f"jittor_target type: {type(jittor_target)}, jittor_target dtype: {jittor_target.dtype}")
    return jittor_target
报错信息
Traceback (most recent call last):
File "/home/ubuntu/hdd2/llf/fdlnet_jittor/core/data/dataloader/nightcity.py", line 110, in _mask_transform
jittor_target = jittor.array(target, dtype=jittor.int64)
File "/home/ubuntu/hdd2/llf/miniconda3/envs/fdlnet_j/lib/python3.8/site-packages/jittor/__init__.py", line 382, in array
ret = ops.array(np.array(data, dtype))
RuntimeError: Wrong inputs arguments, Please refer to examples(help(jt.ops.array)).
Types of your inputs are:
self = module,
args = (ndarray, ),
The function declarations are:
VarHolder* array__(PyObject* obj)
Failed reason:[f 0704 15:44:37.671641 36 helper_cuda.h:128] CUDA error at /home/ubuntu/hdd2/llf/miniconda3/envs/fdlnet_j/lib/python3.8/site-packages/jittor/src/mem/allocator/cuda_host_allocator.cc:22 code=3( cudaErrorInitializationError ) cudaMallocHost(&ptr, size)
详细Log
(fdlnet_j) llf@XY-TITAN-RTX:/home/ubuntu/hdd2/llf/fdlnet_jittor/scripts$ python train.py --model fdlnet --backbone resnet50 --dataset night --aux
[i 0704 15:44:33.382103 92 compiler.py:956] Jittor(1.3.8.5) src: /home/ubuntu/hdd2/llf/miniconda3/envs/fdlnet_j/lib/python3.8/site-packages/jittor
[i 0704 15:44:33.388335 92 compiler.py:957] g++ at /usr/bin/g++(5.5.0)
[i 0704 15:44:33.388413 92 compiler.py:958] cache_path: /home/llf/.cache/jittor/jt1.3.8/g++5.5.0/py3.8.19/Linux-4.15.0-1x37/IntelRXeonRGolx4e/default
[i 0704 15:44:33.417804 92 install_cuda.py:93] cuda_driver_version: [12, 1]
[i 0704 15:44:33.418458 92 install_cuda.py:81] restart /home/ubuntu/hdd2/llf/miniconda3/envs/fdlnet_j/bin/python ['train.py', '--model', 'fdlnet', '--backbone', 'resnet50', '--dataset', 'night', '--aux']
[i 0704 15:44:33.668282 36 compiler.py:956] Jittor(1.3.8.5) src: /home/ubuntu/hdd2/llf/miniconda3/envs/fdlnet_j/lib/python3.8/site-packages/jittor
[i 0704 15:44:33.676400 36 compiler.py:957] g++ at /usr/bin/g++(5.5.0)
[i 0704 15:44:33.676649 36 compiler.py:958] cache_path: /home/llf/.cache/jittor/jt1.3.8/g++5.5.0/py3.8.19/Linux-4.15.0-1x37/IntelRXeonRGolx4e/default
[i 0704 15:44:33.702405 36 install_cuda.py:93] cuda_driver_version: [12, 1]
[i 0704 15:44:33.711586 36 __init__.py:411] Found /home/llf/.cache/jittor/jtcuda/cuda11.2_cudnn8_linux/bin/nvcc(11.2.152) at /home/llf/.cache/jittor/jtcuda/cuda11.2_cudnn8_linux/bin/nvcc.
[i 0704 15:44:33.782424 36 __init__.py:411] Found gdb(8.1.1) at /usr/bin/gdb.
[i 0704 15:44:33.790506 36 __init__.py:411] Found addr2line(2.30) at /usr/bin/addr2line.
[i 0704 15:44:33.942985 36 compiler.py:1011] cuda key:cu11.2.152_sm_75
[i 0704 15:44:34.337327 36 __init__.py:227] Total mem: 125.56GB, using 16 procs for compiling.
[i 0704 15:44:34.442261 36 jit_compiler.cc:28] Load cc_path: /usr/bin/g++
[i 0704 15:44:34.562010 36 init.cc:62] Found cuda archs: [75,]
[i 0704 15:44:34.593657 36 __init__.py:411] Found mpicc(2.1.1) at /usr/bin/mpicc.
[i 0704 15:44:36.739931 36 cuda_flags.cc:49] CUDA enabled.
2024-07-04 15:44:36,747 semantic_segmentation INFO: Using 1 GPUs
2024-07-04 15:44:36,747 semantic_segmentation INFO: Namespace(aux=True, aux_weight=0.4, backbone='resnet50', base_size=520, batch_size=4, crop_size=480, dataset='night', device='cuda', distributed=False, epochs=50, jpu=False, local_rank=0, log_dir='../runs/logs/', log_iter=10, lr=0.0001, model='fdlnet', momentum=0.9, no_cuda=False, num_gpus=1, resume=None, save_dir='~/.torch/models', save_epoch=10, skip_val=False, start_epoch=0, use_ohem=False, val_epoch=1, warmup_factor=0.3333333333333333, warmup_iters=0, warmup_method='linear', weight_decay=0.0001, workers=4)
self.root: {} ../../datasets/night
Found 2998 images in the folder ../../datasets/night/images/train
self.root: {} ../../datasets/night
Found 1299 images in the folder ../../datasets/night/images/val
[w 0704 15:44:37.182724 36 nn.py:2280] The `Parameter` interface isn't needed in Jittor, this interface
does nothings and it is just used for compatible.
A Jittor Var is a Parameter
when it is a member of Module, if you don't want a Jittor
Var menber is treated as a Parameter, just name it startswith
underscore `_`.
2024-07-04 15:44:37,185 semantic_segmentation INFO: Start training, Total Epochs: 50 = Total Iterations 37450
———————————————————get_item————————————————————————————
../../datasets/night/images/train/Chicago_0183.png
../../datasets/night/label/train/Chicago_0183_labelIds.png
———————————————————sync_transform————————————————————————————
mask type: <class 'PIL.PngImagePlugin.PngImageFile'>
mask_np type: <class 'numpy.ndarray'>, mask_np dtype: int32, mask_np shape: (480, 480)
mask_np min value: 0, mask_np max value: 26
target after _class_to_index type: <class 'numpy.ndarray'>, target dtype: int32, target shape: (480, 480)
target min value: -1, target max value: 13
target type: <class 'numpy.ndarray'>, target dtype: int64, target shape: (480, 480)
target min value: -1, target max value: 13
Error converting to Jittor array: Wrong inputs arguments, Please refer to examples(help(jt.ops.array)).
Types of your inputs are:
self = module,
args = (ndarray, ),
The function declarations are:
VarHolder* array__(PyObject* obj)
Failed reason:[f 0704 15:44:37.671641 36 helper_cuda.h:128] CUDA error at /home/ubuntu/hdd2/llf/miniconda3/envs/fdlnet_j/lib/python3.8/site-packages/jittor/src/mem/allocator/cuda_host_allocator.cc:22 code=3( cudaErrorInitializationError ) cudaMallocHost(&ptr, size)
Traceback (most recent call last):
File "/home/ubuntu/hdd2/llf/fdlnet_jittor/core/data/dataloader/nightcity.py", line 110, in _mask_transform
jittor_target = jittor.array(target, dtype=jittor.int64)
File "/home/ubuntu/hdd2/llf/miniconda3/envs/fdlnet_j/lib/python3.8/site-packages/jittor/__init__.py", line 382, in array
ret = ops.array(np.array(data, dtype))
RuntimeError: Wrong inputs arguments, Please refer to examples(help(jt.ops.array)).
Types of your inputs are:
self = module,
args = (ndarray, ),
The function declarations are:
VarHolder* array__(PyObject* obj)
Failed reason:[f 0704 15:44:37.671641 36 helper_cuda.h:128] CUDA error at /home/ubuntu/hdd2/llf/miniconda3/envs/fdlnet_j/lib/python3.8/site-packages/jittor/src/mem/allocator/cuda_host_allocator.cc:22 code=3( cudaErrorInitializationError ) cudaMallocHost(&ptr, size)
Traceback (most recent call last):
File "/home/ubuntu/hdd2/llf/miniconda3/envs/fdlnet_j/lib/python3.8/site-packages/jittor/dataset/dataset.py", line 258, in _worker_main
batch.append(self[i])
File "/home/ubuntu/hdd2/llf/fdlnet_jittor/core/data/dataloader/nightcity.py", line 63, in __getitem__
img, mask = self._sync_transform(img, mask)
File "/home/ubuntu/hdd2/llf/fdlnet_jittor/core/data/dataloader/segbase.py", line 86, in _sync_transform
img, mask = self._img_transform(img), self._mask_transform(mask)
File "/home/ubuntu/hdd2/llf/fdlnet_jittor/core/data/dataloader/nightcity.py", line 117, in _mask_transform
return jittor_target
UnboundLocalError: local variable 'jittor_target' referenced before assignment
[e 0704 15:44:37.673297 36 log.cc:258] Caught SIGINT, quick exit
补充
该错误只发生在GPU下,CPU不会报错