-
Notifications
You must be signed in to change notification settings - Fork 2k
Open
Description
Hello All,
I am already using YOLOv8 as a replacement for the object detector. With the 17- and 26-keypoint models the output looks similar to the demo code, but when I change the model to 136 keypoints I sometimes get an error like this:
RuntimeError: shape '[1, 136, 1, 64, 48]' is invalid for input of size 783360
I tried resizing the bbox image crop to the image size specified in the config file. That avoids the error, but the results are bad (the keypoint coordinates are very different from those of the demo repo).
Does anyone know how to solve this? Here is the code:
# Paths of the pose-model configuration and its pretrained checkpoint.
cfg_file = "pretrained_models/harpe_136/256x192_res152_lr1e-3_1x-duc.yaml"
checkpoint = "pretrained_models/harpe_136/halpe136_fast152_duc_regression_256x192.pth"
cfg = update_config(cfg_file)

# NOTE(review): this unpacks two values (decoder, mode name); confirm that the
# get_func_heatmap_to_coord in use really returns a pair.
heatmap_to_coord, heatmap_mode = get_func_heatmap_to_coord(cfg)
norm_type = cfg.LOSS.get('NORM_TYPE', None)
hm_size = cfg.DATA_PRESET.HEATMAP_SIZE
input_height, input_width = cfg.DATA_PRESET.IMAGE_SIZE
output_size = cfg.DATA_PRESET.HEATMAP_SIZE
sigma = cfg.DATA_PRESET.SIGMA

# Pick the device once and reuse it everywhere. Mapping the checkpoint onto
# this device (rather than a hard-coded "cuda") keeps loading working on
# CPU-only machines.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pose_model = builder.build_sppe(cfg.MODEL, preset_cfg=cfg.DATA_PRESET)
pose_model.load_state_dict(torch.load(checkpoint, map_location=device))
pose_model = pose_model.to(device)
pose_model.eval()

# Image transform applied before the crop is fed to the model.
pose_transform = transforms.Compose([
    transforms.ToTensor(),
])
def detect_pose(image_path, model_path, output_dir):
    """Detect keypoints only inside the detected bounding boxes.

    Runs the YOLO detector loaded from ``model_path`` on the image at
    ``image_path``, feeds every detected box crop through the module-level
    ``pose_model``, converts the network output to coordinates with
    ``heatmap_to_coord``, draws the result with ``vis_frame`` and writes the
    annotated image into ``output_dir`` under the input file's base name.

    Args:
        image_path: Path of the image to process.
        model_path: Path of the YOLO weights handed to ``YOLO``.
        output_dir: Directory for the annotated image; created if missing.

    Raises:
        FileNotFoundError: If the image cannot be read.
        ValueError: If the pose model emits an unsupported number of
            keypoint channels.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")

    model_yolo = YOLO(model_path)
    results = model_yolo(img, conf=0.3, verbose=False)[0]
    bboxes = [
        [int(x1), int(y1), int(x2), int(y2)]
        for x1, y1, x2, y2 in results.boxes.xyxy.cpu().numpy()
    ]
    scores = results.boxes.conf.cpu().numpy().tolist()

    _result = []
    # Boxes and scores are walked together so every result entry is built from
    # the detection it belongs to, even when a box has to be skipped.
    for bbox, det_score in zip(bboxes, scores):
        xmin, ymin, xmax, ymax = bbox
        person_crop = img[ymin:ymax, xmin:xmax]
        if person_crop.size == 0:
            # A degenerate box gives an empty crop that OpenCV cannot process.
            continue

        if heatmap_mode == 'simple_regress':
            # Resize the person crop, not the full frame: the decoder maps the
            # predictions back into ``bbox``, so the network must see exactly
            # that region at the configured input size.
            # NOTE(review): the crop is stretched without preserving the box
            # aspect ratio and no mean normalisation is applied; if results
            # still differ from the reference demo, compare against its
            # preprocessing -- TODO confirm.
            person_crop = cv2.resize(person_crop, (input_width, input_height))

        # Convert BGR -> RGB, then to a tensor with a leading batch dimension.
        person_crop = cv2.cvtColor(person_crop, cv2.COLOR_BGR2RGB)
        person_crop = pose_transform(person_crop)
        inp_pose = person_crop.unsqueeze(0).float().to(device)

        # Pose-estimation inference.
        with torch.no_grad():
            kpts = pose_model(inp_pose)
        print(f"Original kpts shape: {kpts.shape}")
        print(f"Expected hm_size: {hm_size}")
        print(f"Input image size: {inp_pose.shape}")

        num_joints = kpts.size()[1]
        if num_joints not in (136, 26, 133, 68, 21, 17):
            raise ValueError(f"Unsupported number of keypoint channels: {num_joints}")
        eval_joints = list(range(num_joints))
        # Number of trailing face/hand joints handled by the second decoder
        # when ``heatmap_to_coord`` is a pair of decoders.
        face_hand_num = 42 if num_joints == 68 else 110

        pose_coords = []
        pose_scores = []
        for sample_out in kpts:
            if isinstance(heatmap_to_coord, list):
                # Body/foot joints and face/hand joints use separate decoders.
                pose_coords_body_foot, pose_scores_body_foot = heatmap_to_coord[0](
                    sample_out[eval_joints[:-face_hand_num]], bbox,
                    hm_shape=hm_size, norm_type=norm_type)
                pose_coords_face_hand, pose_scores_face_hand = heatmap_to_coord[1](
                    sample_out[eval_joints[-face_hand_num:]], bbox,
                    hm_shape=hm_size, norm_type=norm_type)
                pose_coord = np.concatenate(
                    (pose_coords_body_foot, pose_coords_face_hand), axis=0)
                pose_score = np.concatenate(
                    (pose_scores_body_foot, pose_scores_face_hand), axis=0)
            else:
                print('kesini')
                pose_coord, pose_score = heatmap_to_coord(
                    sample_out[eval_joints], bbox,
                    hm_shape=hm_size, norm_type=norm_type)
            pose_coords.append(torch.from_numpy(pose_coord).unsqueeze(0))
            pose_scores.append(torch.from_numpy(pose_score).unsqueeze(0))

        preds_img = torch.cat(pose_coords)
        preds_scores = torch.cat(pose_scores)
        # Drop the singleton batch dimension when present.
        if preds_img.ndim == 3 and preds_img.shape[0] == 1:
            preds_img = preds_img.squeeze(0)
        if preds_scores.ndim == 3 and preds_scores.shape[0] == 1:
            preds_scores = preds_scores.squeeze(0)

        _result.append(
            {
                'keypoints': preds_img,
                'kp_score': preds_scores,
                'proposal_score': torch.mean(preds_scores) + det_score + 1.25 * max(preds_scores),
                'box': [xmin, ymin, xmax, ymax],
            }
        )

    result = {
        'imgname': 'aaa',
        'result': _result,
    }

    # Save the visualisation as an image.
    img = vis_frame(img, result, vis_thres=0.3)
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, os.path.basename(image_path))
    cv2.imwrite(output_path, img)
    print(f"Saved: {output_path}")
`
Metadata
Metadata
Assignees
Labels
No labels