Skip to content

136 keypoints results is not good when i try using yolov8n #1237

@raffelbroox

Description

@raffelbroox

Hello All,

I already using yolov8 for change the object detection, in 17 and 26 keypoints the output looks similar with the demo code. but when i change the models too 136 keypoints sometimes i got error like this :

RuntimeError: shape '[1, 136, 1, 64, 48]' is invalid for input of size 783360

I try to reshape the images bbox with the image size they asked in configs file, it's not error but the results was bad (the keypoints coordinates very different with demo repo).

Anyone knows how to solve this? here the code :

cfg_file = "pretrained_models/harpe_136/256x192_res152_lr1e-3_1x-duc.yaml"
checkpoint = "pretrained_models/harpe_136/halpe136_fast152_duc_regression_256x192.pth"

cfg = update_config(cfg_file)
heatmap_to_coord, heatmap_mode = get_func_heatmap_to_coord(cfg)
norm_type = cfg.LOSS.get('NORM_TYPE', None)
hm_size = cfg.DATA_PRESET.HEATMAP_SIZE
input_height, input_width = cfg.DATA_PRESET.IMAGE_SIZE
output_size = cfg.DATA_PRESET.HEATMAP_SIZE
sigma = cfg.DATA_PRESET.SIGMA
device = torch.device("cuda" if  torch.cuda.is_available() else "cpu")

pose_model = builder.build_sppe(cfg.MODEL, preset_cfg=cfg.DATA_PRESET)
pose_model.load_state_dict(torch.load(checkpoint, map_location="cuda"))
pose_model = pose_model.to("cuda" if torch.cuda.is_available() else "cpu")
pose_model.eval()

# Transformasi gambar sebelum masuk ke model
pose_transform = transforms.Compose([
    transforms.ToTensor()
])

def detect_pose(image_path, model_path, output_dir):
    """Deteksi keypoints hanya pada bounding box"""
    img = cv2.imread(image_path)
    img_h, img_w, _ = img.shape
    model_yolo = YOLO(model_path)
    results = model_yolo(img, conf=0.3, verbose=False)[0]
    bboxes = [[int(x1), int(y1), int(x2), int(y2)] for x1, y1, x2, y2 in results.boxes.xyxy.cpu().numpy()]
    # bboxes = yolo_to_bbox(txt_path, img_w, img_h)
    scores = results.boxes.conf.cpu().numpy().tolist()

    _result = []
    predss_img = []
    predss_scores = []

    for i, (xmin, ymin, xmax, ymax) in enumerate(bboxes):
        bbox = [xmin, ymin, xmax, ymax]
        person_crop = img[ymin:ymax, xmin:xmax]
        orig_h, orig_w = person_crop.shape[:2]
        if heatmap_mode == 'simple_regress':
            person_crop = cv2.resize(img, (input_width, input_height))
        # print(person_crop.shape[:2])
        # cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)

        # Convert ke tensor dan normalisasi
        person_crop = cv2.cvtColor(person_crop, cv2.COLOR_BGR2RGB)
        # person_crop = Image.fromarray(person_crop)
        person_crop = pose_transform(person_crop)
        inp_pose = person_crop.unsqueeze(0).float().to("cuda" if torch.cuda.is_available() else "cpu")
        
        # Inferensi pose estimation
        with torch.no_grad():
            kpts = pose_model(inp_pose)

        print(f"Original kpts shape: {kpts.shape}")
        print(f"Expected hm_size: {hm_size}")
        print(f"Input image size: {inp_pose.shape}")

        face_hand_num = 110
        if kpts.size()[1] == 136:
            eval_joints = [*range(0,136)]
        elif kpts.size()[1] == 26:
            eval_joints = [*range(0,26)]
        elif kpts.size()[1] == 133:
            eval_joints = [*range(0,133)]
        elif kpts.size()[1] == 68:
            face_hand_num = 42
            eval_joints = [*range(0,68)]
        elif kpts.size()[1] == 21:
           eval_joints = [*range(0,21)]
        elif kpts.size()[1] == 17:
            eval_joints = [*range(0,17)]
        pose_coords = []
        pose_scores = []
        # print(kpts.shape)
        # print(len(eval_joints))
        # print(hm_size)
        for i in range(kpts.shape[0]):
            if isinstance(heatmap_to_coord, list):
                pose_coords_body_foot, pose_scores_body_foot = heatmap_to_coord[0](
                    kpts[i][eval_joints[:-face_hand_num]], bbox, hm_shape=hm_size, norm_type=norm_type)
                pose_coords_face_hand, pose_scores_face_hand = heatmap_to_coord[1](
                    kpts[i][eval_joints[-face_hand_num:]], bbox, hm_shape=hm_size, norm_type=norm_type)
                pose_coord = np.concatenate((pose_coords_body_foot, pose_coords_face_hand), axis=0)
                pose_score = np.concatenate((pose_scores_body_foot, pose_scores_face_hand), axis=0)
            else:
                print('kesini')
                # try:
                pose_coord, pose_score = heatmap_to_coord(kpts[i][eval_joints], bbox, hm_shape=hm_size, norm_type=norm_type)
                # except:
                #     pose_coord, pose_score = heatmap_to_coord(kpts[i][eval_joints], bbox, hm_shape= kpts.shape[2:], norm_type=norm_type)
               
            pose_coords.append(torch.from_numpy(pose_coord).unsqueeze(0))
            pose_scores.append(torch.from_numpy(pose_score).unsqueeze(0))
        preds_img = torch.cat(pose_coords)
        preds_scores = torch.cat(pose_scores)

        if preds_img.ndim == 3 and preds_img.shape[0] == 1:
            preds_img = preds_img.squeeze(0)
        if preds_scores.ndim == 3 and preds_scores.shape[0] == 1:
            preds_scores = preds_scores.squeeze(0)

        predss_img.append(preds_img)
        predss_scores.append(preds_scores)

        # print(preds_img)

    for k in range(len(scores)):
        _result.append(
            {
                'keypoints':predss_img[k],
                'kp_score':predss_scores[k],
                'proposal_score': torch.mean(predss_scores[k]) + scores[k] + 1.25 * max(predss_scores[k]),
                'box':[bboxes[k][0], bboxes[k][1], bboxes[k][2],bboxes[k][3]] 
            }
        )

    result = {
        'imgname': 'aaa',
        'result': _result
    }

    # Simpan hasil sebagai gambar
    img = vis_frame(img, result, vis_thres=0.3)

    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, os.path.basename(image_path))
    cv2.imwrite(output_path, img)
    print(f"Saved: {output_path}")

`

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions