136 keypoints results is not good when i try using yolov8n

Hello All,

I already using yolov8 for change the object detection, in 17 and 26 keypoints the output looks similar with the demo code. but when i change the models too 136 keypoints sometimes i got error like this : 

RuntimeError: shape '[1, 136, 1, 64, 48]' is invalid for input of size 783360

I try to reshape the images bbox with the image size they asked in configs file, it's not error but the results was bad (the keypoints coordinates very different with demo repo).

Anyone knows how to solve this? here the code : 

```
cfg_file = "pretrained_models/harpe_136/256x192_res152_lr1e-3_1x-duc.yaml"
checkpoint = "pretrained_models/harpe_136/halpe136_fast152_duc_regression_256x192.pth"

cfg = update_config(cfg_file)
heatmap_to_coord, heatmap_mode = get_func_heatmap_to_coord(cfg)
norm_type = cfg.LOSS.get('NORM_TYPE', None)
hm_size = cfg.DATA_PRESET.HEATMAP_SIZE
input_height, input_width = cfg.DATA_PRESET.IMAGE_SIZE
output_size = cfg.DATA_PRESET.HEATMAP_SIZE
sigma = cfg.DATA_PRESET.SIGMA
device = torch.device("cuda" if  torch.cuda.is_available() else "cpu")

pose_model = builder.build_sppe(cfg.MODEL, preset_cfg=cfg.DATA_PRESET)
pose_model.load_state_dict(torch.load(checkpoint, map_location="cuda"))
pose_model = pose_model.to("cuda" if torch.cuda.is_available() else "cpu")
pose_model.eval()

# Transformasi gambar sebelum masuk ke model
pose_transform = transforms.Compose([
    transforms.ToTensor()
])

def detect_pose(image_path, model_path, output_dir):
    """Deteksi keypoints hanya pada bounding box"""
    img = cv2.imread(image_path)
    img_h, img_w, _ = img.shape
    model_yolo = YOLO(model_path)
    results = model_yolo(img, conf=0.3, verbose=False)[0]
    bboxes = [[int(x1), int(y1), int(x2), int(y2)] for x1, y1, x2, y2 in results.boxes.xyxy.cpu().numpy()]
    # bboxes = yolo_to_bbox(txt_path, img_w, img_h)
    scores = results.boxes.conf.cpu().numpy().tolist()

    _result = []
    predss_img = []
    predss_scores = []

    for i, (xmin, ymin, xmax, ymax) in enumerate(bboxes):
        bbox = [xmin, ymin, xmax, ymax]
        person_crop = img[ymin:ymax, xmin:xmax]
        orig_h, orig_w = person_crop.shape[:2]
        if heatmap_mode == 'simple_regress':
            person_crop = cv2.resize(img, (input_width, input_height))
        # print(person_crop.shape[:2])
        # cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)

        # Convert ke tensor dan normalisasi
        person_crop = cv2.cvtColor(person_crop, cv2.COLOR_BGR2RGB)
        # person_crop = Image.fromarray(person_crop)
        person_crop = pose_transform(person_crop)
        inp_pose = person_crop.unsqueeze(0).float().to("cuda" if torch.cuda.is_available() else "cpu")
        
        # Inferensi pose estimation
        with torch.no_grad():
            kpts = pose_model(inp_pose)

        print(f"Original kpts shape: {kpts.shape}")
        print(f"Expected hm_size: {hm_size}")
        print(f"Input image size: {inp_pose.shape}")

        face_hand_num = 110
        if kpts.size()[1] == 136:
            eval_joints = [*range(0,136)]
        elif kpts.size()[1] == 26:
            eval_joints = [*range(0,26)]
        elif kpts.size()[1] == 133:
            eval_joints = [*range(0,133)]
        elif kpts.size()[1] == 68:
            face_hand_num = 42
            eval_joints = [*range(0,68)]
        elif kpts.size()[1] == 21:
           eval_joints = [*range(0,21)]
        elif kpts.size()[1] == 17:
            eval_joints = [*range(0,17)]
        pose_coords = []
        pose_scores = []
        # print(kpts.shape)
        # print(len(eval_joints))
        # print(hm_size)
        for i in range(kpts.shape[0]):
            if isinstance(heatmap_to_coord, list):
                pose_coords_body_foot, pose_scores_body_foot = heatmap_to_coord[0](
                    kpts[i][eval_joints[:-face_hand_num]], bbox, hm_shape=hm_size, norm_type=norm_type)
                pose_coords_face_hand, pose_scores_face_hand = heatmap_to_coord[1](
                    kpts[i][eval_joints[-face_hand_num:]], bbox, hm_shape=hm_size, norm_type=norm_type)
                pose_coord = np.concatenate((pose_coords_body_foot, pose_coords_face_hand), axis=0)
                pose_score = np.concatenate((pose_scores_body_foot, pose_scores_face_hand), axis=0)
            else:
                print('kesini')
                # try:
                pose_coord, pose_score = heatmap_to_coord(kpts[i][eval_joints], bbox, hm_shape=hm_size, norm_type=norm_type)
                # except:
                #     pose_coord, pose_score = heatmap_to_coord(kpts[i][eval_joints], bbox, hm_shape= kpts.shape[2:], norm_type=norm_type)
               
            pose_coords.append(torch.from_numpy(pose_coord).unsqueeze(0))
            pose_scores.append(torch.from_numpy(pose_score).unsqueeze(0))
        preds_img = torch.cat(pose_coords)
        preds_scores = torch.cat(pose_scores)

        if preds_img.ndim == 3 and preds_img.shape[0] == 1:
            preds_img = preds_img.squeeze(0)
        if preds_scores.ndim == 3 and preds_scores.shape[0] == 1:
            preds_scores = preds_scores.squeeze(0)

        predss_img.append(preds_img)
        predss_scores.append(preds_scores)

        # print(preds_img)

    for k in range(len(scores)):
        _result.append(
            {
                'keypoints':predss_img[k],
                'kp_score':predss_scores[k],
                'proposal_score': torch.mean(predss_scores[k]) + scores[k] + 1.25 * max(predss_scores[k]),
                'box':[bboxes[k][0], bboxes[k][1], bboxes[k][2],bboxes[k][3]] 
            }
        )

    result = {
        'imgname': 'aaa',
        'result': _result
    }

    # Simpan hasil sebagai gambar
    img = vis_frame(img, result, vis_thres=0.3)

    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, os.path.basename(image_path))
    cv2.imwrite(output_path, img)
    print(f"Saved: {output_path}")
```

`

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

136 keypoints results is not good when i try using yolov8n #1237

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

136 keypoints results is not good when i try using yolov8n #1237

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions