# Source code for consnet.api.evaluation

# -----------------------------------------------------
# ConsNet
# Licensed under the GNU General Public License v3.0
# Written by Ye Liu (ye-liu at whu.edu.cn)
# -----------------------------------------------------

import nncore
import torch

from .bbox import pair_iou
from .data import (get_hoi_name, get_non_rare_hoi_idx, get_rare_hoi_idx,
                   get_seen_hoi_idx, get_unseen_hoi_idx, hoi_idx_to_obj_idx,
                   load_anno, obj_idx_to_hoi_idx)


def _compute_ap(cls_anno, cls_blob):
    if (num_blob := cls_blob.size(0)) == 0:
        return 0, 0

    anno_map = dict()
    imgs = cls_anno[:, 0].unique().int().tolist()
    for img_id in imgs:
        anno_map[img_id] = [False] * sum(cls_anno[:, 0] == img_id).item()

    tp = torch.zeros(num_blob)
    fp = torch.zeros(num_blob)

    for i in range(num_blob):
        if (img_id := cls_blob[i, 0].int().item()) not in cls_anno[:, 0]:
            fp[i] = 1
            continue

        keep = cls_anno[:, 0] == img_id
        anno = cls_anno[keep][:, 2:]

        iou = pair_iou(anno, cls_blob[None, i, 1:9])
        max_iou, idx = iou.max(dim=0)

        if max_iou >= 0.5 and not anno_map[img_id][idx]:
            anno_map[img_id][idx] = True
            tp[i] = 1
        else:
            fp[i] = 1

    tp = tp.cumsum(0)
    fp = fp.cumsum(0)
    prc = tp / (tp + fp)
    rec = tp / cls_anno.size(0)

    ap = 0
    for i in range(11):
        p = prc[rec >= 0.1 * i]
        ap += p.max().item() / 11 if p.size(0) > 0 else 0

    rec = rec[-1].item()
    return ap, rec


@nncore.recursive(key='mode', type='dict')
def hico_det_eval(blob,
                  anno,
                  split='test',
                  mode=['def', 'ko'],
                  zero_shot=None,
                  logger=None):
    """
    Perform standard evaluation on HICO-DET dataset using mean average
    precision (mAP) as introduced in [1].

    Args:
        blob (list[:obj:`Tensor[N, 10]`]): Human-object pairs and their
            detection scores to be evaluated. The length of the list should be
            600 and each item should be an ``N * 10`` tensor in
            ``(batch_id, x1, y1, x2, y2, ..., score)`` format.
        anno (:obj:`torch.Tensor` or str): The annotations object or path to
            the ``anno_bbox.mat`` file.
        split (str, optional): The dataset split to be evaluated. Expected
            values are ``'train'`` and ``'test'``. Only used when ``anno`` is
            a path.
        mode (list[str] or str, optional): Mode of evaluation. Expected values
            are ``'def'``, ``'ko'`` or a list containing these terms,
            denoting the default mode and known-object mode introduced in [1].
        zero_shot (dict or None, optional): Configurations for zero-shot
            settings. It should contain the following fields:

            - `type` (str): Expected values include ``'uc'``, ``'ub'`` and \
                ``'ua'``, representing unseen action-object combination, \
                unseen object and unseen action scenarios introduced in [2].
            - `id` (int, optional): Only valid when ``type='uc'``. Expected \
                values are in the range of ``0 ~ 4``, indicating the 5 groups \
                of unseen action-object combination settings in [2, 3].

        logger (:obj:`logging.Logger` or str or None, optional): The potential
            logger or name of the logger to be used.

    Returns:
        dict: Evaluation results including mean average precision (mAP) and \
            mean recall (mRec) values of multiple dataset splits under \
            different evaluation modes. All values are rounded to 4 decimal \
            places.

    Example:
        >>> results = hico_det_eval(blob, '<path-to-anno_bbox.mat>')
        >>> print(results)
        ... {'def_mAP': xxx, 'def_mRec': xxx, ... }

    References:
        1. Chao et al. (https://arxiv.org/abs/1702.05448)
        2. Liu et al. (https://arxiv.org/abs/2008.06254)
        3. Bansal et al. (https://arxiv.org/abs/1904.03181)
    """
    # NOTE(review): the body only handles a single mode string; presumably
    # ``nncore.recursive`` splits a list-valued ``mode`` into one call per item
    # and merges the returned dicts -- confirm against the nncore docs.
    assert mode in ('def', 'ko'), f'unsupported evaluation mode: {mode!r}'
    nncore.log_or_print(f'Evaluating mAP in *{mode}* mode...', logger)

    if isinstance(anno, str):
        anno = load_anno(anno, split=split)

    rare_idx = get_rare_hoi_idx()
    non_rare_idx = get_non_rare_hoi_idx()

    if (zero_shot_mode := zero_shot is not None):
        seen_idx = get_seen_hoi_idx(**zero_shot)
        unseen_idx = get_unseen_hoi_idx(**zero_shot)

    # Number of HOI classes evaluated, one entry of ``blob`` per class.
    num_hoi = 600

    ap, rec = torch.zeros(num_hoi), torch.zeros(num_hoi)
    for hoi_idx in range(num_hoi):
        # Column 1 of the annotations holds the HOI class index.
        cls_anno = anno[anno[:, 1] == hoi_idx]
        cls_blob = blob[hoi_idx]

        # Rank detections by score (last column), highest first, as required
        # by the cumulative precision/recall computation.
        inds = cls_blob[:, -1].argsort(descending=True)
        cls_blob = cls_blob[inds]

        if mode == 'ko':
            # Known-object mode: only keep detections on images that are
            # annotated with at least one HOI sharing this class's object.
            obj_idx = hoi_idx_to_obj_idx(hoi_idx)
            keep_hoi_idx = obj_idx_to_hoi_idx(obj_idx)

            keep_imgs = torch.cat([
                anno[anno[:, 1] == idx][:, 0] for idx in keep_hoi_idx
            ]).unique()

            keep = torch.zeros_like(cls_blob[:, 0], dtype=torch.bool)
            for img_id in keep_imgs:
                keep |= cls_blob[:, 0] == img_id
            cls_blob = cls_blob[keep]

        cls_ap, cls_rec = _compute_ap(cls_anno, cls_blob)
        ap[hoi_idx], rec[hoi_idx] = cls_ap, cls_rec

        if zero_shot_mode:
            cls_type = 'SEEN' if hoi_idx in seen_idx else 'UNSEEN'
        else:
            cls_type = 'RARE' if hoi_idx in rare_idx else 'NON_RARE'

        nncore.log_or_print(
            '{:03d} - {:<30} AP: {:.3f} | REC: {:.3f} | GT: {:<4} | '
            'DET: {:<6} | {}'.format(hoi_idx, get_hoi_name(hoi_idx),
                                     cls_ap, cls_rec, cls_anno.size(0),
                                     cls_blob.size(0), cls_type), logger)

    # Per-class values grouped by subset; reduced to means at the end.
    results = {
        f'{mode}_mAP': ap,
        f'{mode}_mRec': rec,
        f'{mode}_mAP_rare': ap[rare_idx],
        f'{mode}_mRec_rare': rec[rare_idx],
        f'{mode}_mAP_non_rare': ap[non_rare_idx],
        f'{mode}_mRec_non_rare': rec[non_rare_idx]
    }

    if zero_shot_mode:
        results.update({
            f'{mode}_mAP_seen': ap[seen_idx],
            f'{mode}_mRec_seen': rec[seen_idx],
            f'{mode}_mAP_unseen': ap[unseen_idx],
            f'{mode}_mRec_unseen': rec[unseen_idx]
        })

    results = {k: round(v.mean().item(), 4) for k, v in results.items()}
    return results