Source code for consnet.api.data

# -----------------------------------------------------
# ConsNet
# Licensed under the GNU General Public License v3.0
# Written by Ye Liu (ye-liu at whu.edu.cn)
# -----------------------------------------------------

import nncore
import torch
from scipy.io import loadmat

from .static import (ACT_IDX_TO_ACT_NAME, HOI_IDX_TO_ACT_IDX,
                     HOI_IDX_TO_OBJ_IDX, OBJ_IDX_TO_COCO_ID,
                     OBJ_IDX_TO_OBJ_NAME, RARE_HOI_IDX, UA_HOI_IDX, UC_HOI_IDX,
                     UO_HOI_IDX)


def convert_anno(anno_file, out_file, split):
    """
    Convert annotations of HICO-DET [1] dataset to COCO [2] format.

    Every kept instance contributes one COCO annotation per human box
    (category id ``1``) and one per object box (category id derived from the
    instance's HOI index). Images that end up without any annotation are not
    written to the output, and image ids are assigned consecutively to the
    images that are kept.

    Args:
        anno_file (str): Path to the ``anno_bbox.mat`` file.
        out_file (str): Path to the output file. The filename must end with
            ``'.json'``.
        split (str): The dataset split to be converted. Expected values are
            ``'train'`` and ``'test'``.

    References:
        1. Chao et al. (https://arxiv.org/abs/1702.05448)
        2. Lin et al. (https://arxiv.org/abs/1405.0312)
    """
    assert isinstance(out_file, str) and out_file.endswith('.json')
    assert split in ('train', 'test')
    nncore.is_file(anno_file, raise_error=True)

    print(f'converting annotations of *{split}* split')
    mat_anno = loadmat(anno_file)[f'bbox_{split}'][0]

    # Key order is kept as images / annotations / categories so that the
    # dumped file has the same layout as before.
    coco_anno = dict(images=[], annotations=[], categories=[])
    coco_anno['categories'].extend(
        dict(
            id=obj_idx_to_coco_id(obj_idx),
            name=get_obj_name(obj_idx).replace('_', ' '))
        for obj_idx in range(80))

    next_img_id, next_anno_id = 0, 0
    prog_bar = nncore.ProgressBar(num_tasks=len(mat_anno))

    for img_anno in mat_anno:
        img_boxes = []

        for ins in img_anno[2][0]:
            # Field 4 is presumably the "invisible" flag of the instance
            # (TODO confirm against the HICO-DET annotation layout).
            if bool(ins[4]):
                continue

            # Field 1 holds the human boxes, field 2 the object boxes.
            for field in (1, 2):
                if field == 1:
                    cat_id = 1
                else:
                    cat_id = hoi_idx_to_coco_id(int(ins[0]) - 1)

                for bbox in ins[field][0]:
                    # Boxes are unpacked as (x1, x2, y1, y2) and stored in
                    # COCO's [x, y, width, height] form.
                    x1, x2, y1, y2 = (int(v) for v in bbox)
                    box_w, box_h = x2 - x1, y2 - y1

                    img_boxes.append(
                        dict(
                            id=next_anno_id,
                            image_id=next_img_id,
                            category_id=cat_id,
                            iscrowd=0,
                            bbox=[x1, y1, box_w, box_h],
                            area=box_w * box_h))
                    next_anno_id += 1

        if img_boxes:
            coco_anno['images'].append(
                dict(
                    id=next_img_id,
                    file_name=str(img_anno[0][0]),
                    width=int(img_anno[1][0][0][0]),
                    height=int(img_anno[1][0][0][1])))
            coco_anno['annotations'].extend(img_boxes)
            next_img_id += 1

        prog_bar.update()

    print(f'saving results to {out_file}...')
    nncore.dump(coco_anno, out_file)
def load_anno(anno_file, split):
    """
    Load annotations of HICO-DET [1] dataset.

    The loaded annotations will be stored in an ``N * 10`` tensor whose rows
    are in ``(img_id, hoi_idx, human_bbox, object_bbox)`` format. The
    ``hoi_idx`` will be in the range of ``0 ~ 599`` and the bboxes will be in
    ``xyxy`` style. Invisible human-object pairs will be discarded. If no
    pair is left after filtering, an empty ``0 * 10`` tensor is returned.

    Args:
        anno_file (str): Path to the ``anno_bbox.mat`` file.
        split (str): The dataset split to be loaded. Expected values are
            ``'train'`` and ``'test'``.

    Returns:
        :obj:`Tensor[N, 10]`: The loaded annotations.

    Example:
        >>> anno = load_anno('<path-to-anno_bbox.mat>', 'train')
        >>> print(anno)
        ... tensor([[img_id_1, hoi_idx_1, human_bbox_1, object_bbox_1]
        ...         [img_id_2, hoi_idx_2, human_bbox_2, object_bbox_2]
        ...         [img_id_3, hoi_idx_3, human_bbox_3, object_bbox_3]])

    References:
        1. Chao et al. (https://arxiv.org/abs/1702.05448)
    """
    assert split in ('train', 'test')
    nncore.is_file(anno_file, raise_error=True)

    mat_anno = loadmat(anno_file)[f'bbox_{split}'][0]

    collected = []
    for img_anno in mat_anno:
        hoi_idx, h_bbox, o_bbox = [], [], []

        for ins in img_anno[2][0]:
            # Skip instances flagged as invisible.
            if bool(ins[4]):
                continue

            h_bboxes = [[float(c) for c in b] for b in ins[1][0]]
            o_bboxes = [[float(c) for c in b] for b in ins[2][0]]

            # Each connection pairs a human box with an object box using
            # 1-based positions into the two lists above.
            for conn in ins[3]:
                hoi_idx.append(int(ins[0]) - 1)
                h_bbox.append(h_bboxes[conn[0] - 1])
                o_bbox.append(o_bboxes[conn[1] - 1])

        if (num_anno := len(hoi_idx)) > 0:
            # The image id is parsed from the 8 characters that precede the
            # 4-character file extension of the file name.
            img_id = int(img_anno[0][0][-12:-4])
            img_id = torch.full((num_anno, 1), img_id, dtype=torch.float)
            hoi_idx = torch.Tensor([hoi_idx]).t()
            # Reorder columns from (x1, x2, y1, y2) to (x1, y1, x2, y2).
            h_bbox = torch.Tensor(h_bbox)[:, [0, 2, 1, 3]]
            o_bbox = torch.Tensor(o_bbox)[:, [0, 2, 1, 3]]
            anno = torch.cat((img_id, hoi_idx, h_bbox, o_bbox), dim=1)
            collected.append(anno)

    # ``torch.cat`` rejects an empty sequence, so return a well-shaped empty
    # result when nothing was collected.
    if not collected:
        return torch.empty((0, 10))

    return torch.cat(collected)
@nncore.recursive()
def obj_idx_to_coco_id(obj_idx):
    """
    Map an object index (0 ~ 79) to its COCO id (90 classes).

    Args:
        obj_idx (list[int] or int): The object index or list of object
            indexes.

    Returns:
        list[int] or int: The converted COCO id or list of COCO ids.
    """
    coco_id = OBJ_IDX_TO_COCO_ID[obj_idx]
    return coco_id
@nncore.recursive()
def hoi_idx_to_coco_id(hoi_idx):
    """
    Map an HOI index (0 ~ 599) to the COCO id (90 classes) of its object.

    The HOI index is first converted to an object index, which is then
    converted to a COCO id.

    Args:
        hoi_idx (list[int] or int): The HOI index or list of HOI indexes.

    Returns:
        list[int] or int: The converted COCO id or list of COCO ids.
    """
    return obj_idx_to_coco_id(hoi_idx_to_obj_idx(hoi_idx))
@nncore.recursive()
def hoi_idx_to_act_idx(hoi_idx):
    """
    Map an HOI index (0 ~ 599) to its action index (0 ~ 116).

    Args:
        hoi_idx (list[int] or int): The HOI index or list of HOI indexes.

    Returns:
        list[int] or int: The converted action index or list of action
            indexes.
    """
    act_idx = HOI_IDX_TO_ACT_IDX[hoi_idx]
    return act_idx
@nncore.recursive()
def hoi_idx_to_obj_idx(hoi_idx):
    """
    Map an HOI index (0 ~ 599) to its object index (0 ~ 79).

    Args:
        hoi_idx (list[int] or int): The HOI index or list of HOI indexes.

    Returns:
        list[int] or int: The converted object index or list of object
            indexes.
    """
    obj_idx = HOI_IDX_TO_OBJ_IDX[hoi_idx]
    return obj_idx
@nncore.recursive()
def act_idx_to_hoi_idx(act_idx):
    """
    Collect the HOI indexes (0 ~ 599) that use an action index (0 ~ 116).

    Args:
        act_idx (list[int] or int): The action index or list of action
            indexes.

    Returns:
        list[list] or list[int]: The converted HOI indexes.
    """
    return [
        candidate for candidate in range(600)
        if HOI_IDX_TO_ACT_IDX[candidate] == act_idx
    ]
@nncore.recursive()
def obj_idx_to_hoi_idx(obj_idx):
    """
    Collect the HOI indexes (0 ~ 599) that use an object index (0 ~ 79).

    Args:
        obj_idx (list[int] or int): The object index or list of object
            indexes.

    Returns:
        list[list] or list[int]: The converted HOI indexes.
    """
    return [
        candidate for candidate in range(600)
        if HOI_IDX_TO_OBJ_IDX[candidate] == obj_idx
    ]
def get_act_name(act_idx):
    """
    Look up the name of an action by its action index (0 ~ 116).

    Args:
        act_idx (int): The action index.

    Returns:
        str: The action name.
    """
    act_name = ACT_IDX_TO_ACT_NAME[act_idx]
    return act_name
def get_obj_name(obj_idx):
    """
    Look up the name of an object by its object index (0 ~ 79).

    Args:
        obj_idx (int): The object index.

    Returns:
        str: The object name.
    """
    obj_name = OBJ_IDX_TO_OBJ_NAME[obj_idx]
    return obj_name
def get_act_and_obj_name(hoi_idx):
    """
    Look up the action and object names of an HOI index (0 ~ 599).

    Args:
        hoi_idx (int): The HOI index.

    Returns:
        tuple[str]: The action and object name.
    """
    # Resolve the action first, then the object, as a pair of names.
    act_name = get_act_name(hoi_idx_to_act_idx(hoi_idx))
    obj_name = get_obj_name(hoi_idx_to_obj_idx(hoi_idx))
    return act_name, obj_name
def get_hoi_name(hoi_idx):
    """
    Build the HOI name of an HOI index (0 ~ 599).

    The name is the action name and the object name joined by an underscore.

    Args:
        hoi_idx (int): The HOI index.

    Returns:
        str: The HOI name.
    """
    names = get_act_and_obj_name(hoi_idx)
    return '{}_{}'.format(*names)
def get_rare_hoi_idx():
    """
    Get rare HOI indexes (0 ~ 599).

    The module-level ``RARE_HOI_IDX`` object itself is returned, not a copy.

    Returns:
        list[int]: The list of rare HOI indexes.
    """
    rare_hoi_idx = RARE_HOI_IDX
    return rare_hoi_idx
def get_non_rare_hoi_idx():
    """
    Get non-rare HOI indexes (0 ~ 599).

    These are all indexes in ``0 ~ 599`` that do not appear in
    ``RARE_HOI_IDX``, in ascending order.

    Returns:
        list[int]: The list of non-rare HOI indexes.
    """
    # Build the lookup set once instead of scanning the rare list for each
    # of the 600 candidates.
    rare = set(RARE_HOI_IDX)
    return [idx for idx in range(600) if idx not in rare]
def get_seen_hoi_idx(type, id=None):
    """
    Get seen HOI indexes (0 ~ 599).

    These are all indexes in ``0 ~ 599`` that are not returned by
    :func:`get_unseen_hoi_idx` for the same arguments, in ascending order.

    Args:
        type (str): Type of the zero-shot scenario. Expected values include
            ``'uc'``, ``'uo'`` and ``'ua'``, representing unseen
            action-object combination, unseen object and unseen action
            scenarios.
        id (int, optional): Only valid when ``type='uc'``. Expected values
            are in the range of ``0 ~ 4``, indicating the 5 groups of unseen
            action-object combination settings in [1, 2].

    Returns:
        list[int]: The list of seen HOI indexes.

    References:
        1. Liu et al. (https://arxiv.org/abs/2008.06254)
        2. Bansal et al. (https://arxiv.org/abs/1904.03181)
    """
    # Build the lookup set once instead of scanning the unseen list for each
    # of the 600 candidates.
    unseen = set(get_unseen_hoi_idx(type, id=id))
    return [idx for idx in range(600) if idx not in unseen]
def get_unseen_hoi_idx(type, id=None):
    """
    Get unseen HOI indexes (0 ~ 599).

    Args:
        type (str): Type of the zero-shot scenario. Expected values include
            ``'uc'``, ``'uo'`` and ``'ua'``, representing unseen
            action-object combination, unseen object and unseen action
            scenarios.
        id (int, optional): Only valid when ``type='uc'``. Expected values
            are in the range of ``0 ~ 4``, indicating the 5 groups of unseen
            action-object combination settings in [1, 2].

    Returns:
        list[int]: The list of unseen HOI indexes.

    References:
        1. Liu et al. (https://arxiv.org/abs/2008.06254)
        2. Bansal et al. (https://arxiv.org/abs/1904.03181)
    """
    assert type in ('uc', 'uo', 'ua')

    # ``id`` selects the group only in the 'uc' scenario; it is ignored for
    # the other two.
    if type == 'uc':
        return UC_HOI_IDX[id]
    if type == 'uo':
        return UO_HOI_IDX
    return UA_HOI_IDX
def scatter_hum_to_hoi(tensor):
    """
    Scatter human to HOI classes.

    The single input column is tiled 600 times along the second dimension.

    Args:
        tensor (:obj:`torch.Tensor[N, 1]`): The input tensor.

    Returns:
        :obj:`torch.Tensor[N, 600]`: The scattered tensor.
    """
    num_hoi = 600
    scattered = tensor.repeat(1, num_hoi)
    return scattered
def scatter_act_to_hoi(tensor):
    """
    Scatter action classes to HOI classes.

    Each output column is the input column of the action that the
    corresponding HOI class maps to in ``HOI_IDX_TO_ACT_IDX``.

    Args:
        tensor (:obj:`torch.Tensor[N, 117]`): The input tensor.

    Returns:
        :obj:`torch.Tensor[N, 600]`: The scattered tensor.
    """
    act_columns = HOI_IDX_TO_ACT_IDX
    return tensor[:, act_columns]
def scatter_obj_to_hoi(tensor):
    """
    Scatter object classes to HOI classes.

    Each output column is the input column of the object that the
    corresponding HOI class maps to in ``HOI_IDX_TO_OBJ_IDX``.

    Args:
        tensor (:obj:`torch.Tensor[N, 80]`): The input tensor.

    Returns:
        :obj:`torch.Tensor[N, 600]`: The scattered tensor.
    """
    obj_columns = HOI_IDX_TO_OBJ_IDX
    return tensor[:, obj_columns]