From 999a4d23a9cc9d45b849dadb7983b38c6dbf8a77 Mon Sep 17 00:00:00 2001 From: Guanzhong Chen Date: Tue, 19 Dec 2023 16:22:47 +0800 Subject: [PATCH] 1 --- .../cv/detection/ssd/update_ssd_mmdet.diff | 811 ++++++++++++++++++ 1 file changed, 811 insertions(+) create mode 100644 AscendIE/TorchAIE/built-in/cv/detection/ssd/update_ssd_mmdet.diff diff --git a/AscendIE/TorchAIE/built-in/cv/detection/ssd/update_ssd_mmdet.diff b/AscendIE/TorchAIE/built-in/cv/detection/ssd/update_ssd_mmdet.diff new file mode 100644 index 0000000000..d3ceb38444 --- /dev/null +++ b/AscendIE/TorchAIE/built-in/cv/detection/ssd/update_ssd_mmdet.diff @@ -0,0 +1,811 @@ +diff --git a/mmdet/core/bbox/coder/delta_xywh_bbox_coder.py b/mmdet/core/bbox/coder/delta_xywh_bbox_coder.py +index e9eb3579..066e90fe 100644 +--- a/mmdet/core/bbox/coder/delta_xywh_bbox_coder.py ++++ b/mmdet/core/bbox/coder/delta_xywh_bbox_coder.py +@@ -1,3 +1,7 @@ ++# Copyright (c) OpenMMLab. All rights reserved. ++import warnings ++ ++import mmcv + import numpy as np + import torch + +@@ -20,16 +24,25 @@ class DeltaXYWHBBoxCoder(BaseBBoxCoder): + target for delta coordinates + clip_border (bool, optional): Whether clip the objects outside the + border of the image. Defaults to True. ++ add_ctr_clamp (bool): Whether to add center clamp, when added, the ++ predicted box is clamped is its center is too far away from ++ the original anchor's center. Only used by YOLOF. Default False. ++ ctr_clamp (int): the maximum pixel shift to clamp. Only used by YOLOF. ++ Default 32. + """ + + def __init__(self, + target_means=(0., 0., 0., 0.), + target_stds=(1., 1., 1., 1.), +- clip_border=True): ++ clip_border=True, ++ add_ctr_clamp=False, ++ ctr_clamp=32): + super(BaseBBoxCoder, self).__init__() + self.means = target_means + self.stds = target_stds + self.clip_border = clip_border ++ self.add_ctr_clamp = add_ctr_clamp ++ self.ctr_clamp = ctr_clamp + + def encode(self, bboxes, gt_bboxes): + """Get box regression transformation deltas that can be used to +@@ -57,10 +70,16 @@ class DeltaXYWHBBoxCoder(BaseBBoxCoder): + """Apply transformation `pred_bboxes` to `boxes`. + + Args: +- boxes (torch.Tensor): Basic boxes. +- pred_bboxes (torch.Tensor): Encoded boxes with shape +- max_shape (tuple[int], optional): Maximum shape of boxes. +- Defaults to None. ++ bboxes (torch.Tensor): Basic boxes. Shape (B, N, 4) or (N, 4) ++ pred_bboxes (Tensor): Encoded offsets with respect to each roi. ++ Has shape (B, N, num_classes * 4) or (B, N, 4) or ++ (N, num_classes * 4) or (N, 4). Note N = num_anchors * W * H ++ when rois is a grid of anchors.Offset encoding follows [1]_. ++ max_shape (Sequence[int] or torch.Tensor or Sequence[ ++ Sequence[int]],optional): Maximum bounds for boxes, specifies ++ (H, W, C) or (H, W). If bboxes shape is (B, N, 4), then ++ the max_shape should be a Sequence[Sequence[int]] ++ and the length of max_shape should also be B. + wh_ratio_clip (float, optional): The allowed ratio between + width and height. 
+ +@@ -69,8 +88,28 @@ class DeltaXYWHBBoxCoder(BaseBBoxCoder): + """ + + assert pred_bboxes.size(0) == bboxes.size(0) +- decoded_bboxes = delta2bbox(bboxes, pred_bboxes, self.means, self.stds, +- max_shape, wh_ratio_clip, self.clip_border) ++ if pred_bboxes.ndim == 3: ++ assert pred_bboxes.size(1) == bboxes.size(1) ++ ++ if pred_bboxes.ndim == 2 and not True: ++ # single image decode ++ decoded_bboxes = delta2bbox(bboxes, pred_bboxes, self.means, ++ self.stds, max_shape, wh_ratio_clip, ++ self.clip_border, self.add_ctr_clamp, ++ self.ctr_clamp) ++ else: ++ if pred_bboxes.ndim == 3 and not True: ++ warnings.warn( ++ 'DeprecationWarning: onnx_delta2bbox is deprecated ' ++ 'in the case of batch decoding and non-ONNX, ' ++ 'please use “delta2bbox” instead. In order to improve ' ++ 'the decoding speed, the batch function will no ' ++ 'longer be supported. ') ++ decoded_bboxes = onnx_delta2bbox(bboxes, pred_bboxes, self.means, ++ self.stds, max_shape, ++ wh_ratio_clip, self.clip_border, ++ self.add_ctr_clamp, ++ self.ctr_clamp) + + return decoded_bboxes + +@@ -126,7 +165,108 @@ def delta2bbox(rois, + stds=(1., 1., 1., 1.), + max_shape=None, + wh_ratio_clip=16 / 1000, +- clip_border=True): ++ clip_border=True, ++ add_ctr_clamp=False, ++ ctr_clamp=32): ++ """Apply deltas to shift/scale base boxes. ++ ++ Typically the rois are anchor or proposed bounding boxes and the deltas are ++ network outputs used to shift/scale those boxes. ++ This is the inverse function of :func:`bbox2delta`. ++ ++ Args: ++ rois (Tensor): Boxes to be transformed. Has shape (N, 4). ++ deltas (Tensor): Encoded offsets relative to each roi. ++ Has shape (N, num_classes * 4) or (N, 4). Note ++ N = num_base_anchors * W * H, when rois is a grid of ++ anchors. Offset encoding follows [1]_. ++ means (Sequence[float]): Denormalizing means for delta coordinates. ++ Default (0., 0., 0., 0.). ++ stds (Sequence[float]): Denormalizing standard deviation for delta ++ coordinates. Default (1., 1., 1., 1.). ++ max_shape (tuple[int, int]): Maximum bounds for boxes, specifies ++ (H, W). Default None. ++ wh_ratio_clip (float): Maximum aspect ratio for boxes. Default ++ 16 / 1000. ++ clip_border (bool, optional): Whether clip the objects outside the ++ border of the image. Default True. ++ add_ctr_clamp (bool): Whether to add center clamp. When set to True, ++ the center of the prediction bounding box will be clamped to ++ avoid being too far away from the center of the anchor. ++ Only used by YOLOF. Default False. ++ ctr_clamp (int): the maximum pixel shift to clamp. Only used by YOLOF. ++ Default 32. ++ ++ Returns: ++ Tensor: Boxes with shape (N, num_classes * 4) or (N, 4), where 4 ++ represent tl_x, tl_y, br_x, br_y. ++ ++ References: ++ .. 
[1] https://arxiv.org/abs/1311.2524 ++ ++ Example: ++ >>> rois = torch.Tensor([[ 0., 0., 1., 1.], ++ >>> [ 0., 0., 1., 1.], ++ >>> [ 0., 0., 1., 1.], ++ >>> [ 5., 5., 5., 5.]]) ++ >>> deltas = torch.Tensor([[ 0., 0., 0., 0.], ++ >>> [ 1., 1., 1., 1.], ++ >>> [ 0., 0., 2., -1.], ++ >>> [ 0.7, -1.9, -0.5, 0.3]]) ++ >>> delta2bbox(rois, deltas, max_shape=(32, 32, 3)) ++ tensor([[0.0000, 0.0000, 1.0000, 1.0000], ++ [0.1409, 0.1409, 2.8591, 2.8591], ++ [0.0000, 0.3161, 4.1945, 0.6839], ++ [5.0000, 5.0000, 5.0000, 5.0000]]) ++ """ ++ num_bboxes, num_classes = deltas.size(0), deltas.size(1) // 4 ++ if num_bboxes == 0: ++ return deltas ++ ++ deltas = deltas.reshape(-1, 4) ++ ++ means = deltas.new_tensor(means).view(1, -1) ++ stds = deltas.new_tensor(stds).view(1, -1) ++ denorm_deltas = deltas * stds + means ++ ++ dxy = denorm_deltas[:, :2] ++ dwh = denorm_deltas[:, 2:] ++ ++ # Compute width/height of each roi ++ rois_ = rois.repeat(1, num_classes).reshape(-1, 4) ++ pxy = ((rois_[:, :2] + rois_[:, 2:]) * 0.5) ++ pwh = (rois_[:, 2:] - rois_[:, :2]) ++ ++ dxy_wh = pwh * dxy ++ ++ max_ratio = np.abs(np.log(wh_ratio_clip)) ++ if add_ctr_clamp: ++ dxy_wh = torch.clamp(dxy_wh, max=ctr_clamp, min=-ctr_clamp) ++ dwh = torch.clamp(dwh, max=max_ratio) ++ else: ++ dwh = dwh.clamp(min=-max_ratio, max=max_ratio) ++ ++ gxy = pxy + dxy_wh ++ gwh = pwh * dwh.exp() ++ x1y1 = gxy - (gwh * 0.5) ++ x2y2 = gxy + (gwh * 0.5) ++ bboxes = torch.cat([x1y1, x2y2], dim=-1) ++ if clip_border and max_shape is not None: ++ bboxes[..., 0::2].clamp_(min=0, max=max_shape[1]) ++ bboxes[..., 1::2].clamp_(min=0, max=max_shape[0]) ++ bboxes = bboxes.reshape(num_bboxes, -1) ++ return bboxes ++ ++ ++def onnx_delta2bbox(rois, ++ deltas, ++ means=(0., 0., 0., 0.), ++ stds=(1., 1., 1., 1.), ++ max_shape=None, ++ wh_ratio_clip=16 / 1000, ++ clip_border=True, ++ add_ctr_clamp=False, ++ ctr_clamp=32): + """Apply deltas to shift/scale base boxes. + + Typically the rois are anchor or proposed bounding boxes and the deltas are +@@ -134,21 +274,34 @@ def delta2bbox(rois, + This is the inverse function of :func:`bbox2delta`. + + Args: +- rois (Tensor): Boxes to be transformed. Has shape (N, 4) ++ rois (Tensor): Boxes to be transformed. Has shape (N, 4) or (B, N, 4) + deltas (Tensor): Encoded offsets with respect to each roi. +- Has shape (N, 4 * num_classes). Note N = num_anchors * W * H when +- rois is a grid of anchors. Offset encoding follows [1]_. +- means (Sequence[float]): Denormalizing means for delta coordinates ++ Has shape (B, N, num_classes * 4) or (B, N, 4) or ++ (N, num_classes * 4) or (N, 4). Note N = num_anchors * W * H ++ when rois is a grid of anchors.Offset encoding follows [1]_. ++ means (Sequence[float]): Denormalizing means for delta coordinates. ++ Default (0., 0., 0., 0.). + stds (Sequence[float]): Denormalizing standard deviation for delta +- coordinates +- max_shape (tuple[int, int]): Maximum bounds for boxes. specifies (H, W) ++ coordinates. Default (1., 1., 1., 1.). ++ max_shape (Sequence[int] or torch.Tensor or Sequence[ ++ Sequence[int]],optional): Maximum bounds for boxes, specifies ++ (H, W, C) or (H, W). If rois shape is (B, N, 4), then ++ the max_shape should be a Sequence[Sequence[int]] ++ and the length of max_shape should also be B. Default None. + wh_ratio_clip (float): Maximum aspect ratio for boxes. ++ Default 16 / 1000. + clip_border (bool, optional): Whether clip the objects outside the +- border of the image. Defaults to True. ++ border of the image. Default True. 
++ add_ctr_clamp (bool): Whether to add center clamp, when added, the ++ predicted box is clamped is its center is too far away from ++ the original anchor's center. Only used by YOLOF. Default False. ++ ctr_clamp (int): the maximum pixel shift to clamp. Only used by YOLOF. ++ Default 32. + + Returns: +- Tensor: Boxes with shape (N, 4), where columns represent +- tl_x, tl_y, br_x, br_y. ++ Tensor: Boxes with shape (B, N, num_classes * 4) or (B, N, 4) or ++ (N, num_classes * 4) or (N, 4), where 4 represent ++ tl_x, tl_y, br_x, br_y. + + References: + .. [1] https://arxiv.org/abs/1311.2524 +@@ -162,43 +315,76 @@ def delta2bbox(rois, + >>> [ 1., 1., 1., 1.], + >>> [ 0., 0., 2., -1.], + >>> [ 0.7, -1.9, -0.5, 0.3]]) +- >>> delta2bbox(rois, deltas, max_shape=(32, 32)) ++ >>> delta2bbox(rois, deltas, max_shape=(32, 32, 3)) + tensor([[0.0000, 0.0000, 1.0000, 1.0000], + [0.1409, 0.1409, 2.8591, 2.8591], + [0.0000, 0.3161, 4.1945, 0.6839], + [5.0000, 5.0000, 5.0000, 5.0000]]) + """ +- means = deltas.new_tensor(means).view(1, -1).repeat(1, deltas.size(1) // 4) +- stds = deltas.new_tensor(stds).view(1, -1).repeat(1, deltas.size(1) // 4) ++ means = deltas.new_tensor(means).view(1, ++ -1).repeat(1, ++ deltas.size(-1) // 4) ++ stds = deltas.new_tensor(stds).view(1, -1).repeat(1, deltas.size(-1) // 4) + denorm_deltas = deltas * stds + means +- dx = denorm_deltas[:, 0::4] +- dy = denorm_deltas[:, 1::4] +- dw = denorm_deltas[:, 2::4] +- dh = denorm_deltas[:, 3::4] +- max_ratio = np.abs(np.log(wh_ratio_clip)) +- dw = dw.clamp(min=-max_ratio, max=max_ratio) +- dh = dh.clamp(min=-max_ratio, max=max_ratio) ++ dx = denorm_deltas[..., 0::4] ++ dy = denorm_deltas[..., 1::4] ++ dw = denorm_deltas[..., 2::4] ++ dh = denorm_deltas[..., 3::4] ++ ++ x1, y1 = rois[..., 0], rois[..., 1] ++ x2, y2 = rois[..., 2], rois[..., 3] + # Compute center of each roi +- px = ((rois[:, 0] + rois[:, 2]) * 0.5).unsqueeze(1).expand_as(dx) +- py = ((rois[:, 1] + rois[:, 3]) * 0.5).unsqueeze(1).expand_as(dy) ++ px = ((x1 + x2) * 0.5).unsqueeze(-1).expand_as(dx) ++ py = ((y1 + y2) * 0.5).unsqueeze(-1).expand_as(dy) + # Compute width/height of each roi +- pw = (rois[:, 2] - rois[:, 0]).unsqueeze(1).expand_as(dw) +- ph = (rois[:, 3] - rois[:, 1]).unsqueeze(1).expand_as(dh) ++ pw = (x2 - x1).unsqueeze(-1).expand_as(dw) ++ ph = (y2 - y1).unsqueeze(-1).expand_as(dh) ++ ++ dx_width = pw * dx ++ dy_height = ph * dy ++ ++ max_ratio = np.abs(np.log(wh_ratio_clip)) ++ if add_ctr_clamp: ++ dx_width = torch.clamp(dx_width, max=ctr_clamp, min=-ctr_clamp) ++ dy_height = torch.clamp(dy_height, max=ctr_clamp, min=-ctr_clamp) ++ dw = torch.clamp(dw, max=max_ratio) ++ dh = torch.clamp(dh, max=max_ratio) ++ else: ++ dw = dw.clamp(min=-max_ratio, max=max_ratio) ++ dh = dh.clamp(min=-max_ratio, max=max_ratio) + # Use exp(network energy) to enlarge/shrink each roi + gw = pw * dw.exp() + gh = ph * dh.exp() + # Use network energy to shift the center of each roi +- gx = px + pw * dx +- gy = py + ph * dy ++ gx = px + dx_width ++ gy = py + dy_height + # Convert center-xy/width/height to top-left, bottom-right + x1 = gx - gw * 0.5 + y1 = gy - gh * 0.5 + x2 = gx + gw * 0.5 + y2 = gy + gh * 0.5 +- if clip_border and max_shape is not None: +- x1 = x1.clamp(min=0, max=max_shape[1]) +- y1 = y1.clamp(min=0, max=max_shape[0]) +- x2 = x2.clamp(min=0, max=max_shape[1]) +- y2 = y2.clamp(min=0, max=max_shape[0]) ++ + bboxes = torch.stack([x1, y1, x2, y2], dim=-1).view(deltas.size()) ++ ++ if clip_border and max_shape is not None: ++ # clip bboxes with dynamic `min` and `max` 
for onnx ++ if True: ++ from mmdet.core.export.onnx_helper import dynamic_clip_for_onnx ++ x1, y1, x2, y2 = dynamic_clip_for_onnx(x1, y1, x2, y2, max_shape) ++ bboxes = torch.stack([x1, y1, x2, y2], dim=-1).view(deltas.size()) ++ return bboxes ++ if not isinstance(max_shape, torch.Tensor): ++ max_shape = x1.new_tensor(max_shape) ++ max_shape = max_shape[..., :2].type_as(x1) ++ if max_shape.ndim == 2: ++ assert bboxes.ndim == 3 ++ assert max_shape.size(0) == bboxes.size(0) ++ ++ min_xy = x1.new_tensor(0) ++ max_xy = torch.cat( ++ [max_shape] * (deltas.size(-1) // 2), ++ dim=-1).flip(-1).unsqueeze(-2) ++ bboxes = torch.where(bboxes < min_xy, min_xy, bboxes) ++ bboxes = torch.where(bboxes > max_xy, max_xy, bboxes) ++ + return bboxes +diff --git a/mmdet/core/export/pytorch2onnx.py b/mmdet/core/export/pytorch2onnx.py +index 8f9309df..b9f43d48 100644 +--- a/mmdet/core/export/pytorch2onnx.py ++++ b/mmdet/core/export/pytorch2onnx.py +@@ -39,6 +39,7 @@ def generate_inputs_and_wrap_model(config_path, checkpoint_path, input_config): + + model = build_model_from_cfg(config_path, checkpoint_path) + one_img, one_meta = preprocess_example_input(input_config) ++ one_meta['img_shape_for_onnx'] = one_img.shape[-2:] + tensor_data = [one_img] + model.forward = partial( + model.forward, img_metas=[[one_meta]], return_loss=False) +diff --git a/mmdet/core/post_processing/bbox_nms.py b/mmdet/core/post_processing/bbox_nms.py +index 463fe2e4..72ca09d3 100644 +--- a/mmdet/core/post_processing/bbox_nms.py ++++ b/mmdet/core/post_processing/bbox_nms.py +@@ -55,7 +55,7 @@ def multiclass_nms(multi_bboxes, + inds = valid_mask.nonzero(as_tuple=False).squeeze(1) + bboxes, scores, labels = bboxes[inds], scores[inds], labels[inds] + if inds.numel() == 0: +- if torch.onnx.is_in_onnx_export(): ++ if True: + raise RuntimeError('[ONNX Error] Can not record NMS ' + 'as it has not been executed this time') + if return_inds: +diff --git a/mmdet/models/backbones/ssd_vgg.py b/mmdet/models/backbones/ssd_vgg.py +index cbc4fbb2..4bb7e37a 100644 +--- a/mmdet/models/backbones/ssd_vgg.py ++++ b/mmdet/models/backbones/ssd_vgg.py +@@ -162,8 +162,14 @@ class L2Norm(nn.Module): + + def forward(self, x): + """Forward function.""" +- # normalization layer convert to FP32 in FP16 training ++ # # normalization layer convert to FP32 in FP16 training ++ # x_float = x.float() ++ # norm = x_float.pow(2).sum(1, keepdim=True).sqrt() + self.eps ++ # return (self.weight[None, :, None, None].float().expand_as(x_float) * ++ # x_float / norm).type_as(x) ++ + x_float = x.float() +- norm = x_float.pow(2).sum(1, keepdim=True).sqrt() + self.eps ++ x_mul = x_float * x_float ++ norm = x_mul.sum(1, keepdim=True).sqrt() + self.eps + return (self.weight[None, :, None, None].float().expand_as(x_float) * + x_float / norm).type_as(x) +diff --git a/mmdet/models/dense_heads/anchor_head.py b/mmdet/models/dense_heads/anchor_head.py +index a5bb4137..6e0d892e 100644 +--- a/mmdet/models/dense_heads/anchor_head.py ++++ b/mmdet/models/dense_heads/anchor_head.py +@@ -487,6 +487,162 @@ class AnchorHead(BaseDenseHead, BBoxTestMixin): + num_total_samples=num_total_samples) + return dict(loss_cls=losses_cls, loss_bbox=losses_bbox) + ++ @force_fp32(apply_to=('cls_scores', 'bbox_preds')) ++ def onnx_export(self, ++ cls_scores, ++ bbox_preds, ++ score_factors=None, ++ img_metas=None, ++ with_nms=True): ++ """Transform network output for a batch into bbox predictions. 
++ ++ Args: ++ cls_scores (list[Tensor]): Box scores for each scale level ++ with shape (N, num_points * num_classes, H, W). ++ bbox_preds (list[Tensor]): Box energies / deltas for each scale ++ level with shape (N, num_points * 4, H, W). ++ score_factors (list[Tensor]): score_factors for each s ++ cale level with shape (N, num_points * 1, H, W). ++ Default: None. ++ img_metas (list[dict]): Meta information of each image, e.g., ++ image size, scaling factor, etc. Default: None. ++ with_nms (bool): Whether apply nms to the bboxes. Default: True. ++ ++ Returns: ++ tuple[Tensor, Tensor] | list[tuple]: When `with_nms` is True, ++ it is tuple[Tensor, Tensor], first tensor bboxes with shape ++ [N, num_det, 5], 5 arrange as (x1, y1, x2, y2, score) ++ and second element is class labels of shape [N, num_det]. ++ When `with_nms` is False, first tensor is bboxes with ++ shape [N, num_det, 4], second tensor is raw score has ++ shape [N, num_det, num_classes]. ++ """ ++ assert len(cls_scores) == len(bbox_preds) ++ ++ num_levels = len(cls_scores) ++ ++ featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores] ++ ++ mlvl_priors = self.anchor_generator.grid_anchors( ++ featmap_sizes, device=bbox_preds[0].device) ++ ++ mlvl_cls_scores = [cls_scores[i].detach() for i in range(num_levels)] ++ mlvl_bbox_preds = [bbox_preds[i].detach() for i in range(num_levels)] ++ ++ assert len( ++ img_metas ++ ) == 1, 'Only support one input image while in exporting to ONNX' ++ img_shape = torch.tensor( ++ img_metas[0]['img_shape_for_onnx'], ++ dtype=torch.long, ++ device=bbox_preds[0].device) ++ ++ cfg = self.test_cfg ++ assert len(cls_scores) == len(bbox_preds) == len(mlvl_priors) ++ device = cls_scores[0].device ++ batch_size = cls_scores[0].shape[0] ++ # convert to tensor to keep tracing ++ nms_pre_tensor = torch.tensor( ++ cfg.get('nms_pre', -1), device=device, dtype=torch.long) ++ ++ # e.g. Retina, FreeAnchor, etc. ++ if score_factors is None: ++ with_score_factors = False ++ mlvl_score_factor = [None for _ in range(num_levels)] ++ else: ++ # e.g. FCOS, PAA, ATSS, etc. ++ with_score_factors = True ++ mlvl_score_factor = [ ++ score_factors[i].detach() for i in range(num_levels) ++ ] ++ mlvl_score_factors = [] ++ ++ mlvl_batch_bboxes = [] ++ mlvl_scores = [] ++ ++ for cls_score, bbox_pred, score_factors, priors in zip( ++ mlvl_cls_scores, mlvl_bbox_preds, mlvl_score_factor, ++ mlvl_priors): ++ assert cls_score.size()[-2:] == bbox_pred.size()[-2:] ++ ++ scores = cls_score.permute(0, 2, 3, ++ 1).reshape(batch_size, -1, ++ self.cls_out_channels) ++ if self.use_sigmoid_cls: ++ scores = scores.sigmoid() ++ nms_pre_score = scores ++ else: ++ scores = scores.softmax(-1) ++ nms_pre_score = scores ++ ++ if with_score_factors: ++ score_factors = score_factors.permute(0, 2, 3, 1).reshape( ++ batch_size, -1).sigmoid() ++ bbox_pred = bbox_pred.permute(0, 2, 3, ++ 1).reshape(batch_size, -1, 4) ++ priors = priors.expand(batch_size, -1, priors.size(-1)) ++ # Get top-k predictions ++ from mmdet.core.export.onnx_helper import get_k_for_topk ++ nms_pre = get_k_for_topk(nms_pre_tensor, bbox_pred.shape[1]) ++ if nms_pre > 0: ++ ++ if with_score_factors: ++ nms_pre_score = (nms_pre_score * score_factors[..., None]) ++ else: ++ nms_pre_score = nms_pre_score ++ ++ # Get maximum scores for foreground classes. 
++ if self.use_sigmoid_cls: ++ max_scores, _ = nms_pre_score.max(-1) ++ else: ++ # remind that we set FG labels to [0, num_class-1] ++ # since mmdet v2.0 ++ # BG cat_id: num_class ++ max_scores, _ = nms_pre_score[..., :-1].max(-1) ++ _, topk_inds = max_scores.topk(nms_pre) ++ ++ batch_inds = torch.arange( ++ batch_size, device=bbox_pred.device).view( ++ -1, 1).expand_as(topk_inds).long() ++ # Avoid onnx2tensorrt issue in https://github.com/NVIDIA/TensorRT/issues/1134 # noqa: E501 ++ # transformed_inds = bbox_pred.shape[1] * batch_inds + topk_inds ++ transformed_inds = (bbox_pred.shape[1] * batch_inds).int() + topk_inds.int() ++ transformed_inds = transformed_inds.long() ++ priors = priors.reshape( ++ -1, priors.size(-1))[transformed_inds, :].reshape( ++ batch_size, -1, priors.size(-1)) ++ bbox_pred = bbox_pred.reshape(-1, ++ 4)[transformed_inds, :].reshape( ++ batch_size, -1, 4) ++ scores = scores.reshape( ++ -1, self.cls_out_channels)[transformed_inds, :].reshape( ++ batch_size, -1, self.cls_out_channels) ++ if with_score_factors: ++ score_factors = score_factors.reshape( ++ -1, 1)[transformed_inds].reshape(batch_size, -1) ++ ++ bboxes = self.bbox_coder.decode( ++ priors, bbox_pred, max_shape=img_shape) ++ ++ mlvl_batch_bboxes.append(bboxes) ++ mlvl_scores.append(scores) ++ if with_score_factors: ++ mlvl_score_factors.append(score_factors) ++ ++ batch_bboxes = torch.cat(mlvl_batch_bboxes, dim=1) ++ batch_scores = torch.cat(mlvl_scores, dim=1) ++ if with_score_factors: ++ batch_score_factors = torch.cat(mlvl_score_factors, dim=1) ++ ++ if not self.use_sigmoid_cls: ++ batch_scores = batch_scores[..., :self.num_classes] ++ ++ if with_score_factors: ++ batch_scores = batch_scores * (batch_score_factors.unsqueeze(2)) ++ ++ # directly return bboxes without NMS ++ return batch_bboxes, batch_scores ++ + @force_fp32(apply_to=('cls_scores', 'bbox_preds')) + def get_bboxes(self, + cls_scores, +@@ -545,38 +701,45 @@ class AnchorHead(BaseDenseHead, BBoxTestMixin): + >>> assert det_bboxes.shape[1] == 5 + >>> assert len(det_bboxes) == len(det_labels) == cfg.max_per_img + """ +- assert len(cls_scores) == len(bbox_preds) +- num_levels = len(cls_scores) +- +- device = cls_scores[0].device +- featmap_sizes = [cls_scores[i].shape[-2:] for i in range(num_levels)] +- mlvl_anchors = self.anchor_generator.grid_anchors( +- featmap_sizes, device=device) +- +- result_list = [] +- for img_id in range(len(img_metas)): +- cls_score_list = [ +- cls_scores[i][img_id].detach() for i in range(num_levels) +- ] +- bbox_pred_list = [ +- bbox_preds[i][img_id].detach() for i in range(num_levels) +- ] +- img_shape = img_metas[img_id]['img_shape'] +- scale_factor = img_metas[img_id]['scale_factor'] +- if with_nms: +- # some heads don't support with_nms argument +- proposals = self._get_bboxes_single(cls_score_list, +- bbox_pred_list, +- mlvl_anchors, img_shape, +- scale_factor, cfg, rescale) +- else: +- proposals = self._get_bboxes_single(cls_score_list, +- bbox_pred_list, +- mlvl_anchors, img_shape, +- scale_factor, cfg, rescale, +- with_nms) +- result_list.append(proposals) +- return result_list ++ if True: ++ return self.onnx_export(cls_scores, ++ bbox_preds, ++ score_factors=None, ++ img_metas=img_metas, ++ with_nms=with_nms) ++ else: ++ assert len(cls_scores) == len(bbox_preds) ++ num_levels = len(cls_scores) ++ ++ device = cls_scores[0].device ++ featmap_sizes = [cls_scores[i].shape[-2:] for i in range(num_levels)] ++ mlvl_anchors = self.anchor_generator.grid_anchors( ++ featmap_sizes, device=device) ++ ++ result_list = [] 
++ for img_id in range(len(img_metas)): ++ cls_score_list = [ ++ cls_scores[i][img_id].detach() for i in range(num_levels) ++ ] ++ bbox_pred_list = [ ++ bbox_preds[i][img_id].detach() for i in range(num_levels) ++ ] ++ img_shape = img_metas[img_id]['img_shape'] ++ scale_factor = img_metas[img_id]['scale_factor'] ++ if with_nms: ++ # some heads don't support with_nms argument ++ proposals = self._get_bboxes_single(cls_score_list, ++ bbox_pred_list, ++ mlvl_anchors, img_shape, ++ scale_factor, cfg, rescale) ++ else: ++ proposals = self._get_bboxes_single(cls_score_list, ++ bbox_pred_list, ++ mlvl_anchors, img_shape, ++ scale_factor, cfg, rescale, ++ with_nms) ++ result_list.append(proposals) ++ return result_list + + def _get_bboxes_single(self, + cls_score_list, +@@ -612,6 +775,7 @@ class AnchorHead(BaseDenseHead, BBoxTestMixin): + are bounding box positions (tl_x, tl_y, br_x, br_y) and the + 5-th column is a score between 0 and 1. + """ ++ print('in _get_bboxes_single') + cfg = self.test_cfg if cfg is None else cfg + assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_anchors) + mlvl_bboxes = [] +diff --git a/mmdet/models/dense_heads/yolo_head.py b/mmdet/models/dense_heads/yolo_head.py +index 93d051e7..94e496b5 100644 +--- a/mmdet/models/dense_heads/yolo_head.py ++++ b/mmdet/models/dense_heads/yolo_head.py +@@ -281,7 +281,7 @@ class YOLOV3Head(BaseDenseHead, BBoxTestMixin): + # Get top-k prediction + nms_pre = cfg.get('nms_pre', -1) + if 0 < nms_pre < conf_pred.size(0) and ( +- not torch.onnx.is_in_onnx_export()): ++ not True): + _, topk_inds = conf_pred.topk(nms_pre) + bbox_pred = bbox_pred[topk_inds, :] + cls_pred = cls_pred[topk_inds, :] +diff --git a/mmdet/models/detectors/base.py b/mmdet/models/detectors/base.py +index 7c6d5e96..8bd74238 100644 +--- a/mmdet/models/detectors/base.py ++++ b/mmdet/models/detectors/base.py +@@ -179,6 +179,8 @@ class BaseDetector(nn.Module, metaclass=ABCMeta): + if return_loss: + return self.forward_train(img, img_metas, **kwargs) + else: ++ if not isinstance(img, list): ++ img = [img] + return self.forward_test(img, img_metas, **kwargs) + + def _parse_losses(self, losses): +diff --git a/mmdet/models/detectors/single_stage.py b/mmdet/models/detectors/single_stage.py +index 96c4acac..f5be2641 100644 +--- a/mmdet/models/detectors/single_stage.py ++++ b/mmdet/models/detectors/single_stage.py +@@ -114,7 +114,7 @@ class SingleStageDetector(BaseDetector): + bbox_list = self.bbox_head.get_bboxes( + *outs, img_metas, rescale=rescale) + # skip post-processing when exporting to ONNX +- if torch.onnx.is_in_onnx_export(): ++ if True: + return bbox_list + + bbox_results = [ +diff --git a/mmdet/models/roi_heads/cascade_roi_head.py b/mmdet/models/roi_heads/cascade_roi_head.py +index 45b6f36a..1199a443 100644 +--- a/mmdet/models/roi_heads/cascade_roi_head.py ++++ b/mmdet/models/roi_heads/cascade_roi_head.py +@@ -349,7 +349,7 @@ class CascadeRoIHead(BaseRoIHead, BBoxTestMixin, MaskTestMixin): + det_bboxes.append(det_bbox) + det_labels.append(det_label) + +- if torch.onnx.is_in_onnx_export(): ++ if True: + return det_bboxes, det_labels + bbox_results = [ + bbox2result(det_bboxes[i], det_labels[i], +diff --git a/mmdet/models/roi_heads/mask_heads/fcn_mask_head.py b/mmdet/models/roi_heads/mask_heads/fcn_mask_head.py +index 0cba3cda..d69054b6 100644 +--- a/mmdet/models/roi_heads/mask_heads/fcn_mask_head.py ++++ b/mmdet/models/roi_heads/mask_heads/fcn_mask_head.py +@@ -195,7 +195,7 @@ class FCNMaskHead(nn.Module): + scale_factor = bboxes.new_tensor(scale_factor) + 
bboxes = bboxes / scale_factor + +- if torch.onnx.is_in_onnx_export(): ++ if True: + # TODO: Remove after F.grid_sample is supported. + from torchvision.models.detection.roi_heads \ + import paste_masks_in_image +@@ -316,7 +316,7 @@ def _do_paste_mask(masks, boxes, img_h, img_w, skip_empty=True): + gy = img_y[:, :, None].expand(N, img_y.size(1), img_x.size(1)) + grid = torch.stack([gx, gy], dim=3) + +- if torch.onnx.is_in_onnx_export(): ++ if True: + raise RuntimeError( + 'Exporting F.grid_sample from Pytorch to ONNX is not supported.') + img_masks = F.grid_sample( +diff --git a/mmdet/models/roi_heads/roi_extractors/single_level_roi_extractor.py b/mmdet/models/roi_heads/roi_extractors/single_level_roi_extractor.py +index c0eebc4a..534b1c9b 100644 +--- a/mmdet/models/roi_heads/roi_extractors/single_level_roi_extractor.py ++++ b/mmdet/models/roi_heads/roi_extractors/single_level_roi_extractor.py +@@ -55,7 +55,7 @@ class SingleRoIExtractor(BaseRoIExtractor): + """Forward function.""" + out_size = self.roi_layers[0].output_size + num_levels = len(feats) +- if torch.onnx.is_in_onnx_export(): ++ if True: + # Work around to export mask-rcnn to onnx + roi_feats = rois[:, :1].clone().detach() + roi_feats = roi_feats.expand( +@@ -82,7 +82,7 @@ class SingleRoIExtractor(BaseRoIExtractor): + mask = target_lvls == i + inds = mask.nonzero(as_tuple=False).squeeze(1) + # TODO: make it nicer when exporting to onnx +- if torch.onnx.is_in_onnx_export(): ++ if True: + # To keep all roi_align nodes exported to onnx + rois_ = rois[inds] + roi_feats_t = self.roi_layers[i](feats[i], rois_) +diff --git a/mmdet/models/roi_heads/standard_roi_head.py b/mmdet/models/roi_heads/standard_roi_head.py +index c530f2a5..85f95e0c 100644 +--- a/mmdet/models/roi_heads/standard_roi_head.py ++++ b/mmdet/models/roi_heads/standard_roi_head.py +@@ -246,7 +246,7 @@ class StandardRoIHead(BaseRoIHead, BBoxTestMixin, MaskTestMixin): + + det_bboxes, det_labels = self.simple_test_bboxes( + x, img_metas, proposal_list, self.test_cfg, rescale=rescale) +- if torch.onnx.is_in_onnx_export(): ++ if True: + if self.with_mask: + segm_results = self.simple_test_mask( + x, img_metas, det_bboxes, det_labels, rescale=rescale) +diff --git a/mmdet/models/roi_heads/test_mixins.py b/mmdet/models/roi_heads/test_mixins.py +index 0e675d6e..ecc08cf6 100644 +--- a/mmdet/models/roi_heads/test_mixins.py ++++ b/mmdet/models/roi_heads/test_mixins.py +@@ -197,7 +197,7 @@ class MaskTestMixin(object): + torch.from_numpy(scale_factor).to(det_bboxes[0].device) + for scale_factor in scale_factors + ] +- if torch.onnx.is_in_onnx_export(): ++ if True: + # avoid mask_pred.split with static number of prediction + mask_preds = [] + _bboxes = [] +diff --git a/tools/pytorch2onnx.py b/tools/pytorch2onnx.py +index a8e7487b..97ed2d09 100644 +--- a/tools/pytorch2onnx.py ++++ b/tools/pytorch2onnx.py +@@ -33,23 +33,32 @@ def pytorch2onnx(config_path, + one_img, one_meta = preprocess_example_input(input_config) + model, tensor_data = generate_inputs_and_wrap_model( + config_path, checkpoint_path, input_config) ++ ++ input_names = ['input'] ++ dynamic_axes = {'input': {0: 'batch', 2: 'height', 3: 'width'}} ++ + output_names = ['boxes'] ++ dynamic_axes['boxes'] = {0: 'batch'} + if model.with_bbox: + output_names.append('labels') ++ dynamic_axes['labels'] = {0: 'batch'} + if model.with_mask: + output_names.append('masks') ++ dynamic_axes['masks'] = {0: 'batch'} + + torch.onnx.export( + model, + tensor_data, + output_file, +- input_names=['input'], ++ input_names=input_names, + 
output_names=output_names, ++ dynamic_axes=dynamic_axes, + export_params=True, + keep_initializers_as_inputs=True, + do_constant_folding=True, + verbose=show, +- opset_version=opset_version) ++ opset_version=opset_version, ++ enable_onnx_checker=False) + + model.forward = orig_model.forward + print(f'Successfully exported ONNX model: {output_file}') +@@ -67,6 +76,7 @@ def pytorch2onnx(config_path, + tensor_data = [one_img] + # check the numerical value + # get pytorch output ++ one_meta['img_shape_for_onnx'] = one_img.shape[-2:] + pytorch_results = model(tensor_data, [[one_meta]], return_loss=False) + pytorch_results = pytorch_results[0] + # get onnx output -- Gitee
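
Note (appended after the patch; not part of the applied diff): the bulk of this diff rewrites `delta2bbox` and adds `onnx_delta2bbox` so that SSD box decoding exports cleanly. For readers skimming the patch, the standalone sketch below restates the decode step that `delta2bbox` performs, using illustrative names (`decode_deltas` is not a function from the patch) and assuming the identity `means`/`stds` defaults used by this coder; it is a simplified illustration, not the patched function itself.

import math
import torch

def decode_deltas(rois, deltas, max_shape=None, wh_ratio_clip=16 / 1000):
    # rois: (N, 4) anchors as (x1, y1, x2, y2); deltas: (N, 4) as (dx, dy, dw, dh)
    pxy = (rois[:, :2] + rois[:, 2:]) * 0.5      # anchor centers
    pwh = rois[:, 2:] - rois[:, :2]              # anchor widths / heights
    dxy, dwh = deltas[:, :2], deltas[:, 2:]
    max_ratio = abs(math.log(wh_ratio_clip))     # limit extreme width/height scaling
    dwh = dwh.clamp(min=-max_ratio, max=max_ratio)
    gxy = pxy + pwh * dxy                        # shift centers by anchor-scaled offsets
    gwh = pwh * dwh.exp()                        # rescale widths / heights
    boxes = torch.cat([gxy - gwh * 0.5, gxy + gwh * 0.5], dim=-1)
    if max_shape is not None:                    # clip to image bounds (H, W)
        boxes[:, 0::2].clamp_(min=0, max=max_shape[1])
        boxes[:, 1::2].clamp_(min=0, max=max_shape[0])
    return boxes

rois = torch.tensor([[0., 0., 1., 1.], [5., 5., 5., 5.]])
deltas = torch.tensor([[0., 0., 0., 0.], [0.7, -1.9, -0.5, 0.3]])
print(decode_deltas(rois, deltas, max_shape=(32, 32)))

Run with the values above (taken from the doctest inside the diff), this prints [0, 0, 1, 1] and [5, 5, 5, 5], matching the first and last rows of the expected output in that doctest.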