From 5add2f3f44199b96fa98b74d9c91924730daafa4 Mon Sep 17 00:00:00 2001
From: Qw1kowa <1741235576@qq.com>
Date: Thu, 30 Jun 2022 21:39:56 +0800
Subject: [PATCH 1/8] =?UTF-8?q?SSDLite320=E9=A6=96=E6=AC=A1=E6=8F=90?=
=?UTF-8?q?=E4=BA=A4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../cv/classification/SSDLite320/README.md | 61 +
.../cv/classification/SSDLite320/coco_eval.py | 191 ++
.../classification/SSDLite320/coco_utils.py | 249 ++
.../cv/classification/SSDLite320/engine.py | 214 ++
.../SSDLite320/extract_ops_by_step.py | 60 +
.../SSDLite320/group_by_aspect_ratio.py | 196 ++
.../classification/SSDLite320/image_list.py | 25 +
.../cv/classification/SSDLite320/presets.py | 47 +
.../SSDLite320/requirements.txt | 5 +
.../classification/SSDLite320/test/env_npu.sh | 75 +
.../SSDLite320/test/train_eval_1p.sh | 103 +
.../SSDLite320/test/train_full_1p.sh | 101 +
.../SSDLite320/test/train_full_8p.sh | 101 +
.../SSDLite320/test/train_performance_1p.sh | 101 +
.../SSDLite320/test/train_performance_8p.sh | 101 +
.../SSDLite320/torchvision/__init__.py | 74 +
.../torchvision/_internally_replaced_utils.py | 52 +
.../torchvision/datasets/__init__.py | 34 +
.../torchvision/datasets/caltech.py | 206 ++
.../SSDLite320/torchvision/datasets/celeba.py | 158 ++
.../SSDLite320/torchvision/datasets/cifar.py | 162 ++
.../torchvision/datasets/cityscapes.py | 207 ++
.../SSDLite320/torchvision/datasets/coco.py | 123 +
.../torchvision/datasets/fakedata.py | 58 +
.../SSDLite320/torchvision/datasets/flickr.py | 154 ++
.../SSDLite320/torchvision/datasets/folder.py | 207 ++
.../SSDLite320/torchvision/datasets/hmdb51.py | 130 ++
.../torchvision/datasets/imagenet.py | 218 ++
.../torchvision/datasets/kinetics.py | 79 +
.../SSDLite320/torchvision/datasets/lsun.py | 152 ++
.../SSDLite320/torchvision/datasets/mnist.py | 485 ++++
.../torchvision/datasets/omniglot.py | 91 +
.../torchvision/datasets/phototour.py | 209 ++
.../torchvision/datasets/samplers/__init__.py | 3 +
.../datasets/samplers/clip_sampler.py | 174 ++
.../SSDLite320/torchvision/datasets/sbd.py | 124 +
.../SSDLite320/torchvision/datasets/sbu.py | 107 +
.../torchvision/datasets/semeion.py | 84 +
.../SSDLite320/torchvision/datasets/stl10.py | 176 ++
.../SSDLite320/torchvision/datasets/svhn.py | 114 +
.../SSDLite320/torchvision/datasets/ucf101.py | 107 +
.../SSDLite320/torchvision/datasets/usps.py | 84 +
.../SSDLite320/torchvision/datasets/utils.py | 282 +++
.../torchvision/datasets/video_utils.py | 367 +++
.../SSDLite320/torchvision/datasets/vision.py | 80 +
.../SSDLite320/torchvision/datasets/voc.py | 242 ++
.../SSDLite320/torchvision/extension.py | 58 +
.../SSDLite320/torchvision/io/__init__.py | 34 +
.../SSDLite320/torchvision/io/_video_opt.py | 551 +++++
.../SSDLite320/torchvision/io/video.py | 349 +++
.../SSDLite320/torchvision/models/__init__.py | 14 +
.../SSDLite320/torchvision/models/_utils.py | 83 +
.../torchvision/models/_utils_origin.py | 67 +
.../SSDLite320/torchvision/models/alexnet.py | 65 +
.../SSDLite320/torchvision/models/densenet.py | 279 +++
.../torchvision/models/detection/__init__.py | 5 +
.../torchvision/models/detection/_utils.py | 406 ++++
.../models/detection/_utils_origin.py | 348 +++
.../models/detection/anchor_utils.py | 279 +++
.../models/detection/anchor_utils_origin.py | 270 +++
.../models/detection/backbone_utils.py | 212 ++
.../models/detection/backbone_utils_origin.py | 63 +
.../models/detection/faster_rcnn.py | 355 +++
.../models/detection/generalized_rcnn.py | 84 +
.../models/detection/image_list.py | 25 +
.../models/detection/keypoint_rcnn.py | 330 +++
.../torchvision/models/detection/mask_rcnn.py | 323 +++
.../torchvision/models/detection/roi_heads.py | 870 +++++++
.../torchvision/models/detection/rpn.py | 501 ++++
.../torchvision/models/detection/ssd.py | 638 ++++++
.../models/detection/ssd_origin.py | 629 +++++
.../torchvision/models/detection/ssdlite.py | 274 +++
.../torchvision/models/detection/transform.py | 302 +++
.../models/detection/transform_origin.py | 226 ++
.../torchvision/models/googlenet.py | 290 +++
.../torchvision/models/inception.py | 432 ++++
.../SSDLite320/torchvision/models/mnasnet.py | 258 +++
.../torchvision/models/mobilenet.py | 4 +
.../torchvision/models/mobilenet_origin.py | 177 ++
.../torchvision/models/mobilenetv2.py | 211 ++
.../torchvision/models/mobilenetv3.py | 333 +++
.../models/quantization/__init__.py | 5 +
.../models/quantization/googlenet.py | 166 ++
.../models/quantization/inception.py | 222 ++
.../models/quantization/mobilenet.py | 4 +
.../models/quantization/mobilenetv2.py | 102 +
.../models/quantization/mobilenetv3.py | 171 ++
.../torchvision/models/quantization/resnet.py | 174 ++
.../models/quantization/shufflenetv2.py | 154 ++
.../torchvision/models/quantization/utils.py | 40 +
.../SSDLite320/torchvision/models/resnet.py | 353 +++
.../models/segmentation/__init__.py | 3 +
.../torchvision/models/segmentation/_utils.py | 34 +
.../models/segmentation/deeplabv3.py | 94 +
.../torchvision/models/segmentation/fcn.py | 36 +
.../models/segmentation/segmentation.py | 106 +
.../torchvision/models/shufflenetv2.py | 208 ++
.../torchvision/models/squeezenet.py | 137 ++
.../SSDLite320/torchvision/models/utils.py | 4 +
.../SSDLite320/torchvision/models/vgg.py | 183 ++
.../torchvision/models/video/__init__.py | 1 +
.../torchvision/models/video/resnet.py | 341 +++
.../SSDLite320/torchvision/ops/__init__.py | 20 +
.../torchvision/ops/_register_onnx_ops.py | 51 +
.../SSDLite320/torchvision/ops/_utils.py | 63 +
.../torchvision/ops/_utils_origin.py | 38 +
.../SSDLite320/torchvision/ops/boxes.py | 237 ++
.../SSDLite320/torchvision/ops/deform_conv.py | 139 ++
.../ops/feature_pyramid_network.py | 193 ++
.../SSDLite320/torchvision/ops/misc.py | 168 ++
.../SSDLite320/torchvision/ops/misc_origin.py | 153 ++
.../torchvision/ops/new_empty_tensor.py | 16 +
.../SSDLite320/torchvision/ops/poolers.py | 232 ++
.../torchvision/ops/ps_roi_align.py | 68 +
.../SSDLite320/torchvision/ops/ps_roi_pool.py | 59 +
.../SSDLite320/torchvision/ops/roi_align.py | 69 +
.../SSDLite320/torchvision/ops/roi_pool.py | 57 +
.../torchvision/transforms/__init__.py | 1 +
.../transforms/_functional_video.py | 101 +
.../transforms/_transforms_video.py | 173 ++
.../torchvision/transforms/functional.py | 1392 ++++++++++++
.../transforms/functional_origin.py | 906 ++++++++
.../torchvision/transforms/functional_pil.py | 399 ++++
.../transforms/functional_tensor.py | 987 ++++++++
.../transforms/functional_tensor_origin.py | 238 ++
.../torchvision/transforms/transforms.py | 2016 +++++++++++++++++
.../transforms/transforms_origin.py | 1297 +++++++++++
.../SSDLite320/torchvision/utils.py | 309 +++
.../SSDLite320/torchvision/utils_origin.py | 109 +
.../SSDLite320/torchvision/version.py | 5 +
.../cv/classification/SSDLite320/train.py | 281 +++
.../SSDLite320/transform_ssd.py | 302 +++
.../classification/SSDLite320/transforms.py | 286 +++
.../cv/classification/SSDLite320/utils.py | 288 +++
134 files changed, 28909 insertions(+)
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/README.md
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/coco_eval.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/coco_utils.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/engine.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/extract_ops_by_step.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/group_by_aspect_ratio.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/image_list.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/presets.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/requirements.txt
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/test/env_npu.sh
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_1p.sh
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/test/train_full_1p.sh
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/test/train_full_8p.sh
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_1p.sh
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/__init__.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/_internally_replaced_utils.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/__init__.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/caltech.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/celeba.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/cifar.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/cityscapes.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/coco.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/fakedata.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/flickr.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/folder.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/hmdb51.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/imagenet.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/kinetics.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/lsun.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/mnist.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/omniglot.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/phototour.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/samplers/__init__.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/samplers/clip_sampler.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbd.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbu.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/semeion.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/stl10.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/svhn.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/ucf101.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/usps.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/utils.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/video_utils.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/vision.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/voc.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/extension.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/__init__.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/_video_opt.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/video.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/__init__.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils_origin.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/alexnet.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/densenet.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/__init__.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils_origin.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils_origin.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils_origin.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/faster_rcnn.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/generalized_rcnn.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/image_list.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/keypoint_rcnn.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/mask_rcnn.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/roi_heads.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/rpn.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd_origin.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssdlite.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform_origin.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/googlenet.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/inception.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mnasnet.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet_origin.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv2.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv3.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/__init__.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/googlenet.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/inception.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenet.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv2.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv3.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/resnet.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/shufflenetv2.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/utils.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/resnet.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/__init__.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/_utils.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/deeplabv3.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/fcn.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/segmentation.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/shufflenetv2.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/squeezenet.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/utils.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/vgg.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/video/__init__.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/video/resnet.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/__init__.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_register_onnx_ops.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils_origin.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/boxes.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/deform_conv.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/feature_pyramid_network.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc_origin.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/new_empty_tensor.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/poolers.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_align.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_pool.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_align.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_pool.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/__init__.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_functional_video.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_transforms_video.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_origin.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_pil.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor_origin.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms_origin.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils_origin.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/version.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/train.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/transform_ssd.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/transforms.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/utils.py
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/README.md b/PyTorch/contrib/cv/classification/SSDLite320/README.md
new file mode 100644
index 0000000000..b53891e1c8
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/README.md
@@ -0,0 +1,61 @@
+# SSDLite320
+
+An implementation of the SSDLite320 model on the COCO2017 dataset, adapted mainly from the [pytorch/vision/tree/main/references/detection](https://github.com/pytorch/vision/tree/main/references/detection) reference code.
+
+## Environment Setup
+
+- Install PyTorch and the mixed-precision training tool Apex
+- Install the dependencies: `pip install -r requirements.txt`
+- Download the COCO2017 dataset
+
+## Replacing torchvision in the Environment
+
+Replace the torchvision package in your conda environment with the torchvision directory under the current path (it fixes operator shapes and adapts the code to torch 1.5):
+
+```shell
+cp -rf torchvision ~/archiconda3/envs/xxx/lib/python3.7/site-packages/torchvision
+```
+
+Note: `~/archiconda3/envs/xxx/lib/python3.7/site-packages` is the package installation directory of the `xxx` environment.
+
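+If you are unsure of the exact `site-packages` path for your environment, one way to check (assuming `python` resolves to that conda environment) is to print the location of the currently installed torchvision package:
+
+```shell
+python -c "import torchvision, os; print(os.path.dirname(torchvision.__file__))"
+```
+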
+## Training
+
+During training, the scripts below call `train.py`:
+
+```bash
+# 1p train perf
+bash test/train_performance_1p.sh --data_path=/opt/npu/dataset/coco/
+
+# 8p train perf
+bash test/train_performance_8p.sh --data_path=/opt/npu/dataset/coco/
+
+# 8p train full
+bash test/train_full_8p.sh --data_path=/opt/npu/dataset/coco/
+
+# 1p train full
+bash test/train_full_1p.sh --data_path=/opt/npu/dataset/coco/
+
+# 1p eval
+bash test/train_eval_1p.sh --data_path=/opt/npu/dataset/coco/ --model_path=/eval_model.pth
+```
+
+Note: use `--data_path` to specify the dataset directory; for example, if your dataset is located at `/opt/npu/dataset/coco/`, set `--data_path=/opt/npu/dataset/coco/`.
+
+Log Path:
+
+- train_perf_1p.log # performance log for 1p training
+- train_perf_8p.log # performance log for 8p training
+- train_full_1p.log # performance and accuracy log for full 1p training
+- train_full_8p.log # performance and accuracy log for full 8p training
+- train_eval_1p.log # validation accuracy log for 1p evaluation
+
+## SSDLite Training Results
+
+| mAP | FPS | Epochs | AMP_Type | Device |
+| :------: | :---: | :----: | :------: | :----: |
+| - | 10.8 | 1 | O1 | 1p NPU |
+| ? | 100.8 | 660 | O1 | 8p NPU |
+| - | 54.7 | 1 | - | 1p GPU |
+| 20.4 | 387.2 | 660 | - | 8p GPU |
+
+Note: the model in the upstream repository reaches 21.3 (660 epochs).
\ No newline at end of file
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/coco_eval.py b/PyTorch/contrib/cv/classification/SSDLite320/coco_eval.py
new file mode 100644
index 0000000000..ec0709c5d9
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/coco_eval.py
@@ -0,0 +1,191 @@
+import copy
+import io
+from contextlib import redirect_stdout
+
+import numpy as np
+import pycocotools.mask as mask_util
+import torch
+import utils
+from pycocotools.coco import COCO
+from pycocotools.cocoeval import COCOeval
+
+
+class CocoEvaluator:
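+    # Wraps pycocotools' COCOeval: accumulates per-image detection results for each IoU type and can merge them across distributed processes before summarizing.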
+ def __init__(self, coco_gt, iou_types):
+ assert isinstance(iou_types, (list, tuple))
+ coco_gt = copy.deepcopy(coco_gt)
+ self.coco_gt = coco_gt
+
+ self.iou_types = iou_types
+ self.coco_eval = {}
+ for iou_type in iou_types:
+ self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type)
+
+ self.img_ids = []
+ self.eval_imgs = {k: [] for k in iou_types}
+
+ def update(self, predictions):
+ img_ids = list(np.unique(list(predictions.keys())))
+ self.img_ids.extend(img_ids)
+
+ for iou_type in self.iou_types:
+ results = self.prepare(predictions, iou_type)
+ with redirect_stdout(io.StringIO()):
+ coco_dt = COCO.loadRes(self.coco_gt, results) if results else COCO()
+ coco_eval = self.coco_eval[iou_type]
+
+ coco_eval.cocoDt = coco_dt
+ coco_eval.params.imgIds = list(img_ids)
+ img_ids, eval_imgs = evaluate(coco_eval)
+
+ self.eval_imgs[iou_type].append(eval_imgs)
+
+ def synchronize_between_processes(self):
+ for iou_type in self.iou_types:
+ self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2)
+ create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type])
+
+ def accumulate(self):
+ for coco_eval in self.coco_eval.values():
+ coco_eval.accumulate()
+
+ def summarize(self):
+ for iou_type, coco_eval in self.coco_eval.items():
+ print(f"IoU metric: {iou_type}")
+ coco_eval.summarize()
+
+ def prepare(self, predictions, iou_type):
+ if iou_type == "bbox":
+ return self.prepare_for_coco_detection(predictions)
+ if iou_type == "segm":
+ return self.prepare_for_coco_segmentation(predictions)
+ if iou_type == "keypoints":
+ return self.prepare_for_coco_keypoint(predictions)
+ raise ValueError(f"Unknown iou type {iou_type}")
+
+ def prepare_for_coco_detection(self, predictions):
+ coco_results = []
+ for original_id, prediction in predictions.items():
+ if len(prediction) == 0:
+ continue
+
+ boxes = prediction["boxes"]
+ boxes = convert_to_xywh(boxes).tolist()
+ scores = prediction["scores"].tolist()
+ labels = prediction["labels"].tolist()
+
+ coco_results.extend(
+ [
+ {
+ "image_id": original_id,
+ "category_id": labels[k],
+ "bbox": box,
+ "score": scores[k],
+ }
+ for k, box in enumerate(boxes)
+ ]
+ )
+ return coco_results
+
+ def prepare_for_coco_segmentation(self, predictions):
+ coco_results = []
+ for original_id, prediction in predictions.items():
+ if len(prediction) == 0:
+ continue
+
+ scores = prediction["scores"]
+ labels = prediction["labels"]
+ masks = prediction["masks"]
+
+ masks = masks > 0.5
+
+ scores = prediction["scores"].tolist()
+ labels = prediction["labels"].tolist()
+
+ rles = [
+ mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0] for mask in masks
+ ]
+ for rle in rles:
+ rle["counts"] = rle["counts"].decode("utf-8")
+
+ coco_results.extend(
+ [
+ {
+ "image_id": original_id,
+ "category_id": labels[k],
+ "segmentation": rle,
+ "score": scores[k],
+ }
+ for k, rle in enumerate(rles)
+ ]
+ )
+ return coco_results
+
+ def prepare_for_coco_keypoint(self, predictions):
+ coco_results = []
+ for original_id, prediction in predictions.items():
+ if len(prediction) == 0:
+ continue
+
+ boxes = prediction["boxes"]
+ boxes = convert_to_xywh(boxes).tolist()
+ scores = prediction["scores"].tolist()
+ labels = prediction["labels"].tolist()
+ keypoints = prediction["keypoints"]
+ keypoints = keypoints.flatten(start_dim=1).tolist()
+
+ coco_results.extend(
+ [
+ {
+ "image_id": original_id,
+ "category_id": labels[k],
+ "keypoints": keypoint,
+ "score": scores[k],
+ }
+ for k, keypoint in enumerate(keypoints)
+ ]
+ )
+ return coco_results
+
+
+def convert_to_xywh(boxes):
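+    # Convert boxes from (xmin, ymin, xmax, ymax) to COCO's (x, y, width, height) format.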
+ xmin, ymin, xmax, ymax = boxes.unbind(1)
+ return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1)
+
+
+def merge(img_ids, eval_imgs):
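+    # Gather image ids and per-image evaluation results from all processes, then keep each image id only once (in sorted order).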
+ all_img_ids = utils.all_gather(img_ids)
+ all_eval_imgs = utils.all_gather(eval_imgs)
+
+ merged_img_ids = []
+ for p in all_img_ids:
+ merged_img_ids.extend(p)
+
+ merged_eval_imgs = []
+ for p in all_eval_imgs:
+ merged_eval_imgs.append(p)
+
+ merged_img_ids = np.array(merged_img_ids)
+ merged_eval_imgs = np.concatenate(merged_eval_imgs, 2)
+
+ # keep only unique (and in sorted order) images
+ merged_img_ids, idx = np.unique(merged_img_ids, return_index=True)
+ merged_eval_imgs = merged_eval_imgs[..., idx]
+
+ return merged_img_ids, merged_eval_imgs
+
+
+def create_common_coco_eval(coco_eval, img_ids, eval_imgs):
+ img_ids, eval_imgs = merge(img_ids, eval_imgs)
+ img_ids = list(img_ids)
+ eval_imgs = list(eval_imgs.flatten())
+
+ coco_eval.evalImgs = eval_imgs
+ coco_eval.params.imgIds = img_ids
+ coco_eval._paramsEval = copy.deepcopy(coco_eval.params)
+
+
+def evaluate(imgs):
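+    # Run COCOeval.evaluate() with its stdout suppressed and return the image ids plus evalImgs reshaped to (num_categories, num_area_ranges, num_images).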
+ with redirect_stdout(io.StringIO()):
+ imgs.evaluate()
+ return imgs.params.imgIds, np.asarray(imgs.evalImgs).reshape(-1, len(imgs.params.areaRng), len(imgs.params.imgIds))
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/coco_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/coco_utils.py
new file mode 100644
index 0000000000..a656602865
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/coco_utils.py
@@ -0,0 +1,249 @@
+import copy
+import os
+
+import torch
+import torch.utils.data
+import torchvision
+import transforms as T
+from pycocotools import mask as coco_mask
+from pycocotools.coco import COCO
+
+
+class FilterAndRemapCocoCategories:
+ def __init__(self, categories, remap=True):
+ self.categories = categories
+ self.remap = remap
+
+ def __call__(self, image, target):
+ anno = target["annotations"]
+ anno = [obj for obj in anno if obj["category_id"] in self.categories]
+ if not self.remap:
+ target["annotations"] = anno
+ return image, target
+ anno = copy.deepcopy(anno)
+ for obj in anno:
+ obj["category_id"] = self.categories.index(obj["category_id"])
+ target["annotations"] = anno
+ return image, target
+
+
+def convert_coco_poly_to_mask(segmentations, height, width):
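+    # Decode COCO polygon segmentations into a stacked uint8 mask tensor of shape (N, height, width).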
+ masks = []
+ for polygons in segmentations:
+ rles = coco_mask.frPyObjects(polygons, height, width)
+ mask = coco_mask.decode(rles)
+ if len(mask.shape) < 3:
+ mask = mask[..., None]
+ mask = torch.as_tensor(mask, dtype=torch.uint8)
+ mask = mask.any(dim=2)
+ masks.append(mask)
+ if masks:
+ masks = torch.stack(masks, dim=0)
+ else:
+ masks = torch.zeros((0, height, width), dtype=torch.uint8)
+ return masks
+
+
+class ConvertCocoPolysToMask:
+ def __call__(self, image, target):
+ w, h = image.size
+
+ image_id = target["image_id"]
+ image_id = torch.tensor([image_id])
+
+ anno = target["annotations"]
+
+ anno = [obj for obj in anno if obj["iscrowd"] == 0]
+
+ boxes = [obj["bbox"] for obj in anno]
+ # guard against no boxes via resizing
+ boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
+ boxes[:, 2:] += boxes[:, :2]
+ boxes[:, 0::2].clamp_(min=0, max=w)
+ boxes[:, 1::2].clamp_(min=0, max=h)
+
+ classes = [obj["category_id"] for obj in anno]
+ classes = torch.tensor(classes, dtype=torch.int64)
+
+ segmentations = [obj["segmentation"] for obj in anno]
+ masks = convert_coco_poly_to_mask(segmentations, h, w)
+
+ keypoints = None
+ if anno and "keypoints" in anno[0]:
+ keypoints = [obj["keypoints"] for obj in anno]
+ keypoints = torch.as_tensor(keypoints, dtype=torch.float32)
+ num_keypoints = keypoints.shape[0]
+ if num_keypoints:
+ keypoints = keypoints.view(num_keypoints, -1, 3)
+
+ keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])
+ boxes = boxes[keep]
+ classes = classes[keep]
+ masks = masks[keep]
+ if keypoints is not None:
+ keypoints = keypoints[keep]
+
+ target = {}
+ target["boxes"] = boxes
+ target["labels"] = classes
+ target["masks"] = masks
+ target["image_id"] = image_id
+ if keypoints is not None:
+ target["keypoints"] = keypoints
+
+ # for conversion to coco api
+ area = torch.tensor([obj["area"] for obj in anno])
+ iscrowd = torch.tensor([obj["iscrowd"] for obj in anno])
+ target["area"] = area
+ target["iscrowd"] = iscrowd
+
+ return image, target
+
+
+def _coco_remove_images_without_annotations(dataset, cat_list=None):
+ def _has_only_empty_bbox(anno):
+ return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno)
+
+ def _count_visible_keypoints(anno):
+ return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno)
+
+ min_keypoints_per_image = 10
+
+ def _has_valid_annotation(anno):
+ # if it's empty, there is no annotation
+ if len(anno) == 0:
+ return False
+ # if all boxes have close to zero area, there is no annotation
+ if _has_only_empty_bbox(anno):
+ return False
+ # keypoints task have a slight different critera for considering
+ # if an annotation is valid
+ if "keypoints" not in anno[0]:
+ return True
+ # for keypoint detection tasks, only consider valid images those
+ # containing at least min_keypoints_per_image
+ if _count_visible_keypoints(anno) >= min_keypoints_per_image:
+ return True
+ return False
+
+ assert isinstance(dataset, torchvision.datasets.CocoDetection)
+ ids = []
+ for ds_idx, img_id in enumerate(dataset.ids):
+ ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None)
+ anno = dataset.coco.loadAnns(ann_ids)
+ if cat_list:
+ anno = [obj for obj in anno if obj["category_id"] in cat_list]
+ if _has_valid_annotation(anno):
+ ids.append(ds_idx)
+
+ dataset = torch.utils.data.Subset(dataset, ids)
+ return dataset
+
+
+def convert_to_coco_api(ds):
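+    # Build an in-memory pycocotools COCO object from a detection-style dataset by iterating over it once.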
+ coco_ds = COCO()
+ # annotation IDs need to start at 1, not 0, see torchvision issue #1530
+ ann_id = 1
+ dataset = {"images": [], "categories": [], "annotations": []}
+ categories = set()
+ for img_idx in range(len(ds)):
+ # find better way to get target
+ # targets = ds.get_annotations(img_idx)
+ img, targets = ds[img_idx]
+ image_id = targets["image_id"].item()
+ img_dict = {}
+ img_dict["id"] = image_id
+ img_dict["height"] = img.shape[-2]
+ img_dict["width"] = img.shape[-1]
+ dataset["images"].append(img_dict)
+ bboxes = targets["boxes"]
+ bboxes[:, 2:] -= bboxes[:, :2]
+ bboxes = bboxes.tolist()
+ labels = targets["labels"].tolist()
+ areas = targets["area"].tolist()
+ iscrowd = targets["iscrowd"].tolist()
+ if "masks" in targets:
+ masks = targets["masks"]
+ # make masks Fortran contiguous for coco_mask
+ masks = masks.permute(0, 2, 1).contiguous().permute(0, 2, 1)
+ if "keypoints" in targets:
+ keypoints = targets["keypoints"]
+ keypoints = keypoints.reshape(keypoints.shape[0], -1).tolist()
+ num_objs = len(bboxes)
+ for i in range(num_objs):
+ ann = {}
+ ann["image_id"] = image_id
+ ann["bbox"] = bboxes[i]
+ ann["category_id"] = labels[i]
+ categories.add(labels[i])
+ ann["area"] = areas[i]
+ ann["iscrowd"] = iscrowd[i]
+ ann["id"] = ann_id
+ if "masks" in targets:
+ ann["segmentation"] = coco_mask.encode(masks[i].numpy())
+ if "keypoints" in targets:
+ ann["keypoints"] = keypoints[i]
+ ann["num_keypoints"] = sum(k != 0 for k in keypoints[i][2::3])
+ dataset["annotations"].append(ann)
+ ann_id += 1
+ dataset["categories"] = [{"id": i} for i in sorted(categories)]
+ coco_ds.dataset = dataset
+ coco_ds.createIndex()
+ return coco_ds
+
+
+def get_coco_api_from_dataset(dataset):
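+    # Unwrap nested Subset wrappers and reuse the underlying CocoDetection's COCO annotations when available; otherwise convert the dataset.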
+ for _ in range(10):
+ if isinstance(dataset, torchvision.datasets.CocoDetection):
+ break
+ if isinstance(dataset, torch.utils.data.Subset):
+ dataset = dataset.dataset
+ if isinstance(dataset, torchvision.datasets.CocoDetection):
+ return dataset.coco
+ return convert_to_coco_api(dataset)
+
+
+class CocoDetection(torchvision.datasets.CocoDetection):
+ def __init__(self, img_folder, ann_file, transforms):
+ super().__init__(img_folder, ann_file)
+ self._transforms = transforms
+
+ def __getitem__(self, idx):
+ img, target = super().__getitem__(idx)
+ image_id = self.ids[idx]
+ target = dict(image_id=image_id, annotations=target)
+ if self._transforms is not None:
+ img, target = self._transforms(img, target)
+ return img, target
+
+
+def get_coco(root, image_set, transforms, mode="instances"):
+ anno_file_template = "{}_{}2017.json"
+ PATHS = {
+ "train": ("train2017", os.path.join("annotations", anno_file_template.format(mode, "train"))),
+ "val": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))),
+ # "train": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val")))
+ }
+
+ t = [ConvertCocoPolysToMask()]
+
+ if transforms is not None:
+ t.append(transforms)
+ transforms = T.Compose(t)
+
+ img_folder, ann_file = PATHS[image_set]
+ img_folder = os.path.join(root, img_folder)
+ ann_file = os.path.join(root, ann_file)
+
+ dataset = CocoDetection(img_folder, ann_file, transforms=transforms)
+
+ if image_set == "train":
+ dataset = _coco_remove_images_without_annotations(dataset)
+
+ # dataset = torch.utils.data.Subset(dataset, [i for i in range(500)])
+
+ return dataset
+
+
+def get_coco_kp(root, image_set, transforms):
+ return get_coco(root, image_set, transforms, mode="person_keypoints")
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/engine.py b/PyTorch/contrib/cv/classification/SSDLite320/engine.py
new file mode 100644
index 0000000000..33f4897eb5
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/engine.py
@@ -0,0 +1,214 @@
+import math
+import sys
+import time
+import datetime
+import pdb
+from typing import List, Tuple
+import torch
+import torch.nn.functional as F
+import numpy as np
+import torchvision.models.detection.mask_rcnn
+import utils
+from coco_eval import CocoEvaluator
+from coco_utils import get_coco_api_from_dataset
+from transform_ssd import GeneralizedRCNNTransform
+import os
+from apex import amp
+
+def get_ops(images, targets):
+    # Profile one training step to extract the executed operators and their input shapes.
+    # Note: `model`, `optimizer` and `lr_scheduler` are expected to be defined in the enclosing scope.
+ with torch.autograd.profiler.profile(record_shapes=True, use_cuda=True) as prof:
+ loss_dict = model(images, targets)
+ losses = sum(loss for loss in loss_dict.values())
+ # reduce losses over all GPUs for logging purposes
+ loss_dict_reduced = utils.reduce_dict(loss_dict)
+ losses_reduced = sum(loss for loss in loss_dict_reduced.values())
+ loss_value = losses_reduced.item()
+ if not math.isfinite(loss_value):
+ print(f"Loss is {loss_value}, stopping training")
+ print(loss_dict_reduced)
+ sys.exit(1)
+ optimizer.zero_grad()
+ losses.backward()
+ optimizer.step()
+ if lr_scheduler is not None:
+ lr_scheduler.step()
+ print(prof.table(row_limit=200000))
+
+
+def fix_input_target(images, targets):
+    '''
+    Pad the input images and targets to fixed shapes so that operator shapes stay static.
+    Note: `device` is expected to be defined in the enclosing scope.
+    '''
+    # fixed input image shape
+    batch_shape = (3, 1024, 1024)
+    # padding value
+    pad_value = 0
+    # fixed number of ground-truth boxes per image
+    max_boxes = 20
+ classes = 91
+
+ # images = list(image.to(device) for image in images)
+ # len(images): 24 bs
+    # images[0].shape: torch.Size([3, 207, 281])
+ images_pad = []
+ for image in images:
+ image = image.to(device)
+ padding_size = [0, batch_shape[-1] - image.shape[-1],
+ 0, batch_shape[-2] - image.shape[-2]]
+ image = F.pad(image, padding_size, value=pad_value)
+ images_pad.append(image)
+ images = images_pad
+ # targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
+ targets_pad = []
+ for target in targets:
+ boxes_num = target['boxes'].shape[0]
+ if boxes_num < max_boxes:
+ diff_num = max_boxes - boxes_num
+            # pad boxes
+ target['boxes'] = torch.cat([target['boxes'], torch.zeros([diff_num, 4])], dim=0)
+            # pad labels
+ padding_label = np.zeros(diff_num) + classes
+ target['labels'] = torch.cat([target['labels'], torch.from_numpy(padding_label).long()], dim=0)
+            # pad masks
+ # padding_mask = torch.zeros(diff_num, target['masks'].shape[1], target['masks'].shape[2])
+ padding_mask = target['masks'][0].unsqueeze(0)
+ target['masks'] = torch.cat([target['masks'], padding_mask], dim=0)
+            # pad area
+ padding_area = torch.zeros(diff_num)
+ target['area'] = torch.cat([target['area'], padding_area], dim=0)
+            # pad iscrowd
+ padding_iscrowd = torch.zeros(diff_num)
+ target['iscrowd'] = torch.cat([target['iscrowd'], padding_iscrowd.long()], dim=0)
+ else:
+ select_idx = torch.randperm(boxes_num)[:max_boxes]
+ target['boxes'] = target['boxes'][select_idx]
+ target['labels'] = target['labels'][select_idx]
+ target['masks'] = target['masks'][select_idx]
+ target['area'] = target['area'][select_idx]
+ target['iscrowd'] = target['iscrowd'][select_idx]
+ target['boxes'] = target['boxes'].to(device)
+ target['labels'] = target['labels'].to(device)
+ target['masks'] = target['masks'].to(device)
+ target['image_id'] = target['image_id'].to(device)
+ target['area'] = target['area'].to(device)
+ target['iscrowd'] = target['iscrowd'].to(device)
+ return images, targets
+
+
+def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
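+    # Train for one epoch: apply the fixed-size SSD transform to each batch, compute the detection losses, and backpropagate through apex amp's scaled loss.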
+ model.train()
+ metric_logger = utils.MetricLogger(delimiter=" ")
+ metric_logger.add_meter("lr", utils.SmoothedValue(window_size=1, fmt="{value:.6f}"))
+ header = f"Epoch: [{epoch}]"
+
+ lr_scheduler = None
+
+ for images, targets in metric_logger.log_every(data_loader, print_freq, header):
+        '''
+        Apply SSD's native transform (GeneralizedRCNNTransform) with a fixed 320x320 size.
+        '''
+ size = [320, 320]
+ image_mean = [0.485, 0.456, 0.406]
+ image_std = [0.229, 0.224, 0.225]
+ transform_ssd = GeneralizedRCNNTransform(
+ device, min(size), max(size), image_mean, image_std, size_divisible=1, fixed_size=size
+ )
+ images, targets = transform_ssd(images, targets)
+
+ targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
+
+ # torch.npu.global_step_inc()
+ loss_dict = model(images, targets)
+ losses = sum(loss for loss in loss_dict.values())
+ # reduce losses over all GPUs for logging purposes
+ loss_dict_reduced = utils.reduce_dict(loss_dict)
+ losses_reduced = sum(loss for loss in loss_dict_reduced.values())
+ loss_value = losses_reduced.item()
+ if not math.isfinite(loss_value):
+ print(f"Loss is {loss_value}, stopping training")
+ print(loss_dict_reduced)
+ sys.exit(1)
+ optimizer.zero_grad()
+ with amp.scale_loss(losses, optimizer) as scaled_loss:
+ scaled_loss.backward()
+ optimizer.step()
+
+ if lr_scheduler is not None:
+ lr_scheduler.step()
+
+ metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
+ metric_logger.update(lr=optimizer.param_groups[0]["lr"])
+
+ return metric_logger
+
+
+def _get_iou_types(model):
+ model_without_ddp = model
+ if isinstance(model, torch.nn.parallel.DistributedDataParallel):
+ model_without_ddp = model.module
+ iou_types = ["bbox"]
+ if isinstance(model_without_ddp, torchvision.models.detection.MaskRCNN):
+ iou_types.append("segm")
+ if isinstance(model_without_ddp, torchvision.models.detection.KeypointRCNN):
+ iou_types.append("keypoints")
+ return iou_types
+
+
+# @torch.inference_mode()
+def evaluate(model, data_loader, device):
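+    # COCO evaluation loop: record original image sizes, apply the fixed-size SSD transform, run the model, and feed the predictions to CocoEvaluator.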
+ n_threads = torch.get_num_threads()
+ # FIXME remove this and make paste_masks_in_image run on the GPU
+ torch.set_num_threads(1)
+ cpu_device = torch.device("cpu")
+ model.eval()
+ metric_logger = utils.MetricLogger(delimiter=" ")
+ header = "Test:"
+
+ coco = get_coco_api_from_dataset(data_loader.dataset)
+ iou_types = _get_iou_types(model)
+ coco_evaluator = CocoEvaluator(coco, iou_types)
+
+ for images, targets in metric_logger.log_every(data_loader, 1, header):
+ # get the original image sizes
+ original_image_sizes: List[Tuple[int, int]] = []
+ for img in images:
+ val = img.shape[-2:]
+ assert len(val) == 2
+ original_image_sizes.append((val[0], val[1]))
+        '''
+        Apply SSD's native transform (GeneralizedRCNNTransform) with a fixed 320x320 size.
+        '''
+ size = [320, 320]
+ image_mean = [0.485, 0.456, 0.406]
+ image_std = [0.229, 0.224, 0.225]
+ transform_ssd = GeneralizedRCNNTransform(
+ device, min(size), max(size), image_mean, image_std, size_divisible=1, fixed_size=size
+ )
+ images, _ = transform_ssd(images, None)
+ if torch.npu.is_available():
+ torch.npu.synchronize()
+ model_time = time.time()
+
+ outputs = model(images, original_image_sizes)
+
+ outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
+ model_time = time.time() - model_time
+
+ res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
+ evaluator_time = time.time()
+ coco_evaluator.update(res)
+ evaluator_time = time.time() - evaluator_time
+ metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)
+
+ # gather the stats from all processes
+ metric_logger.synchronize_between_processes()
+ print("Averaged stats:", metric_logger)
+ coco_evaluator.synchronize_between_processes()
+
+ # accumulate predictions from all images
+ coco_evaluator.accumulate()
+ coco_evaluator.summarize()
+ torch.set_num_threads(n_threads)
+ return coco_evaluator
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/extract_ops_by_step.py b/PyTorch/contrib/cv/classification/SSDLite320/extract_ops_by_step.py
new file mode 100644
index 0000000000..6a499a143f
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/extract_ops_by_step.py
@@ -0,0 +1,60 @@
+import re
+from collections import defaultdict
+import argparse
+
+def dump_file(ops, file_path):
+ with open(file_path, 'w') as f:
+ for op in ops:
+ f.write(op)
+ f.write('\n')
+
+def parse_profiler(profiler_file):
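+    # Parse a torch profiler text dump: each table header starts a new step; collect the input shapes seen per op and flag as dynamic any op whose shapes change after (or are missing from) the first step.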
+ ops_shapes = defaultdict(set)
+ ops_shapes_first_step = defaultdict(set)
+ ops_shapes_other_steps = defaultdict(set)
+ with open(profiler_file, 'r') as f:
+ lines = f.readlines()
+ step = 0
+ for line in lines:
+ if re.findall(r'^Name.*Input Shapes$', line.strip()):
+ step += 1
+ continue
+ if step == 0:
+ continue
+
+ if -1 == line.find('[[') and -1 == line.find('[]'):
+ continue
+
+ line_fields = [field.strip() for field in line.strip().split(' ') if field != '']
+ ops_shapes[line_fields[0]].add(line_fields[-1])
+ if step == 1:
+ ops_shapes_first_step[line_fields[0]].add(line_fields[-1])
+ else:
+ ops_shapes_other_steps[line_fields[0]].add(line_fields[-1])
+
+ all_ops = [k for k, v in ops_shapes.items()]
+
+ dynamic_ops = list()
+ for op_name, shape_set in ops_shapes_other_steps.items():
+ if op_name not in ops_shapes_first_step.keys():
+ dynamic_ops.append(op_name)
+ else:
+ if len(shape_set - ops_shapes_first_step[op_name]) > 0:
+ dynamic_ops.append(op_name)
+ return all_ops, dynamic_ops
+
+def extract_ops(profiler_file):
+ all_ops, dynamic_ops = parse_profiler(profiler_file)
+
+ print('all_ops:', all_ops)
+ print('dynamic_ops', dynamic_ops)
+
+ dump_file(all_ops, 'all_ops.txt')
+ dump_file(dynamic_ops, 'dynamic_ops.txt')
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser('extract ops')
+ parser.add_argument('--profiler_file', default='', type=str, metavar='PATH')
+
+ args = parser.parse_args()
+ extract_ops(args.profiler_file)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/group_by_aspect_ratio.py b/PyTorch/contrib/cv/classification/SSDLite320/group_by_aspect_ratio.py
new file mode 100644
index 0000000000..1323849a6a
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/group_by_aspect_ratio.py
@@ -0,0 +1,196 @@
+import bisect
+import copy
+import math
+from collections import defaultdict
+from itertools import repeat, chain
+
+import numpy as np
+import torch
+import torch.utils.data
+import torchvision
+from PIL import Image
+from torch.utils.data.sampler import BatchSampler, Sampler
+from torch.utils.model_zoo import tqdm
+
+
+def _repeat_to_at_least(iterable, n):
+ repeat_times = math.ceil(n / len(iterable))
+ repeated = chain.from_iterable(repeat(iterable, repeat_times))
+ return list(repeated)
+
+
+class GroupedBatchSampler(BatchSampler):
+ """
+ Wraps another sampler to yield a mini-batch of indices.
+ It enforces that the batch only contain elements from the same group.
+ It also tries to provide mini-batches which follows an ordering which is
+ as close as possible to the ordering from the original sampler.
+ Args:
+ sampler (Sampler): Base sampler.
+ group_ids (list[int]): If the sampler produces indices in range [0, N),
+ `group_ids` must be a list of `N` ints which contains the group id of each sample.
+ The group ids must be a continuous set of integers starting from
+ 0, i.e. they must be in the range [0, num_groups).
+ batch_size (int): Size of mini-batch.
+ """
+
+ def __init__(self, sampler, group_ids, batch_size):
+ if not isinstance(sampler, Sampler):
+ raise ValueError(f"sampler should be an instance of torch.utils.data.Sampler, but got sampler={sampler}")
+ self.sampler = sampler
+ self.group_ids = group_ids
+ self.batch_size = batch_size
+
+ def __iter__(self):
+ buffer_per_group = defaultdict(list)
+ samples_per_group = defaultdict(list)
+
+ num_batches = 0
+ for idx in self.sampler:
+ group_id = self.group_ids[idx]
+ buffer_per_group[group_id].append(idx)
+ samples_per_group[group_id].append(idx)
+ if len(buffer_per_group[group_id]) == self.batch_size:
+ yield buffer_per_group[group_id]
+ num_batches += 1
+ del buffer_per_group[group_id]
+ assert len(buffer_per_group[group_id]) < self.batch_size
+
+ # now we have run out of elements that satisfy
+ # the group criteria, let's return the remaining
+ # elements so that the size of the sampler is
+ # deterministic
+ expected_num_batches = len(self)
+ num_remaining = expected_num_batches - num_batches
+ if num_remaining > 0:
+ # for the remaining batches, take first the buffers with largest number
+ # of elements
+ for group_id, _ in sorted(buffer_per_group.items(), key=lambda x: len(x[1]), reverse=True):
+ remaining = self.batch_size - len(buffer_per_group[group_id])
+ samples_from_group_id = _repeat_to_at_least(samples_per_group[group_id], remaining)
+ buffer_per_group[group_id].extend(samples_from_group_id[:remaining])
+ assert len(buffer_per_group[group_id]) == self.batch_size
+ yield buffer_per_group[group_id]
+ num_remaining -= 1
+ if num_remaining == 0:
+ break
+ assert num_remaining == 0
+
+ def __len__(self):
+ return len(self.sampler) // self.batch_size
+
+
+def _compute_aspect_ratios_slow(dataset, indices=None):
+ print(
+ "Your dataset doesn't support the fast path for "
+ "computing the aspect ratios, so will iterate over "
+ "the full dataset and load every image instead. "
+ "This might take some time..."
+ )
+ if indices is None:
+ indices = range(len(dataset))
+
+ class SubsetSampler(Sampler):
+ def __init__(self, indices):
+ self.indices = indices
+
+ def __iter__(self):
+ return iter(self.indices)
+
+ def __len__(self):
+ return len(self.indices)
+
+ sampler = SubsetSampler(indices)
+ data_loader = torch.utils.data.DataLoader(
+ dataset,
+ batch_size=1,
+ sampler=sampler,
+ num_workers=14, # you might want to increase it for faster processing
+ collate_fn=lambda x: x[0],
+ )
+ aspect_ratios = []
+ with tqdm(total=len(dataset)) as pbar:
+ for _i, (img, _) in enumerate(data_loader):
+ pbar.update(1)
+ height, width = img.shape[-2:]
+ aspect_ratio = float(width) / float(height)
+ aspect_ratios.append(aspect_ratio)
+ return aspect_ratios
+
+
+def _compute_aspect_ratios_custom_dataset(dataset, indices=None):
+ if indices is None:
+ indices = range(len(dataset))
+ aspect_ratios = []
+ for i in indices:
+ height, width = dataset.get_height_and_width(i)
+ aspect_ratio = float(width) / float(height)
+ aspect_ratios.append(aspect_ratio)
+ return aspect_ratios
+
+
+def _compute_aspect_ratios_coco_dataset(dataset, indices=None):
+ if indices is None:
+ indices = range(len(dataset))
+ aspect_ratios = []
+ for i in indices:
+ img_info = dataset.coco.imgs[dataset.ids[i]]
+ aspect_ratio = float(img_info["width"]) / float(img_info["height"])
+ aspect_ratios.append(aspect_ratio)
+ return aspect_ratios
+
+
+def _compute_aspect_ratios_voc_dataset(dataset, indices=None):
+ if indices is None:
+ indices = range(len(dataset))
+ aspect_ratios = []
+ for i in indices:
+ # this doesn't load the data into memory, because PIL loads it lazily
+ width, height = Image.open(dataset.images[i]).size
+ aspect_ratio = float(width) / float(height)
+ aspect_ratios.append(aspect_ratio)
+ return aspect_ratios
+
+
+def _compute_aspect_ratios_subset_dataset(dataset, indices=None):
+ if indices is None:
+ indices = range(len(dataset))
+
+ ds_indices = [dataset.indices[i] for i in indices]
+ return compute_aspect_ratios(dataset.dataset, ds_indices)
+
+
+def compute_aspect_ratios(dataset, indices=None):
+ if hasattr(dataset, "get_height_and_width"):
+ return _compute_aspect_ratios_custom_dataset(dataset, indices)
+
+ if isinstance(dataset, torchvision.datasets.CocoDetection):
+ return _compute_aspect_ratios_coco_dataset(dataset, indices)
+
+ if isinstance(dataset, torchvision.datasets.VOCDetection):
+ return _compute_aspect_ratios_voc_dataset(dataset, indices)
+
+ if isinstance(dataset, torch.utils.data.Subset):
+ return _compute_aspect_ratios_subset_dataset(dataset, indices)
+
+ # slow path
+ return _compute_aspect_ratios_slow(dataset, indices)
+
+
+def _quantize(x, bins):
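+    # Map each value to the index of the bin it falls into, using binary search (bisect_right) over the sorted bin edges.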
+ bins = copy.deepcopy(bins)
+ bins = sorted(bins)
+ quantized = list(map(lambda y: bisect.bisect_right(bins, y), x))
+ return quantized
+
+
+def create_aspect_ratio_groups(dataset, k=0):
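+    # Assign every image to an aspect-ratio group based on log-spaced bins between 0.5 and 2, so that GroupedBatchSampler can batch similarly shaped images together.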
+ aspect_ratios = compute_aspect_ratios(dataset)
+ bins = (2 ** np.linspace(-1, 1, 2 * k + 1)).tolist() if k > 0 else [1.0]
+ groups = _quantize(aspect_ratios, bins)
+ # count number of elements per group
+ counts = np.unique(groups, return_counts=True)[1]
+ fbins = [0] + bins + [np.inf]
+ print(f"Using {fbins} as bins for aspect ratio quantization")
+ print(f"Count of instances per bin: {counts}")
+ return groups
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/image_list.py b/PyTorch/contrib/cv/classification/SSDLite320/image_list.py
new file mode 100644
index 0000000000..583866557e
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/image_list.py
@@ -0,0 +1,25 @@
+from typing import List, Tuple
+
+import torch
+from torch import Tensor
+
+
+class ImageList:
+ """
+ Structure that holds a list of images (of possibly
+ varying sizes) as a single tensor.
+ This works by padding the images to the same size,
+ and storing in a field the original sizes of each image
+
+ Args:
+ tensors (tensor): Tensor containing images.
+ image_sizes (list[tuple[int, int]]): List of Tuples each containing size of images.
+ """
+
+ def __init__(self, tensors: Tensor, image_sizes: List[Tuple[int, int]]) -> None:
+ self.tensors = tensors
+ self.image_sizes = image_sizes
+
+ def to(self, device: torch.device) -> "ImageList":
+ cast_tensor = self.tensors.to(device)
+ return ImageList(cast_tensor, self.image_sizes)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/presets.py b/PyTorch/contrib/cv/classification/SSDLite320/presets.py
new file mode 100644
index 0000000000..88d8c697d2
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/presets.py
@@ -0,0 +1,47 @@
+import torch
+import transforms as T
+
+
+class DetectionPresetTrain:
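+    # Training-time augmentation pipelines selected by policy name: "hflip", "ssd", or "ssdlite".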
+ def __init__(self, data_augmentation, hflip_prob=0.5, mean=(123.0, 117.0, 104.0)):
+ if data_augmentation == "hflip":
+ self.transforms = T.Compose(
+ [
+ T.RandomHorizontalFlip(p=hflip_prob),
+ T.PILToTensor(),
+ T.ConvertImageDtype(torch.float),
+ ]
+ )
+ elif data_augmentation == "ssd":
+ self.transforms = T.Compose(
+ [
+ T.RandomPhotometricDistort(),
+ T.RandomZoomOut(fill=list(mean)),
+ T.RandomIoUCrop(),
+ T.RandomHorizontalFlip(p=hflip_prob),
+ T.PILToTensor(),
+ T.ConvertImageDtype(torch.float),
+ ]
+ )
+ elif data_augmentation == "ssdlite":
+ self.transforms = T.Compose(
+ [
+ T.RandomIoUCrop(),
+ T.RandomHorizontalFlip(p=hflip_prob),
+ T.PILToTensor(),
+ T.ConvertImageDtype(torch.float),
+ ]
+ )
+ else:
+ raise ValueError(f'Unknown data augmentation policy "{data_augmentation}"')
+
+ def __call__(self, img, target):
+ return self.transforms(img, target)
+
+
+class DetectionPresetEval:
+ def __init__(self):
+ self.transforms = T.ToTensor()
+
+ def __call__(self, img, target):
+ return self.transforms(img, target)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/requirements.txt b/PyTorch/contrib/cv/classification/SSDLite320/requirements.txt
new file mode 100644
index 0000000000..e0311aad69
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/requirements.txt
@@ -0,0 +1,5 @@
+cython
+matplotlib
+pycocotools
+sympy
+decorator
\ No newline at end of file
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/env_npu.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/env_npu.sh
new file mode 100644
index 0000000000..1950129888
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/test/env_npu.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+export install_path=/usr/local/Ascend
+
+if [ -d ${install_path}/toolkit ]; then
+ export LD_LIBRARY_PATH=${install_path}/fwkacllib/lib64/:/usr/include/hdf5/lib/:/usr/local/:/usr/local/lib/:/usr/lib/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons:${path_lib}:${LD_LIBRARY_PATH}
+ export PATH=${install_path}/fwkacllib/ccec_compiler/bin:${install_path}/fwkacllib/bin:$PATH
+ export PYTHONPATH=${install_path}/fwkacllib/python/site-packages:${install_path}/tfplugin/python/site-packages:${install_path}/toolkit/python/site-packages:$PYTHONPATH
+ export PYTHONPATH=/usr/local/python3.7.5/lib/python3.7/site-packages:$PYTHONPATH
+ export ASCEND_OPP_PATH=${install_path}/opp
+else
+ if [ -d ${install_path}/nnae/latest ];then
+ export LD_LIBRARY_PATH=${install_path}/nnae/latest/fwkacllib/lib64/:/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64_64-linux-gnu:$LD_LIBRARY_PATH
+ export PATH=$PATH:${install_path}/nnae/latest/fwkacllib/ccec_compiler/bin/:${install_path}/nnae/latest/toolkit/tools/ide_daemon/bin/
+ export ASCEND_OPP_PATH=${install_path}/nnae/latest/opp/
+ export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
+ export PYTHONPATH=${install_path}/nnae/latest/fwkacllib/python/site-packages/:${install_path}/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH
+ export ASCEND_AICPU_PATH=${install_path}/nnae/latest
+ else
+ export LD_LIBRARY_PATH=${install_path}/ascend-toolkit/latest/fwkacllib/lib64/:/usr/local/:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH
+ export PATH=$PATH:${install_path}/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:${install_path}/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/
+ export ASCEND_OPP_PATH=${install_path}/ascend-toolkit/latest/opp/
+ export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
+ export PYTHONPATH=${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/te:$PYTHONPATH
+ export ASCEND_AICPU_PATH=${install_path}/ascend-toolkit/latest
+ fi
+fi
+
+${install_path}/driver/tools/msnpureport -g error -d 0
+${install_path}/driver/tools/msnpureport -g error -d 1
+${install_path}/driver/tools/msnpureport -g error -d 2
+${install_path}/driver/tools/msnpureport -g error -d 3
+${install_path}/driver/tools/msnpureport -g error -d 4
+${install_path}/driver/tools/msnpureport -g error -d 5
+${install_path}/driver/tools/msnpureport -g error -d 6
+${install_path}/driver/tools/msnpureport -g error -d 7
+
+# Output host logs to the serial port, 0-off/1-on
+export ASCEND_SLOG_PRINT_TO_STDOUT=0
+# Default log level, 0-debug/1-info/2-warning/3-error
+export ASCEND_GLOBAL_LOG_LEVEL=3
+# Event log enable flag, 0-off/1-on
+export ASCEND_GLOBAL_EVENT_ENABLE=0
+# Enable the task queue, 0-off/1-on
+export TASK_QUEUE_ENABLE=1
+# Enable PTCopy, 0-off/1-on
+export PTCOPY_ENABLE=1
+# Enable the combined flag, 0-off/1-on
+export COMBINED_ENABLE=1
+# Ops recompiled in special cases; no modification needed
+export DYNAMIC_OP="ADD#MUL"
+# HCCL whitelist switch, 1-disabled/0-enabled
+export HCCL_WHITELIST_DISABLE=1
+
+ulimit -SHn 512000
+
+path_lib=$(python3.7 -c """
+import sys
+import re
+result=''
+for index in range(len(sys.path)):
+ match_sit = re.search('-packages', sys.path[index])
+ if match_sit is not None:
+ match_lib = re.search('lib', sys.path[index])
+
+ if match_lib is not None:
+ end=match_lib.span()[1]
+ result += sys.path[index][0:end] + ':'
+
+ result+=sys.path[index] + '/torch/lib:'
+print(result)"""
+)
+
+echo ${path_lib}
+
+export LD_LIBRARY_PATH=/usr/local/python3.7.5/lib/:${path_lib}:$LD_LIBRARY_PATH
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_1p.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_1p.sh
new file mode 100644
index 0000000000..159f319279
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_1p.sh
@@ -0,0 +1,103 @@
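+# Example invocation (dataset path is illustrative); run from the SSDLite320 model directory:
+#   bash test/train_eval_1p.sh --data_path=/path/to/coco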
+################ Basic configuration parameters; review and adjust per model ##################
+Network="SSDLite320"
+# Number of NPUs used for training
+export RANK_SIZE=1
+# Dataset path; can be overridden via --data_path
+data_path="/opt/npu/dataset/coco/"
+batch_size=24
+ASCEND_DEVICE_ID=0
+# Argument parsing: data_path is required; other arguments depend on the model. Any new argument must be defined and assigned above.
+for para in $*
+do
+ if [[ $para == --device_id* ]];then
+ device_id=`echo ${para#*=}`
+ elif [[ $para == --data_path* ]];then
+ data_path=`echo ${para#*=}`
+ elif [[ $para == --model_path* ]];then
+ model_path=`echo ${para#*=}`
+ fi
+done
+
+# Verify that data_path has been provided; no modification needed
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+ exit 1
+fi
+
+############### Determine the training script execution path ###############
+# cd to the directory at the same level as the test folder for better compatibility; test_path_dir is the path containing the test folder
+cur_path=`pwd`
+cur_path_last_dirname=${cur_path##*/}
+if [ x"${cur_path_last_dirname}" == x"test" ];then
+ test_path_dir=${cur_path}
+ cd ..
+ cur_path=`pwd`
+else
+ test_path_dir=${cur_path}/test
+fi
+
+################# Create the log output directory; no modification needed #################
+if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then
+ rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID}
+ mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID
+else
+ mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID
+fi
+
+################# Launch the training script #################
+# Training start time; no modification needed
+start_time=$(date +%s)
+# Source environment variables when not running on the platform
+source ${test_path_dir}/env_npu.sh
+
+python -m torch.distributed.launch --nproc_per_node=$RANK_SIZE --use_env train.py\
+ --dataset coco --data-path $data_path\
+ --model ssdlite320_mobilenet_v3_large\
+ --epochs 600\
+ --aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr\
+ --lr 0.15 --batch-size 24 --test-only\
+ --weight-decay 0.00004 --data-augmentation ssdlite > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_eval_1p.log 2>&1 &
+
+wait
+
+################## Collect training statistics ################
+# Training end time; no modification needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# Print results; no modification needed
+echo "------------------ Final result ------------------"
+# Output performance (FPS); review and adjust per model
+step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_eval_1p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}'`
+FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${step_time}'}'`
+# Print; no modification needed
+echo "Final Performance images/sec : $FPS"
+
+# Output training accuracy; review and adjust per model
+train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_eval_1p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}'`
+# Print; no modification needed
+echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Summary for performance monitoring
+# Training case information; no modification needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc'
+
+# Collect performance data; no modification needed
+# Throughput
+ActualFPS=${FPS}
+# Training time per iteration (ms)
+TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'`
+
+# Print key information to ${CaseName}.log; no modification needed
+echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/train_full_1p.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/train_full_1p.sh
new file mode 100644
index 0000000000..7dafc83788
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/test/train_full_1p.sh
@@ -0,0 +1,101 @@
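+# Example invocation (dataset path is illustrative); run from the SSDLite320 model directory:
+#   bash test/train_full_1p.sh --data_path=/path/to/coco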
+################ Basic configuration parameters; review and adjust per model ##################
+Network="SSDLite320"
+# Number of NPUs used for training
+export RANK_SIZE=1
+# Dataset path; can be overridden via --data_path
+data_path="/opt/npu/dataset/coco/"
+batch_size=24
+ASCEND_DEVICE_ID=0
+# Argument parsing: data_path is required; other arguments depend on the model. Any new argument must be defined and assigned above.
+for para in $*
+do
+ if [[ $para == --device_id* ]];then
+ device_id=`echo ${para#*=}`
+ elif [[ $para == --data_path* ]];then
+ data_path=`echo ${para#*=}`
+ fi
+done
+
+# Verify that data_path has been provided; no modification needed
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+ exit 1
+fi
+
+############### Determine the training script execution path ###############
+# cd to the directory at the same level as the test folder for better compatibility; test_path_dir is the path containing the test folder
+cur_path=`pwd`
+cur_path_last_dirname=${cur_path##*/}
+if [ x"${cur_path_last_dirname}" == x"test" ];then
+ test_path_dir=${cur_path}
+ cd ..
+ cur_path=`pwd`
+else
+ test_path_dir=${cur_path}/test
+fi
+
+################# Create the log output directory; no modification needed #################
+if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then
+ rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID}
+ mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID
+else
+ mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID
+fi
+
+################# Launch the training script #################
+# Training start time; no modification needed
+start_time=$(date +%s)
+# Source environment variables when not running on the platform
+source ${test_path_dir}/env_npu.sh
+
+python -m torch.distributed.launch --nproc_per_node=$RANK_SIZE --use_env train.py\
+ --dataset coco --data-path $data_path\
+ --model ssdlite320_mobilenet_v3_large\
+ --epochs 600\
+ --aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr\
+ --lr 0.15 --batch-size 24\
+    --weight-decay 0.00004 --data-augmentation ssdlite > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_full_1p.log 2>&1 &
+
+wait
+
+################## Collect training statistics ################
+# Training end time; no modification needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# Print results; no modification needed
+echo "------------------ Final result ------------------"
+# Output performance (FPS); review and adjust per model
+step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_full_1p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}'`
+FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${step_time}'}'`
+# Print; no modification needed
+echo "Final Performance images/sec : $FPS"
+
+# Output training accuracy; review and adjust per model
+train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_full_1p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}'`
+# Print; no modification needed
+echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Summary for performance monitoring
+# Training case information; no modification needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc'
+
+# Collect performance data; no modification needed
+# Throughput
+ActualFPS=${FPS}
+# Training time per iteration (ms)
+TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'`
+
+# Print key information to ${CaseName}.log; no modification needed
+echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/train_full_8p.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/train_full_8p.sh
new file mode 100644
index 0000000000..2dc85c74a4
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/test/train_full_8p.sh
@@ -0,0 +1,101 @@
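+# Example invocation (dataset path is illustrative); run from the SSDLite320 model directory:
+#   bash test/train_full_8p.sh --data_path=/path/to/coco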
+################ Basic configuration parameters; review and adjust per model ##################
+Network="SSDLite320"
+# Number of NPUs used for training
+export RANK_SIZE=8
+# Dataset path; can be overridden via --data_path
+data_path="/opt/npu/dataset/coco/"
+batch_size=24
+ASCEND_DEVICE_ID=0
+# Argument parsing: data_path is required; other arguments depend on the model. Any new argument must be defined and assigned above.
+for para in $*
+do
+ if [[ $para == --device_id* ]];then
+ device_id=`echo ${para#*=}`
+ elif [[ $para == --data_path* ]];then
+ data_path=`echo ${para#*=}`
+ fi
+done
+
+# Verify that data_path has been provided; no modification needed
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+ exit 1
+fi
+
+############### Determine the training script execution path ###############
+# cd to the directory at the same level as the test folder for better compatibility; test_path_dir is the path containing the test folder
+cur_path=`pwd`
+cur_path_last_dirname=${cur_path##*/}
+if [ x"${cur_path_last_dirname}" == x"test" ];then
+ test_path_dir=${cur_path}
+ cd ..
+ cur_path=`pwd`
+else
+ test_path_dir=${cur_path}/test
+fi
+
+################# Create the log output directory; no modification needed #################
+if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then
+ rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID}
+ mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID
+else
+ mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID
+fi
+
+################# Launch the training script #################
+# Training start time; no modification needed
+start_time=$(date +%s)
+# Source environment variables when not running on the platform
+source ${test_path_dir}/env_npu.sh
+
+python -m torch.distributed.launch --nproc_per_node=$RANK_SIZE --use_env train.py\
+ --dataset coco --data-path $data_path\
+ --model ssdlite320_mobilenet_v3_large\
+ --epochs 600\
+ --aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr\
+ --lr 0.15 --batch-size 24\
+ --weight-decay 0.00004 --data-augmentation ssdlite --world-size 8 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_full_8p.log 2>&1 &
+
+wait
+
+################## Collect training statistics ################
+# Training end time; no modification needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# Print results; no modification needed
+echo "------------------ Final result ------------------"
+# Output performance (FPS); review and adjust per model
+step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_full_8p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}'`
+FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*8/'${step_time}'}'`
+# Print; no modification needed
+echo "Final Performance images/sec : $FPS"
+
+# Output training accuracy; review and adjust per model
+train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_full_8p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}'`
+# Print; no modification needed
+echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Summary for performance monitoring
+# Training case information; no modification needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc'
+
+# Collect performance data; no modification needed
+# Throughput
+ActualFPS=${FPS}
+# Training time per iteration (ms)
+TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'`
+
+# Print key information to ${CaseName}.log; no modification needed
+echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_1p.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_1p.sh
new file mode 100644
index 0000000000..35d9a5c924
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_1p.sh
@@ -0,0 +1,101 @@
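+# Example invocation (dataset path is illustrative); run from the SSDLite320 model directory:
+#   bash test/train_performance_1p.sh --data_path=/path/to/coco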
+################ Basic configuration parameters; review and adjust per model ##################
+Network="SSDLite320"
+# Number of NPUs used for training
+export RANK_SIZE=1
+# Dataset path; can be overridden via --data_path
+data_path="/opt/npu/dataset/coco/"
+batch_size=24
+ASCEND_DEVICE_ID=0
+# Argument parsing: data_path is required; other arguments depend on the model. Any new argument must be defined and assigned above.
+for para in $*
+do
+ if [[ $para == --device_id* ]];then
+ device_id=`echo ${para#*=}`
+ elif [[ $para == --data_path* ]];then
+ data_path=`echo ${para#*=}`
+ fi
+done
+
+# Verify that data_path has been provided; no modification needed
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+ exit 1
+fi
+
+############### Determine the training script execution path ###############
+# cd to the directory at the same level as the test folder for better compatibility; test_path_dir is the path containing the test folder
+cur_path=`pwd`
+cur_path_last_dirname=${cur_path##*/}
+if [ x"${cur_path_last_dirname}" == x"test" ];then
+ test_path_dir=${cur_path}
+ cd ..
+ cur_path=`pwd`
+else
+ test_path_dir=${cur_path}/test
+fi
+
+################# Create the log output directory; no modification needed #################
+if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then
+ rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID}
+ mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID
+else
+ mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID
+fi
+
+################# Launch the training script #################
+# Training start time; no modification needed
+start_time=$(date +%s)
+# Source environment variables when not running on the platform
+source ${test_path_dir}/env_npu.sh
+
+python -m torch.distributed.launch --nproc_per_node=$RANK_SIZE --use_env train.py\
+ --dataset coco --data-path $data_path\
+ --model ssdlite320_mobilenet_v3_large\
+ --epochs 2\
+ --aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr\
+ --lr 0.15 --batch-size 24\
+ --weight-decay 0.00004 --data-augmentation ssdlite > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_1p.log 2>&1 &
+
+wait
+
+################## Collect training statistics ################
+# Training end time; no modification needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# Print results; no modification needed
+echo "------------------ Final result ------------------"
+# Output performance (FPS); review and adjust per model
+step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_1p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}'`
+FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${step_time}'}'`
+# Print; no modification needed
+echo "Final Performance images/sec : $FPS"
+
+# Output training accuracy; review and adjust per model
+train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_1p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}'`
+# Print; no modification needed
+echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Summary for performance monitoring
+# Training case information; no modification needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+
+# Collect performance data; no modification needed
+# Throughput
+ActualFPS=${FPS}
+# Training time per iteration (ms)
+TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'`
+
+# Print key information to ${CaseName}.log; no modification needed
+echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh
new file mode 100644
index 0000000000..90656c3937
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh
@@ -0,0 +1,101 @@
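+# Example invocation (dataset path is illustrative); run from the SSDLite320 model directory:
+#   bash test/train_performance_8p.sh --data_path=/path/to/coco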
+################ Basic configuration parameters; review and adjust per model ##################
+Network="SSDLite320"
+# Number of NPUs used for training
+export RANK_SIZE=8
+# Dataset path; can be overridden via --data_path
+data_path="/opt/npu/dataset/coco/"
+batch_size=24
+ASCEND_DEVICE_ID=0
+# Argument parsing: data_path is required; other arguments depend on the model. Any new argument must be defined and assigned above.
+for para in $*
+do
+ if [[ $para == --device_id* ]];then
+ device_id=`echo ${para#*=}`
+ elif [[ $para == --data_path* ]];then
+ data_path=`echo ${para#*=}`
+ fi
+done
+
+# Verify that data_path has been provided; no modification needed
+if [[ $data_path == "" ]];then
+    echo "[Error] para \"data_path\" must be configured"
+ exit 1
+fi
+
+############### Determine the training script execution path ###############
+# cd to the directory at the same level as the test folder for better compatibility; test_path_dir is the path containing the test folder
+cur_path=`pwd`
+cur_path_last_dirname=${cur_path##*/}
+if [ x"${cur_path_last_dirname}" == x"test" ];then
+ test_path_dir=${cur_path}
+ cd ..
+ cur_path=`pwd`
+else
+ test_path_dir=${cur_path}/test
+fi
+
+################# Create the log output directory; no modification needed #################
+if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then
+ rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID}
+ mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID
+else
+ mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID
+fi
+
+################# Launch the training script #################
+# Training start time; no modification needed
+start_time=$(date +%s)
+# Source environment variables when not running on the platform
+source ${test_path_dir}/env_npu.sh
+
+python -m torch.distributed.launch --nproc_per_node=$RANK_SIZE --use_env train.py\
+ --dataset coco --data-path $data_path\
+ --model ssdlite320_mobilenet_v3_large\
+ --epochs 2\
+ --aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr\
+ --lr 0.15 --batch-size 24\
+ --weight-decay 0.00004 --data-augmentation ssdlite --world-size 8 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_8p.log 2>&1 &
+
+wait
+
+################## Collect training statistics ################
+# Training end time; no modification needed
+end_time=$(date +%s)
+e2e_time=$(( $end_time - $start_time ))
+
+# Print results; no modification needed
+echo "------------------ Final result ------------------"
+# Output performance (FPS); review and adjust per model
+step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_8p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}'`
+FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*8/'${step_time}'}'`
+# Print; no modification needed
+echo "Final Performance images/sec : $FPS"
+
+# Output training accuracy; review and adjust per model
+train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_8p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}'`
+# Print; no modification needed
+echo "Final Train Accuracy : ${train_accuracy}"
+echo "E2E Training Duration sec : $e2e_time"
+
+# Summary for performance monitoring
+# Training case information; no modification needed
+BatchSize=${batch_size}
+DeviceType=`uname -m`
+CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'perf'
+
+# Collect performance data; no modification needed
+# Throughput
+ActualFPS=${FPS}
+# Training time per iteration (ms)
+TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'`
+
+# Print key information to ${CaseName}.log; no modification needed
+echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
+echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/__init__.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/__init__.py
new file mode 100644
index 0000000000..ce8f9784ca
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/__init__.py
@@ -0,0 +1,74 @@
+import warnings
+
+from torchvision import models
+from torchvision import datasets
+from torchvision import ops
+from torchvision import transforms
+from torchvision import utils
+from torchvision import io
+
+from .extension import _HAS_OPS
+import torch
+
+try:
+ from .version import __version__ # noqa: F401
+except ImportError:
+ pass
+
+_image_backend = 'PIL'
+
+_video_backend = "pyav"
+
+
+def set_image_backend(backend):
+ """
+ Specifies the package used to load images.
+
+ Args:
+ backend (string): Name of the image backend. one of {'PIL', 'accimage'}.
+ The :mod:`accimage` package uses the Intel IPP library. It is
+ generally faster than PIL, but does not support as many operations.
+ """
+ global _image_backend
+ if backend not in ['PIL', 'accimage']:
+ raise ValueError("Invalid backend '{}'. Options are 'PIL' and 'accimage'"
+ .format(backend))
+ _image_backend = backend
+
+
+def get_image_backend():
+ """
+ Gets the name of the package used to load images
+ """
+ return _image_backend
+
+
+def set_video_backend(backend):
+ """
+ Specifies the package used to decode videos.
+
+ Args:
+ backend (string): Name of the video backend. one of {'pyav', 'video_reader'}.
+ The :mod:`pyav` package uses the 3rd party PyAv library. It is a Pythonic
+ binding for the FFmpeg libraries.
+ The :mod:`video_reader` package includes a native C++ implementation on
+ top of FFMPEG libraries, and a python API of TorchScript custom operator.
+            It generally decodes faster than :mod:`pyav`, but may be less robust.
+ """
+ global _video_backend
+ if backend not in ["pyav", "video_reader"]:
+ raise ValueError(
+ "Invalid video backend '%s'. Options are 'pyav' and 'video_reader'" % backend
+ )
+ if backend == "video_reader" and not io._HAS_VIDEO_OPT:
+ warnings.warn("video_reader video backend is not available")
+ else:
+ _video_backend = backend
+
+
+def get_video_backend():
+ return _video_backend
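+
+# Illustrative example (not executed at import time): selecting the I/O backends.
+# 'accimage' and 'video_reader' are optional and only usable when installed/built.
+#   import torchvision
+#   torchvision.set_image_backend('PIL')
+#   torchvision.set_video_backend('pyav')
+#   print(torchvision.get_image_backend(), torchvision.get_video_backend())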
+
+
+def _is_tracing():
+ return torch._C._get_tracing_state()
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/_internally_replaced_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/_internally_replaced_utils.py
new file mode 100644
index 0000000000..d147997b0b
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/_internally_replaced_utils.py
@@ -0,0 +1,52 @@
+import importlib.machinery
+import os
+
+
+def _download_file_from_remote_location(fpath: str, url: str) -> None:
+ pass
+
+
+def _is_remote_location_available() -> bool:
+ return False
+
+
+try:
+ from torch.hub import load_state_dict_from_url # noqa: 401
+except ImportError:
+ from torch.utils.model_zoo import load_url as load_state_dict_from_url # noqa: 401
+
+
+def _get_extension_path(lib_name):
+
+ lib_dir = os.path.dirname(__file__)
+ if os.name == "nt":
+ # Register the main torchvision library location on the default DLL path
+ import ctypes
+ import sys
+
+ kernel32 = ctypes.WinDLL("kernel32.dll", use_last_error=True)
+ with_load_library_flags = hasattr(kernel32, "AddDllDirectory")
+ prev_error_mode = kernel32.SetErrorMode(0x0001)
+
+ if with_load_library_flags:
+ kernel32.AddDllDirectory.restype = ctypes.c_void_p
+
+ if sys.version_info >= (3, 8):
+ os.add_dll_directory(lib_dir)
+ elif with_load_library_flags:
+ res = kernel32.AddDllDirectory(lib_dir)
+ if res is None:
+ err = ctypes.WinError(ctypes.get_last_error())
+ err.strerror += f' Error adding "{lib_dir}" to the DLL directories.'
+ raise err
+
+ kernel32.SetErrorMode(prev_error_mode)
+
+ loader_details = (importlib.machinery.ExtensionFileLoader, importlib.machinery.EXTENSION_SUFFIXES)
+
+ extfinder = importlib.machinery.FileFinder(lib_dir, loader_details)
+ ext_specs = extfinder.find_spec(lib_name)
+ if ext_specs is None:
+ raise ImportError
+
+ return ext_specs.origin
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/__init__.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/__init__.py
new file mode 100644
index 0000000000..db5b572a46
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/__init__.py
@@ -0,0 +1,34 @@
+from .lsun import LSUN, LSUNClass
+from .folder import ImageFolder, DatasetFolder
+from .coco import CocoCaptions, CocoDetection
+from .cifar import CIFAR10, CIFAR100
+from .stl10 import STL10
+from .mnist import MNIST, EMNIST, FashionMNIST, KMNIST, QMNIST
+from .svhn import SVHN
+from .phototour import PhotoTour
+from .fakedata import FakeData
+from .semeion import SEMEION
+from .omniglot import Omniglot
+from .sbu import SBU
+from .flickr import Flickr8k, Flickr30k
+from .voc import VOCSegmentation, VOCDetection
+from .cityscapes import Cityscapes
+from .imagenet import ImageNet
+from .caltech import Caltech101, Caltech256
+from .celeba import CelebA
+from .sbd import SBDataset
+from .vision import VisionDataset
+from .usps import USPS
+from .kinetics import Kinetics400
+from .hmdb51 import HMDB51
+from .ucf101 import UCF101
+
+__all__ = ('LSUN', 'LSUNClass',
+ 'ImageFolder', 'DatasetFolder', 'FakeData',
+ 'CocoCaptions', 'CocoDetection',
+ 'CIFAR10', 'CIFAR100', 'EMNIST', 'FashionMNIST', 'QMNIST',
+ 'MNIST', 'KMNIST', 'STL10', 'SVHN', 'PhotoTour', 'SEMEION',
+ 'Omniglot', 'SBU', 'Flickr8k', 'Flickr30k',
+ 'VOCSegmentation', 'VOCDetection', 'Cityscapes', 'ImageNet',
+ 'Caltech101', 'Caltech256', 'CelebA', 'SBDataset', 'VisionDataset',
+ 'USPS', 'Kinetics400', 'HMDB51', 'UCF101')
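+
+# Illustrative example (paths are placeholders): the COCO detection wrapper exported here.
+#   from torchvision.datasets import CocoDetection
+#   ds = CocoDetection(root='/path/to/coco/train2017',
+#                      annFile='/path/to/coco/annotations/instances_train2017.json')
+#   img, anns = ds[0]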
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/caltech.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/caltech.py
new file mode 100644
index 0000000000..09ec1c3d7f
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/caltech.py
@@ -0,0 +1,206 @@
+from PIL import Image
+import os
+import os.path
+
+from .vision import VisionDataset
+from .utils import download_and_extract_archive, verify_str_arg
+
+
+class Caltech101(VisionDataset):
+    """`Caltech 101 <http://www.vision.caltech.edu/Image_Datasets/Caltech101/>`_ Dataset.
+
+ .. warning::
+
+        This class needs `scipy <https://docs.scipy.org/doc/>`_ to load target files from `.mat` format.
+
+ Args:
+ root (string): Root directory of dataset where directory
+ ``caltech101`` exists or will be saved to if download is set to True.
+ target_type (string or list, optional): Type of target to use, ``category`` or
+ ``annotation``. Can also be a list to output a tuple with all specified target types.
+ ``category`` represents the target class, and ``annotation`` is a list of points
+ from a hand-generated outline. Defaults to ``category``.
+ transform (callable, optional): A function/transform that takes in an PIL image
+ and returns a transformed version. E.g, ``transforms.RandomCrop``
+ target_transform (callable, optional): A function/transform that takes in the
+ target and transforms it.
+ download (bool, optional): If true, downloads the dataset from the internet and
+ puts it in root directory. If dataset is already downloaded, it is not
+ downloaded again.
+ """
+
+ def __init__(self, root, target_type="category", transform=None,
+ target_transform=None, download=False):
+ super(Caltech101, self).__init__(os.path.join(root, 'caltech101'),
+ transform=transform,
+ target_transform=target_transform)
+ os.makedirs(self.root, exist_ok=True)
+ if not isinstance(target_type, list):
+ target_type = [target_type]
+ self.target_type = [verify_str_arg(t, "target_type", ("category", "annotation"))
+ for t in target_type]
+
+ if download:
+ self.download()
+
+ if not self._check_integrity():
+ raise RuntimeError('Dataset not found or corrupted.' +
+ ' You can use download=True to download it')
+
+ self.categories = sorted(os.listdir(os.path.join(self.root, "101_ObjectCategories")))
+ self.categories.remove("BACKGROUND_Google") # this is not a real class
+
+ # For some reason, the category names in "101_ObjectCategories" and
+ # "Annotations" do not always match. This is a manual map between the
+ # two. Defaults to using same name, since most names are fine.
+ name_map = {"Faces": "Faces_2",
+ "Faces_easy": "Faces_3",
+ "Motorbikes": "Motorbikes_16",
+ "airplanes": "Airplanes_Side_2"}
+ self.annotation_categories = list(map(lambda x: name_map[x] if x in name_map else x, self.categories))
+
+ self.index = []
+ self.y = []
+ for (i, c) in enumerate(self.categories):
+ n = len(os.listdir(os.path.join(self.root, "101_ObjectCategories", c)))
+ self.index.extend(range(1, n + 1))
+ self.y.extend(n * [i])
+
+ def __getitem__(self, index):
+ """
+ Args:
+ index (int): Index
+
+ Returns:
+            tuple: (image, target) where the type of target is specified by target_type.
+ """
+ import scipy.io
+
+ img = Image.open(os.path.join(self.root,
+ "101_ObjectCategories",
+ self.categories[self.y[index]],
+ "image_{:04d}.jpg".format(self.index[index])))
+
+ target = []
+ for t in self.target_type:
+ if t == "category":
+ target.append(self.y[index])
+ elif t == "annotation":
+ data = scipy.io.loadmat(os.path.join(self.root,
+ "Annotations",
+ self.annotation_categories[self.y[index]],
+ "annotation_{:04d}.mat".format(self.index[index])))
+ target.append(data["obj_contour"])
+ target = tuple(target) if len(target) > 1 else target[0]
+
+ if self.transform is not None:
+ img = self.transform(img)
+
+ if self.target_transform is not None:
+ target = self.target_transform(target)
+
+ return img, target
+
+ def _check_integrity(self):
+ # can be more robust and check hash of files
+ return os.path.exists(os.path.join(self.root, "101_ObjectCategories"))
+
+ def __len__(self):
+ return len(self.index)
+
+ def download(self):
+ if self._check_integrity():
+ print('Files already downloaded and verified')
+ return
+
+ download_and_extract_archive(
+ "http://www.vision.caltech.edu/Image_Datasets/Caltech101/101_ObjectCategories.tar.gz",
+ self.root,
+ filename="101_ObjectCategories.tar.gz",
+ md5="b224c7392d521a49829488ab0f1120d9")
+ download_and_extract_archive(
+ "http://www.vision.caltech.edu/Image_Datasets/Caltech101/Annotations.tar",
+ self.root,
+ filename="101_Annotations.tar",
+ md5="6f83eeb1f24d99cab4eb377263132c91")
+
+ def extra_repr(self):
+ return "Target type: {target_type}".format(**self.__dict__)
+
+
+class Caltech256(VisionDataset):
+    """`Caltech 256 <http://www.vision.caltech.edu/Image_Datasets/Caltech256/>`_ Dataset.
+
+ Args:
+ root (string): Root directory of dataset where directory
+ ``caltech256`` exists or will be saved to if download is set to True.
+ transform (callable, optional): A function/transform that takes in an PIL image
+ and returns a transformed version. E.g, ``transforms.RandomCrop``
+ target_transform (callable, optional): A function/transform that takes in the
+ target and transforms it.
+ download (bool, optional): If true, downloads the dataset from the internet and
+ puts it in root directory. If dataset is already downloaded, it is not
+ downloaded again.
+ """
+
+ def __init__(self, root, transform=None, target_transform=None, download=False):
+ super(Caltech256, self).__init__(os.path.join(root, 'caltech256'),
+ transform=transform,
+ target_transform=target_transform)
+ os.makedirs(self.root, exist_ok=True)
+
+ if download:
+ self.download()
+
+ if not self._check_integrity():
+ raise RuntimeError('Dataset not found or corrupted.' +
+ ' You can use download=True to download it')
+
+ self.categories = sorted(os.listdir(os.path.join(self.root, "256_ObjectCategories")))
+ self.index = []
+ self.y = []
+ for (i, c) in enumerate(self.categories):
+ n = len(os.listdir(os.path.join(self.root, "256_ObjectCategories", c)))
+ self.index.extend(range(1, n + 1))
+ self.y.extend(n * [i])
+
+ def __getitem__(self, index):
+ """
+ Args:
+ index (int): Index
+
+ Returns:
+ tuple: (image, target) where target is index of the target class.
+ """
+ img = Image.open(os.path.join(self.root,
+ "256_ObjectCategories",
+ self.categories[self.y[index]],
+ "{:03d}_{:04d}.jpg".format(self.y[index] + 1, self.index[index])))
+
+ target = self.y[index]
+
+ if self.transform is not None:
+ img = self.transform(img)
+
+ if self.target_transform is not None:
+ target = self.target_transform(target)
+
+ return img, target
+
+ def _check_integrity(self):
+ # can be more robust and check hash of files
+ return os.path.exists(os.path.join(self.root, "256_ObjectCategories"))
+
+ def __len__(self):
+ return len(self.index)
+
+ def download(self):
+ if self._check_integrity():
+ print('Files already downloaded and verified')
+ return
+
+ download_and_extract_archive(
+ "http://www.vision.caltech.edu/Image_Datasets/Caltech256/256_ObjectCategories.tar",
+ self.root,
+ filename="256_ObjectCategories.tar",
+ md5="67b4f42ca05d46448c6bb8ecd2220f6d")
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/celeba.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/celeba.py
new file mode 100644
index 0000000000..71af65ed11
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/celeba.py
@@ -0,0 +1,158 @@
+from functools import partial
+import torch
+import os
+import PIL
+from .vision import VisionDataset
+from .utils import download_file_from_google_drive, check_integrity, verify_str_arg
+
+
+class CelebA(VisionDataset):
+    """`Large-scale CelebFaces Attributes (CelebA) Dataset <http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html>`_ Dataset.
+
+ Args:
+ root (string): Root directory where images are downloaded to.
+ split (string): One of {'train', 'valid', 'test', 'all'}.
+ Accordingly dataset is selected.
+ target_type (string or list, optional): Type of target to use, ``attr``, ``identity``, ``bbox``,
+ or ``landmarks``. Can also be a list to output a tuple with all specified target types.
+ The targets represent:
+ ``attr`` (np.array shape=(40,) dtype=int): binary (0, 1) labels for attributes
+ ``identity`` (int): label for each person (data points with the same identity are the same person)
+ ``bbox`` (np.array shape=(4,) dtype=int): bounding box (x, y, width, height)
+ ``landmarks`` (np.array shape=(10,) dtype=int): landmark points (lefteye_x, lefteye_y, righteye_x,
+ righteye_y, nose_x, nose_y, leftmouth_x, leftmouth_y, rightmouth_x, rightmouth_y)
+ Defaults to ``attr``. If empty, ``None`` will be returned as target.
+ transform (callable, optional): A function/transform that takes in an PIL image
+ and returns a transformed version. E.g, ``transforms.ToTensor``
+ target_transform (callable, optional): A function/transform that takes in the
+ target and transforms it.
+ download (bool, optional): If true, downloads the dataset from the internet and
+ puts it in root directory. If dataset is already downloaded, it is not
+ downloaded again.
+ """
+
+ base_folder = "celeba"
+    # There currently does not appear to be an easy way to extract 7z in python (without introducing additional
+ # dependencies). The "in-the-wild" (not aligned+cropped) images are only in 7z, so they are not available
+ # right now.
+ file_list = [
+ # File ID MD5 Hash Filename
+ ("0B7EVK8r0v71pZjFTYXZWM3FlRnM", "00d2c5bc6d35e252742224ab0c1e8fcb", "img_align_celeba.zip"),
+ # ("0B7EVK8r0v71pbWNEUjJKdDQ3dGc", "b6cd7e93bc7a96c2dc33f819aa3ac651", "img_align_celeba_png.7z"),
+ # ("0B7EVK8r0v71peklHb0pGdDl6R28", "b6cd7e93bc7a96c2dc33f819aa3ac651", "img_celeba.7z"),
+ ("0B7EVK8r0v71pblRyaVFSWGxPY0U", "75e246fa4810816ffd6ee81facbd244c", "list_attr_celeba.txt"),
+ ("1_ee_0u7vcNLOfNLegJRHmolfH5ICW-XS", "32bd1bd63d3c78cd57e08160ec5ed1e2", "identity_CelebA.txt"),
+ ("0B7EVK8r0v71pbThiMVRxWXZ4dU0", "00566efa6fedff7a56946cd1c10f1c16", "list_bbox_celeba.txt"),
+ ("0B7EVK8r0v71pd0FJY3Blby1HUTQ", "cc24ecafdb5b50baae59b03474781f8c", "list_landmarks_align_celeba.txt"),
+ # ("0B7EVK8r0v71pTzJIdlJWdHczRlU", "063ee6ddb681f96bc9ca28c6febb9d1a", "list_landmarks_celeba.txt"),
+ ("0B7EVK8r0v71pY0NSMzRuSXJEVkk", "d32c9cbf5e040fd4025c592c306e6668", "list_eval_partition.txt"),
+ ]
+
+ def __init__(self, root, split="train", target_type="attr", transform=None,
+ target_transform=None, download=False):
+ import pandas
+ super(CelebA, self).__init__(root, transform=transform,
+ target_transform=target_transform)
+ self.split = split
+ if isinstance(target_type, list):
+ self.target_type = target_type
+ else:
+ self.target_type = [target_type]
+
+ if not self.target_type and self.target_transform is not None:
+ raise RuntimeError('target_transform is specified but target_type is empty')
+
+ if download:
+ self.download()
+
+ if not self._check_integrity():
+ raise RuntimeError('Dataset not found or corrupted.' +
+ ' You can use download=True to download it')
+
+ split_map = {
+ "train": 0,
+ "valid": 1,
+ "test": 2,
+ "all": None,
+ }
+ split = split_map[verify_str_arg(split.lower(), "split",
+ ("train", "valid", "test", "all"))]
+
+ fn = partial(os.path.join, self.root, self.base_folder)
+ splits = pandas.read_csv(fn("list_eval_partition.txt"), delim_whitespace=True, header=None, index_col=0)
+ identity = pandas.read_csv(fn("identity_CelebA.txt"), delim_whitespace=True, header=None, index_col=0)
+ bbox = pandas.read_csv(fn("list_bbox_celeba.txt"), delim_whitespace=True, header=1, index_col=0)
+ landmarks_align = pandas.read_csv(fn("list_landmarks_align_celeba.txt"), delim_whitespace=True, header=1)
+ attr = pandas.read_csv(fn("list_attr_celeba.txt"), delim_whitespace=True, header=1)
+
+ mask = slice(None) if split is None else (splits[1] == split)
+
+ self.filename = splits[mask].index.values
+ self.identity = torch.as_tensor(identity[mask].values)
+ self.bbox = torch.as_tensor(bbox[mask].values)
+ self.landmarks_align = torch.as_tensor(landmarks_align[mask].values)
+ self.attr = torch.as_tensor(attr[mask].values)
+ self.attr = (self.attr + 1) // 2 # map from {-1, 1} to {0, 1}
+ self.attr_names = list(attr.columns)
+
+ def _check_integrity(self):
+ for (_, md5, filename) in self.file_list:
+ fpath = os.path.join(self.root, self.base_folder, filename)
+ _, ext = os.path.splitext(filename)
+ # Allow original archive to be deleted (zip and 7z)
+ # Only need the extracted images
+ if ext not in [".zip", ".7z"] and not check_integrity(fpath, md5):
+ return False
+
+ # Should check a hash of the images
+ return os.path.isdir(os.path.join(self.root, self.base_folder, "img_align_celeba"))
+
+ def download(self):
+ import zipfile
+
+ if self._check_integrity():
+ print('Files already downloaded and verified')
+ return
+
+ for (file_id, md5, filename) in self.file_list:
+ download_file_from_google_drive(file_id, os.path.join(self.root, self.base_folder), filename, md5)
+
+ with zipfile.ZipFile(os.path.join(self.root, self.base_folder, "img_align_celeba.zip"), "r") as f:
+ f.extractall(os.path.join(self.root, self.base_folder))
+
+ def __getitem__(self, index):
+ X = PIL.Image.open(os.path.join(self.root, self.base_folder, "img_align_celeba", self.filename[index]))
+
+ target = []
+ for t in self.target_type:
+ if t == "attr":
+ target.append(self.attr[index, :])
+ elif t == "identity":
+ target.append(self.identity[index, 0])
+ elif t == "bbox":
+ target.append(self.bbox[index, :])
+ elif t == "landmarks":
+ target.append(self.landmarks_align[index, :])
+ else:
+ # TODO: refactor with utils.verify_str_arg
+ raise ValueError("Target type \"{}\" is not recognized.".format(t))
+
+ if self.transform is not None:
+ X = self.transform(X)
+
+ if target:
+ target = tuple(target) if len(target) > 1 else target[0]
+
+ if self.target_transform is not None:
+ target = self.target_transform(target)
+ else:
+ target = None
+
+ return X, target
+
+ def __len__(self):
+ return len(self.attr)
+
+ def extra_repr(self):
+ lines = ["Target type: {target_type}", "Split: {split}"]
+ return '\n'.join(lines).format(**self.__dict__)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/cifar.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/cifar.py
new file mode 100644
index 0000000000..127c085cfb
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/cifar.py
@@ -0,0 +1,162 @@
+from PIL import Image
+import os
+import os.path
+import numpy as np
+import pickle
+
+from .vision import VisionDataset
+from .utils import check_integrity, download_and_extract_archive
+
+
+class CIFAR10(VisionDataset):
+    """`CIFAR10 <https://www.cs.toronto.edu/~kriz/cifar.html>`_ Dataset.
+
+ Args:
+ root (string): Root directory of dataset where directory
+ ``cifar-10-batches-py`` exists or will be saved to if download is set to True.
+ train (bool, optional): If True, creates dataset from training set, otherwise
+ creates from test set.
+ transform (callable, optional): A function/transform that takes in an PIL image
+ and returns a transformed version. E.g, ``transforms.RandomCrop``
+ target_transform (callable, optional): A function/transform that takes in the
+ target and transforms it.
+ download (bool, optional): If true, downloads the dataset from the internet and
+ puts it in root directory. If dataset is already downloaded, it is not
+ downloaded again.
+
+ """
+ base_folder = 'cifar-10-batches-py'
+ url = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
+ filename = "cifar-10-python.tar.gz"
+ tgz_md5 = 'c58f30108f718f92721af3b95e74349a'
+ train_list = [
+ ['data_batch_1', 'c99cafc152244af753f735de768cd75f'],
+ ['data_batch_2', 'd4bba439e000b95fd0a9bffe97cbabec'],
+ ['data_batch_3', '54ebc095f3ab1f0389bbae665268c751'],
+ ['data_batch_4', '634d18415352ddfa80567beed471001a'],
+ ['data_batch_5', '482c414d41f54cd18b22e5b47cb7c3cb'],
+ ]
+
+ test_list = [
+ ['test_batch', '40351d587109b95175f43aff81a1287e'],
+ ]
+ meta = {
+ 'filename': 'batches.meta',
+ 'key': 'label_names',
+ 'md5': '5ff9c542aee3614f3951f8cda6e48888',
+ }
+
+ def __init__(self, root, train=True, transform=None, target_transform=None,
+ download=False):
+
+ super(CIFAR10, self).__init__(root, transform=transform,
+ target_transform=target_transform)
+
+ self.train = train # training set or test set
+
+ if download:
+ self.download()
+
+ if not self._check_integrity():
+ raise RuntimeError('Dataset not found or corrupted.' +
+ ' You can use download=True to download it')
+
+ if self.train:
+ downloaded_list = self.train_list
+ else:
+ downloaded_list = self.test_list
+
+ self.data = []
+ self.targets = []
+
+ # now load the picked numpy arrays
+ for file_name, checksum in downloaded_list:
+ file_path = os.path.join(self.root, self.base_folder, file_name)
+ with open(file_path, 'rb') as f:
+ entry = pickle.load(f, encoding='latin1')
+ self.data.append(entry['data'])
+ if 'labels' in entry:
+ self.targets.extend(entry['labels'])
+ else:
+ self.targets.extend(entry['fine_labels'])
+
+ self.data = np.vstack(self.data).reshape(-1, 3, 32, 32)
+ self.data = self.data.transpose((0, 2, 3, 1)) # convert to HWC
+
+ self._load_meta()
+
+ def _load_meta(self):
+ path = os.path.join(self.root, self.base_folder, self.meta['filename'])
+ if not check_integrity(path, self.meta['md5']):
+ raise RuntimeError('Dataset metadata file not found or corrupted.' +
+ ' You can use download=True to download it')
+ with open(path, 'rb') as infile:
+ data = pickle.load(infile, encoding='latin1')
+ self.classes = data[self.meta['key']]
+ self.class_to_idx = {_class: i for i, _class in enumerate(self.classes)}
+
+ def __getitem__(self, index):
+ """
+ Args:
+ index (int): Index
+
+ Returns:
+ tuple: (image, target) where target is index of the target class.
+ """
+ img, target = self.data[index], self.targets[index]
+
+ # doing this so that it is consistent with all other datasets
+ # to return a PIL Image
+ img = Image.fromarray(img)
+
+ if self.transform is not None:
+ img = self.transform(img)
+
+ if self.target_transform is not None:
+ target = self.target_transform(target)
+
+ return img, target
+
+ def __len__(self):
+ return len(self.data)
+
+ def _check_integrity(self):
+ root = self.root
+ for fentry in (self.train_list + self.test_list):
+ filename, md5 = fentry[0], fentry[1]
+ fpath = os.path.join(root, self.base_folder, filename)
+ if not check_integrity(fpath, md5):
+ return False
+ return True
+
+ def download(self):
+ if self._check_integrity():
+ print('Files already downloaded and verified')
+ return
+ download_and_extract_archive(self.url, self.root, filename=self.filename, md5=self.tgz_md5)
+
+ def extra_repr(self):
+ return "Split: {}".format("Train" if self.train is True else "Test")
+
+
+class CIFAR100(CIFAR10):
+    """`CIFAR100 <https://www.cs.toronto.edu/~kriz/cifar.html>`_ Dataset.
+
+ This is a subclass of the `CIFAR10` Dataset.
+ """
+ base_folder = 'cifar-100-python'
+ url = "https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz"
+ filename = "cifar-100-python.tar.gz"
+ tgz_md5 = 'eb9058c3a382ffc7106e4002c42a8d85'
+ train_list = [
+ ['train', '16019d7e3df5f24257cddd939b257f8d'],
+ ]
+
+ test_list = [
+ ['test', 'f0ef6b0ae62326f3e7ffdfab6717acfc'],
+ ]
+ meta = {
+ 'filename': 'meta',
+ 'key': 'fine_label_names',
+ 'md5': '7973b15100ade9c7d40fb424638fde48',
+ }
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/cityscapes.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/cityscapes.py
new file mode 100644
index 0000000000..6e92361f50
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/cityscapes.py
@@ -0,0 +1,207 @@
+import json
+import os
+from collections import namedtuple
+import zipfile
+
+from .utils import extract_archive, verify_str_arg, iterable_to_str
+from .vision import VisionDataset
+from PIL import Image
+
+
+class Cityscapes(VisionDataset):
+    """`Cityscapes <http://www.cityscapes-dataset.com/>`_ Dataset.
+
+ Args:
+ root (string): Root directory of dataset where directory ``leftImg8bit``
+ and ``gtFine`` or ``gtCoarse`` are located.
+ split (string, optional): The image split to use, ``train``, ``test`` or ``val`` if mode="fine"
+ otherwise ``train``, ``train_extra`` or ``val``
+ mode (string, optional): The quality mode to use, ``fine`` or ``coarse``
+ target_type (string or list, optional): Type of target to use, ``instance``, ``semantic``, ``polygon``
+ or ``color``. Can also be a list to output a tuple with all specified target types.
+ transform (callable, optional): A function/transform that takes in a PIL image
+ and returns a transformed version. E.g, ``transforms.RandomCrop``
+ target_transform (callable, optional): A function/transform that takes in the
+ target and transforms it.
+ transforms (callable, optional): A function/transform that takes input sample and its target as entry
+ and returns a transformed version.
+
+ Examples:
+
+ Get semantic segmentation target
+
+ .. code-block:: python
+
+ dataset = Cityscapes('./data/cityscapes', split='train', mode='fine',
+ target_type='semantic')
+
+ img, smnt = dataset[0]
+
+ Get multiple targets
+
+ .. code-block:: python
+
+ dataset = Cityscapes('./data/cityscapes', split='train', mode='fine',
+ target_type=['instance', 'color', 'polygon'])
+
+ img, (inst, col, poly) = dataset[0]
+
+ Validate on the "coarse" set
+
+ .. code-block:: python
+
+ dataset = Cityscapes('./data/cityscapes', split='val', mode='coarse',
+ target_type='semantic')
+
+ img, smnt = dataset[0]
+ """
+
+ # Based on https://github.com/mcordts/cityscapesScripts
+ CityscapesClass = namedtuple('CityscapesClass', ['name', 'id', 'train_id', 'category', 'category_id',
+ 'has_instances', 'ignore_in_eval', 'color'])
+
+ classes = [
+ CityscapesClass('unlabeled', 0, 255, 'void', 0, False, True, (0, 0, 0)),
+ CityscapesClass('ego vehicle', 1, 255, 'void', 0, False, True, (0, 0, 0)),
+ CityscapesClass('rectification border', 2, 255, 'void', 0, False, True, (0, 0, 0)),
+ CityscapesClass('out of roi', 3, 255, 'void', 0, False, True, (0, 0, 0)),
+ CityscapesClass('static', 4, 255, 'void', 0, False, True, (0, 0, 0)),
+ CityscapesClass('dynamic', 5, 255, 'void', 0, False, True, (111, 74, 0)),
+ CityscapesClass('ground', 6, 255, 'void', 0, False, True, (81, 0, 81)),
+ CityscapesClass('road', 7, 0, 'flat', 1, False, False, (128, 64, 128)),
+ CityscapesClass('sidewalk', 8, 1, 'flat', 1, False, False, (244, 35, 232)),
+ CityscapesClass('parking', 9, 255, 'flat', 1, False, True, (250, 170, 160)),
+ CityscapesClass('rail track', 10, 255, 'flat', 1, False, True, (230, 150, 140)),
+ CityscapesClass('building', 11, 2, 'construction', 2, False, False, (70, 70, 70)),
+ CityscapesClass('wall', 12, 3, 'construction', 2, False, False, (102, 102, 156)),
+ CityscapesClass('fence', 13, 4, 'construction', 2, False, False, (190, 153, 153)),
+ CityscapesClass('guard rail', 14, 255, 'construction', 2, False, True, (180, 165, 180)),
+ CityscapesClass('bridge', 15, 255, 'construction', 2, False, True, (150, 100, 100)),
+ CityscapesClass('tunnel', 16, 255, 'construction', 2, False, True, (150, 120, 90)),
+ CityscapesClass('pole', 17, 5, 'object', 3, False, False, (153, 153, 153)),
+ CityscapesClass('polegroup', 18, 255, 'object', 3, False, True, (153, 153, 153)),
+ CityscapesClass('traffic light', 19, 6, 'object', 3, False, False, (250, 170, 30)),
+ CityscapesClass('traffic sign', 20, 7, 'object', 3, False, False, (220, 220, 0)),
+ CityscapesClass('vegetation', 21, 8, 'nature', 4, False, False, (107, 142, 35)),
+ CityscapesClass('terrain', 22, 9, 'nature', 4, False, False, (152, 251, 152)),
+ CityscapesClass('sky', 23, 10, 'sky', 5, False, False, (70, 130, 180)),
+ CityscapesClass('person', 24, 11, 'human', 6, True, False, (220, 20, 60)),
+ CityscapesClass('rider', 25, 12, 'human', 6, True, False, (255, 0, 0)),
+ CityscapesClass('car', 26, 13, 'vehicle', 7, True, False, (0, 0, 142)),
+ CityscapesClass('truck', 27, 14, 'vehicle', 7, True, False, (0, 0, 70)),
+ CityscapesClass('bus', 28, 15, 'vehicle', 7, True, False, (0, 60, 100)),
+ CityscapesClass('caravan', 29, 255, 'vehicle', 7, True, True, (0, 0, 90)),
+ CityscapesClass('trailer', 30, 255, 'vehicle', 7, True, True, (0, 0, 110)),
+ CityscapesClass('train', 31, 16, 'vehicle', 7, True, False, (0, 80, 100)),
+ CityscapesClass('motorcycle', 32, 17, 'vehicle', 7, True, False, (0, 0, 230)),
+ CityscapesClass('bicycle', 33, 18, 'vehicle', 7, True, False, (119, 11, 32)),
+ CityscapesClass('license plate', -1, -1, 'vehicle', 7, False, True, (0, 0, 142)),
+ ]
+
+ def __init__(self, root, split='train', mode='fine', target_type='instance',
+ transform=None, target_transform=None, transforms=None):
+ super(Cityscapes, self).__init__(root, transforms, transform, target_transform)
+ self.mode = 'gtFine' if mode == 'fine' else 'gtCoarse'
+ self.images_dir = os.path.join(self.root, 'leftImg8bit', split)
+ self.targets_dir = os.path.join(self.root, self.mode, split)
+ self.target_type = target_type
+ self.split = split
+ self.images = []
+ self.targets = []
+
+ verify_str_arg(mode, "mode", ("fine", "coarse"))
+ if mode == "fine":
+ valid_modes = ("train", "test", "val")
+ else:
+ valid_modes = ("train", "train_extra", "val")
+ msg = ("Unknown value '{}' for argument split if mode is '{}'. "
+ "Valid values are {{{}}}.")
+ msg = msg.format(split, mode, iterable_to_str(valid_modes))
+ verify_str_arg(split, "split", valid_modes, msg)
+
+ if not isinstance(target_type, list):
+ self.target_type = [target_type]
+ [verify_str_arg(value, "target_type",
+ ("instance", "semantic", "polygon", "color"))
+ for value in self.target_type]
+
+ if not os.path.isdir(self.images_dir) or not os.path.isdir(self.targets_dir):
+
+ if split == 'train_extra':
+ image_dir_zip = os.path.join(self.root, 'leftImg8bit{}'.format('_trainextra.zip'))
+ else:
+ image_dir_zip = os.path.join(self.root, 'leftImg8bit{}'.format('_trainvaltest.zip'))
+
+ if self.mode == 'gtFine':
+ target_dir_zip = os.path.join(self.root, '{}{}'.format(self.mode, '_trainvaltest.zip'))
+ elif self.mode == 'gtCoarse':
+ target_dir_zip = os.path.join(self.root, '{}{}'.format(self.mode, '.zip'))
+
+ if os.path.isfile(image_dir_zip) and os.path.isfile(target_dir_zip):
+ extract_archive(from_path=image_dir_zip, to_path=self.root)
+ extract_archive(from_path=target_dir_zip, to_path=self.root)
+ else:
+ raise RuntimeError('Dataset not found or incomplete. Please make sure all required folders for the'
+ ' specified "split" and "mode" are inside the "root" directory')
+
+ for city in os.listdir(self.images_dir):
+ img_dir = os.path.join(self.images_dir, city)
+ target_dir = os.path.join(self.targets_dir, city)
+ for file_name in os.listdir(img_dir):
+ target_types = []
+ for t in self.target_type:
+ target_name = '{}_{}'.format(file_name.split('_leftImg8bit')[0],
+ self._get_target_suffix(self.mode, t))
+ target_types.append(os.path.join(target_dir, target_name))
+
+ self.images.append(os.path.join(img_dir, file_name))
+ self.targets.append(target_types)
+
+ def __getitem__(self, index):
+ """
+ Args:
+ index (int): Index
+ Returns:
+ tuple: (image, target) where target is a tuple of all target types if target_type is a list with more
+ than one item. Otherwise target is a json object if target_type="polygon", else the image segmentation.
+ """
+
+ image = Image.open(self.images[index]).convert('RGB')
+
+ targets = []
+ for i, t in enumerate(self.target_type):
+ if t == 'polygon':
+ target = self._load_json(self.targets[index][i])
+ else:
+ target = Image.open(self.targets[index][i])
+
+ targets.append(target)
+
+ target = tuple(targets) if len(targets) > 1 else targets[0]
+
+ if self.transforms is not None:
+ image, target = self.transforms(image, target)
+
+ return image, target
+
+ def __len__(self):
+ return len(self.images)
+
+ def extra_repr(self):
+ lines = ["Split: {split}", "Mode: {mode}", "Type: {target_type}"]
+ return '\n'.join(lines).format(**self.__dict__)
+
+ def _load_json(self, path):
+ with open(path, 'r') as file:
+ data = json.load(file)
+ return data
+
+ def _get_target_suffix(self, mode, target_type):
+ if target_type == 'instance':
+ return '{}_instanceIds.png'.format(mode)
+ elif target_type == 'semantic':
+ return '{}_labelIds.png'.format(mode)
+ elif target_type == 'color':
+ return '{}_color.png'.format(mode)
+ else:
+ return '{}_polygons.json'.format(mode)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/coco.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/coco.py
new file mode 100644
index 0000000000..9dd3c7adf8
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/coco.py
@@ -0,0 +1,123 @@
+from .vision import VisionDataset
+from PIL import Image
+import os
+import os.path
+
+
+class CocoCaptions(VisionDataset):
+    """`MS Coco Captions <https://cocodataset.org/#captions-2015>`_ Dataset.
+
+ Args:
+ root (string): Root directory where images are downloaded to.
+ annFile (string): Path to json annotation file.
+ transform (callable, optional): A function/transform that takes in an PIL image
+ and returns a transformed version. E.g, ``transforms.ToTensor``
+ target_transform (callable, optional): A function/transform that takes in the
+ target and transforms it.
+ transforms (callable, optional): A function/transform that takes input sample and its target as entry
+ and returns a transformed version.
+
+ Example:
+
+ .. code:: python
+
+ import torchvision.datasets as dset
+ import torchvision.transforms as transforms
+ cap = dset.CocoCaptions(root = 'dir where images are',
+ annFile = 'json annotation file',
+ transform=transforms.ToTensor())
+
+ print('Number of samples: ', len(cap))
+ img, target = cap[3] # load 4th sample
+
+ print("Image Size: ", img.size())
+ print(target)
+
+ Output: ::
+
+ Number of samples: 82783
+ Image Size: (3L, 427L, 640L)
+ [u'A plane emitting smoke stream flying over a mountain.',
+ u'A plane darts across a bright blue sky behind a mountain covered in snow',
+ u'A plane leaves a contrail above the snowy mountain top.',
+ u'A mountain that has a plane flying overheard in the distance.',
+ u'A mountain view with a plume of smoke in the background']
+
+ """
+
+ def __init__(self, root, annFile, transform=None, target_transform=None, transforms=None):
+ super(CocoCaptions, self).__init__(root, transforms, transform, target_transform)
+ from pycocotools.coco import COCO
+ self.coco = COCO(annFile)
+ self.ids = list(sorted(self.coco.imgs.keys()))
+
+ def __getitem__(self, index):
+ """
+ Args:
+ index (int): Index
+
+ Returns:
+ tuple: Tuple (image, target). target is a list of captions for the image.
+ """
+ coco = self.coco
+ img_id = self.ids[index]
+ ann_ids = coco.getAnnIds(imgIds=img_id)
+ anns = coco.loadAnns(ann_ids)
+ target = [ann['caption'] for ann in anns]
+
+ path = coco.loadImgs(img_id)[0]['file_name']
+
+ img = Image.open(os.path.join(self.root, path)).convert('RGB')
+
+ if self.transforms is not None:
+ img, target = self.transforms(img, target)
+
+ return img, target
+
+ def __len__(self):
+ return len(self.ids)
+
+
+class CocoDetection(VisionDataset):
+ """`MS Coco Detection `_ Dataset.
+
+ Args:
+ root (string): Root directory where images are downloaded to.
+ annFile (string): Path to json annotation file.
+        transform (callable, optional): A function/transform that takes in a PIL image
+ and returns a transformed version. E.g, ``transforms.ToTensor``
+ target_transform (callable, optional): A function/transform that takes in the
+ target and transforms it.
+ transforms (callable, optional): A function/transform that takes input sample and its target as entry
+ and returns a transformed version.
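+
+    Example (an illustrative sketch; the directory and annotation-file paths are
+    placeholders, not files shipped with this repo):
+
+        .. code:: python
+
+            import torchvision.datasets as dset
+            import torchvision.transforms as transforms
+            det = dset.CocoDetection(root='path/to/train2017',
+                                     annFile='path/to/instances_train2017.json',
+                                     transform=transforms.ToTensor())
+
+            img, target = det[0]  # target is the list returned by coco.loadAnns
+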
+ """
+
+ def __init__(self, root, annFile, transform=None, target_transform=None, transforms=None):
+ super(CocoDetection, self).__init__(root, transforms, transform, target_transform)
+ from pycocotools.coco import COCO
+ self.coco = COCO(annFile)
+ self.ids = list(sorted(self.coco.imgs.keys()))
+
+ def __getitem__(self, index):
+ """
+ Args:
+ index (int): Index
+
+ Returns:
+ tuple: Tuple (image, target). target is the object returned by ``coco.loadAnns``.
+ """
+ coco = self.coco
+ img_id = self.ids[index]
+ ann_ids = coco.getAnnIds(imgIds=img_id)
+ target = coco.loadAnns(ann_ids)
+
+ path = coco.loadImgs(img_id)[0]['file_name']
+
+ img = Image.open(os.path.join(self.root, path)).convert('RGB')
+ if self.transforms is not None:
+ img, target = self.transforms(img, target)
+
+ return img, target
+
+ def __len__(self):
+ return len(self.ids)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/fakedata.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/fakedata.py
new file mode 100644
index 0000000000..f079c1a92d
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/fakedata.py
@@ -0,0 +1,58 @@
+import torch
+from .vision import VisionDataset
+from .. import transforms
+
+
+class FakeData(VisionDataset):
+ """A fake dataset that returns randomly generated images and returns them as PIL images
+
+ Args:
+ size (int, optional): Size of the dataset. Default: 1000 images
+        image_size(tuple, optional): Size of the returned images. Default: (3, 224, 224)
+        num_classes(int, optional): Number of classes in the dataset. Default: 10
+        transform (callable, optional): A function/transform that takes in a PIL image
+ and returns a transformed version. E.g, ``transforms.RandomCrop``
+ target_transform (callable, optional): A function/transform that takes in the
+ target and transforms it.
+ random_offset (int): Offsets the index-based random seed used to
+ generate each image. Default: 0
+
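+    Example (a minimal sketch; the sizes below are arbitrary illustrations):
+
+        .. code:: python
+
+            import torchvision.datasets as dset
+            fake = dset.FakeData(size=8, image_size=(3, 32, 32), num_classes=2)
+            img, target = fake[0]  # img is a PIL Image, target a class index
+            print(len(fake), img.size, int(target))
+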
+ """
+
+ def __init__(self, size=1000, image_size=(3, 224, 224), num_classes=10,
+ transform=None, target_transform=None, random_offset=0):
+ super(FakeData, self).__init__(None, transform=transform,
+ target_transform=target_transform)
+ self.size = size
+ self.num_classes = num_classes
+ self.image_size = image_size
+ self.random_offset = random_offset
+
+ def __getitem__(self, index):
+ """
+ Args:
+ index (int): Index
+
+ Returns:
+ tuple: (image, target) where target is class_index of the target class.
+ """
+ # create random image that is consistent with the index id
+ if index >= len(self):
+ raise IndexError("{} index out of range".format(self.__class__.__name__))
+ rng_state = torch.get_rng_state()
+ torch.manual_seed(index + self.random_offset)
+ img = torch.randn(*self.image_size)
+ target = torch.randint(0, self.num_classes, size=(1,), dtype=torch.long)[0]
+ torch.set_rng_state(rng_state)
+
+ # convert to PIL Image
+ img = transforms.ToPILImage()(img)
+ if self.transform is not None:
+ img = self.transform(img)
+ if self.target_transform is not None:
+ target = self.target_transform(target)
+
+ return img, target
+
+ def __len__(self):
+ return self.size
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/flickr.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/flickr.py
new file mode 100644
index 0000000000..77cd430705
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/flickr.py
@@ -0,0 +1,154 @@
+from collections import defaultdict
+from PIL import Image
+from html.parser import HTMLParser
+
+import glob
+import os
+from .vision import VisionDataset
+
+
+class Flickr8kParser(HTMLParser):
+ """Parser for extracting captions from the Flickr8k dataset web page."""
+
+ def __init__(self, root):
+ super(Flickr8kParser, self).__init__()
+
+ self.root = root
+
+ # Data structure to store captions
+ self.annotations = {}
+
+ # State variables
+ self.in_table = False
+ self.current_tag = None
+ self.current_img = None
+
+ def handle_starttag(self, tag, attrs):
+ self.current_tag = tag
+
+ if tag == 'table':
+ self.in_table = True
+
+ def handle_endtag(self, tag):
+ self.current_tag = None
+
+ if tag == 'table':
+ self.in_table = False
+
+ def handle_data(self, data):
+ if self.in_table:
+ if data == 'Image Not Found':
+ self.current_img = None
+ elif self.current_tag == 'a':
+ img_id = data.split('/')[-2]
+ img_id = os.path.join(self.root, img_id + '_*.jpg')
+ img_id = glob.glob(img_id)[0]
+ self.current_img = img_id
+ self.annotations[img_id] = []
+ elif self.current_tag == 'li' and self.current_img:
+ img_id = self.current_img
+ self.annotations[img_id].append(data.strip())
+
+
+class Flickr8k(VisionDataset):
+ """`Flickr8k Entities `_ Dataset.
+
+ Args:
+ root (string): Root directory where images are downloaded to.
+ ann_file (string): Path to annotation file.
+ transform (callable, optional): A function/transform that takes in a PIL image
+ and returns a transformed version. E.g, ``transforms.ToTensor``
+ target_transform (callable, optional): A function/transform that takes in the
+ target and transforms it.
+ """
+
+ def __init__(self, root, ann_file, transform=None, target_transform=None):
+ super(Flickr8k, self).__init__(root, transform=transform,
+ target_transform=target_transform)
+ self.ann_file = os.path.expanduser(ann_file)
+
+ # Read annotations and store in a dict
+ parser = Flickr8kParser(self.root)
+ with open(self.ann_file) as fh:
+ parser.feed(fh.read())
+ self.annotations = parser.annotations
+
+ self.ids = list(sorted(self.annotations.keys()))
+
+ def __getitem__(self, index):
+ """
+ Args:
+ index (int): Index
+
+ Returns:
+ tuple: Tuple (image, target). target is a list of captions for the image.
+ """
+ img_id = self.ids[index]
+
+ # Image
+ img = Image.open(img_id).convert('RGB')
+ if self.transform is not None:
+ img = self.transform(img)
+
+ # Captions
+ target = self.annotations[img_id]
+ if self.target_transform is not None:
+ target = self.target_transform(target)
+
+ return img, target
+
+ def __len__(self):
+ return len(self.ids)
+
+
+class Flickr30k(VisionDataset):
+ """`Flickr30k Entities `_ Dataset.
+
+ Args:
+ root (string): Root directory where images are downloaded to.
+ ann_file (string): Path to annotation file.
+ transform (callable, optional): A function/transform that takes in a PIL image
+ and returns a transformed version. E.g, ``transforms.ToTensor``
+ target_transform (callable, optional): A function/transform that takes in the
+ target and transforms it.
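+
+    Example (a sketch; both paths are placeholders for a local copy of the images
+    and of the tab-separated caption file this class expects):
+
+        .. code:: python
+
+            import torchvision.datasets as dset
+            data = dset.Flickr30k(root='path/to/flickr30k-images',
+                                  ann_file='path/to/captions.token')
+            img, captions = data[0]  # captions is a list of strings
+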
+ """
+
+ def __init__(self, root, ann_file, transform=None, target_transform=None):
+ super(Flickr30k, self).__init__(root, transform=transform,
+ target_transform=target_transform)
+ self.ann_file = os.path.expanduser(ann_file)
+
+ # Read annotations and store in a dict
+ self.annotations = defaultdict(list)
+ with open(self.ann_file) as fh:
+ for line in fh:
+ img_id, caption = line.strip().split('\t')
+ self.annotations[img_id[:-2]].append(caption)
+
+ self.ids = list(sorted(self.annotations.keys()))
+
+ def __getitem__(self, index):
+ """
+ Args:
+ index (int): Index
+
+ Returns:
+ tuple: Tuple (image, target). target is a list of captions for the image.
+ """
+ img_id = self.ids[index]
+
+ # Image
+ filename = os.path.join(self.root, img_id)
+ img = Image.open(filename).convert('RGB')
+ if self.transform is not None:
+ img = self.transform(img)
+
+ # Captions
+ target = self.annotations[img_id]
+ if self.target_transform is not None:
+ target = self.target_transform(target)
+
+ return img, target
+
+ def __len__(self):
+ return len(self.ids)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/folder.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/folder.py
new file mode 100644
index 0000000000..16d092b716
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/folder.py
@@ -0,0 +1,207 @@
+from .vision import VisionDataset
+
+from PIL import Image
+
+import os
+import os.path
+
+
+def has_file_allowed_extension(filename, extensions):
+ """Checks if a file is an allowed extension.
+
+ Args:
+ filename (string): path to a file
+ extensions (tuple of strings): extensions to consider (lowercase)
+
+ Returns:
+ bool: True if the filename ends with one of given extensions
+ """
+ return filename.lower().endswith(extensions)
+
+
+def is_image_file(filename):
+ """Checks if a file is an allowed image extension.
+
+ Args:
+ filename (string): path to a file
+
+ Returns:
+ bool: True if the filename ends with a known image extension
+ """
+ return has_file_allowed_extension(filename, IMG_EXTENSIONS)
+
+
+def make_dataset(directory, class_to_idx, extensions=None, is_valid_file=None):
+ instances = []
+ directory = os.path.expanduser(directory)
+ both_none = extensions is None and is_valid_file is None
+ both_something = extensions is not None and is_valid_file is not None
+ if both_none or both_something:
+ raise ValueError("Both extensions and is_valid_file cannot be None or not None at the same time")
+ if extensions is not None:
+ def is_valid_file(x):
+ return has_file_allowed_extension(x, extensions)
+ for target_class in sorted(class_to_idx.keys()):
+ class_index = class_to_idx[target_class]
+ target_dir = os.path.join(directory, target_class)
+ if not os.path.isdir(target_dir):
+ continue
+ for root, _, fnames in sorted(os.walk(target_dir, followlinks=True)):
+ for fname in sorted(fnames):
+ path = os.path.join(root, fname)
+ if is_valid_file(path):
+ item = path, class_index
+ instances.append(item)
+ return instances
+
+
+class DatasetFolder(VisionDataset):
+ """A generic data loader where the samples are arranged in this way: ::
+
+ root/class_x/xxx.ext
+ root/class_x/xxy.ext
+ root/class_x/xxz.ext
+
+ root/class_y/123.ext
+ root/class_y/nsdf3.ext
+ root/class_y/asd932_.ext
+
+ Args:
+ root (string): Root directory path.
+ loader (callable): A function to load a sample given its path.
+        extensions (tuple[string]): A list of allowed extensions.
+            Only one of extensions and is_valid_file should be passed.
+ transform (callable, optional): A function/transform that takes in
+ a sample and returns a transformed version.
+ E.g, ``transforms.RandomCrop`` for images.
+ target_transform (callable, optional): A function/transform that takes
+ in the target and transforms it.
+        is_valid_file (callable, optional): A function that takes the path of a file
+            and checks if the file is a valid file (used to check for corrupt files).
+            Only one of extensions and is_valid_file should be passed.
+
+ Attributes:
+ classes (list): List of the class names.
+ class_to_idx (dict): Dict with items (class_name, class_index).
+ samples (list): List of (sample path, class_index) tuples
+ targets (list): The class_index value for each image in the dataset
+ """
+
+ def __init__(self, root, loader, extensions=None, transform=None,
+ target_transform=None, is_valid_file=None):
+ super(DatasetFolder, self).__init__(root, transform=transform,
+ target_transform=target_transform)
+ classes, class_to_idx = self._find_classes(self.root)
+ samples = make_dataset(self.root, class_to_idx, extensions, is_valid_file)
+ if len(samples) == 0:
+ raise (RuntimeError("Found 0 files in subfolders of: " + self.root + "\n"
+ "Supported extensions are: " + ",".join(extensions)))
+
+ self.loader = loader
+ self.extensions = extensions
+
+ self.classes = classes
+ self.class_to_idx = class_to_idx
+ self.samples = samples
+ self.targets = [s[1] for s in samples]
+
+ def _find_classes(self, dir):
+ """
+ Finds the class folders in a dataset.
+
+ Args:
+ dir (string): Root directory path.
+
+ Returns:
+ tuple: (classes, class_to_idx) where classes are relative to (dir), and class_to_idx is a dictionary.
+
+ Ensures:
+ No class is a subdirectory of another.
+ """
+ classes = [d.name for d in os.scandir(dir) if d.is_dir()]
+ classes.sort()
+ class_to_idx = {classes[i]: i for i in range(len(classes))}
+ return classes, class_to_idx
+
+ def __getitem__(self, index):
+ """
+ Args:
+ index (int): Index
+
+ Returns:
+ tuple: (sample, target) where target is class_index of the target class.
+ """
+ path, target = self.samples[index]
+ sample = self.loader(path)
+ if self.transform is not None:
+ sample = self.transform(sample)
+ if self.target_transform is not None:
+ target = self.target_transform(target)
+
+ return sample, target
+
+ def __len__(self):
+ return len(self.samples)
+
+
+IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp')
+
+
+def pil_loader(path):
+ # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
+ with open(path, 'rb') as f:
+ img = Image.open(f)
+ return img.convert('RGB')
+
+
+def accimage_loader(path):
+ import accimage
+ try:
+ return accimage.Image(path)
+ except IOError:
+ # Potentially a decoding problem, fall back to PIL.Image
+ return pil_loader(path)
+
+
+def default_loader(path):
+ from torchvision import get_image_backend
+ if get_image_backend() == 'accimage':
+ return accimage_loader(path)
+ else:
+ return pil_loader(path)
+
+
+class ImageFolder(DatasetFolder):
+ """A generic data loader where the images are arranged in this way: ::
+
+ root/dog/xxx.png
+ root/dog/xxy.png
+ root/dog/xxz.png
+
+ root/cat/123.png
+ root/cat/nsdf3.png
+ root/cat/asd932_.png
+
+ Args:
+ root (string): Root directory path.
+        transform (callable, optional): A function/transform that takes in a PIL image
+ and returns a transformed version. E.g, ``transforms.RandomCrop``
+ target_transform (callable, optional): A function/transform that takes in the
+ target and transforms it.
+ loader (callable, optional): A function to load an image given its path.
+        is_valid_file (callable, optional): A function that takes the path of an image file
+            and checks if the file is a valid file (used to check for corrupt files).
+
+ Attributes:
+ classes (list): List of the class names.
+ class_to_idx (dict): Dict with items (class_name, class_index).
+ imgs (list): List of (image path, class_index) tuples
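+
+    Example (an illustrative sketch; ``path/to/train`` stands in for any directory
+    laid out as shown above):
+
+        .. code:: python
+
+            import torchvision.datasets as dset
+            import torchvision.transforms as transforms
+            data = dset.ImageFolder(root='path/to/train',
+                                    transform=transforms.ToTensor())
+            img, class_index = data[0]
+            print(data.classes[class_index])
+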
+ """
+
+ def __init__(self, root, transform=None, target_transform=None,
+ loader=default_loader, is_valid_file=None):
+ super(ImageFolder, self).__init__(root, loader, IMG_EXTENSIONS if is_valid_file is None else None,
+ transform=transform,
+ target_transform=target_transform,
+ is_valid_file=is_valid_file)
+ self.imgs = self.samples
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/hmdb51.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/hmdb51.py
new file mode 100644
index 0000000000..3b826bfa9a
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/hmdb51.py
@@ -0,0 +1,130 @@
+import glob
+import os
+
+from .utils import list_dir
+from .folder import make_dataset
+from .video_utils import VideoClips
+from .vision import VisionDataset
+
+
+class HMDB51(VisionDataset):
+ """
+ `HMDB51 `_
+ dataset.
+
+ HMDB51 is an action recognition video dataset.
+    This dataset considers every video as a collection of video clips of fixed size, specified
+ by ``frames_per_clip``, where the step in frames between each clip is given by
+ ``step_between_clips``.
+
+ To give an example, for 2 videos with 10 and 15 frames respectively, if ``frames_per_clip=5``
+ and ``step_between_clips=5``, the dataset size will be (2 + 3) = 5, where the first two
+ elements will come from video 1, and the next three elements from video 2.
+ Note that we drop clips which do not have exactly ``frames_per_clip`` elements, so not all
+ frames in a video might be present.
+
+ Internally, it uses a VideoClips object to handle clip creation.
+
+ Args:
+ root (string): Root directory of the HMDB51 Dataset.
+ annotation_path (str): Path to the folder containing the split files.
+ frames_per_clip (int): Number of frames in a clip.
+ step_between_clips (int): Number of frames between each clip.
+ fold (int, optional): Which fold to use. Should be between 1 and 3.
+ train (bool, optional): If ``True``, creates a dataset from the train split,
+ otherwise from the ``test`` split.
+ transform (callable, optional): A function/transform that takes in a TxHxWxC video
+ and returns a transformed version.
+
+ Returns:
+ video (Tensor[T, H, W, C]): the `T` video frames
+ audio(Tensor[K, L]): the audio frames, where `K` is the number of channels
+ and `L` is the number of points
+ label (int): class of the video clip
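+
+    Example (a sketch only; the video and split-file paths are placeholders, and
+    decoding the clips requires a working video backend such as PyAV):
+
+        .. code:: python
+
+            import torchvision.datasets as dset
+            data = dset.HMDB51(root='path/to/hmdb51/videos',
+                               annotation_path='path/to/split_files',
+                               frames_per_clip=16, step_between_clips=16,
+                               fold=1, train=True)
+            video, audio, label = data[0]
+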
+ """
+
+ data_url = "http://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/hmdb51_org.rar"
+ splits = {
+ "url": "http://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/test_train_splits.rar",
+ "md5": "15e67781e70dcfbdce2d7dbb9b3344b5"
+ }
+ TRAIN_TAG = 1
+ TEST_TAG = 2
+
+ def __init__(self, root, annotation_path, frames_per_clip, step_between_clips=1,
+ frame_rate=None, fold=1, train=True, transform=None,
+ _precomputed_metadata=None, num_workers=1, _video_width=0,
+ _video_height=0, _video_min_dimension=0, _audio_samples=0):
+ super(HMDB51, self).__init__(root)
+ if fold not in (1, 2, 3):
+ raise ValueError("fold should be between 1 and 3, got {}".format(fold))
+
+ extensions = ('avi',)
+ classes = sorted(list_dir(root))
+ class_to_idx = {class_: i for (i, class_) in enumerate(classes)}
+ self.samples = make_dataset(
+ self.root,
+ class_to_idx,
+ extensions,
+ )
+
+ video_paths = [path for (path, _) in self.samples]
+ video_clips = VideoClips(
+ video_paths,
+ frames_per_clip,
+ step_between_clips,
+ frame_rate,
+ _precomputed_metadata,
+ num_workers=num_workers,
+ _video_width=_video_width,
+ _video_height=_video_height,
+ _video_min_dimension=_video_min_dimension,
+ _audio_samples=_audio_samples,
+ )
+ self.fold = fold
+ self.train = train
+ self.classes = classes
+ self.video_clips_metadata = video_clips.metadata
+ self.indices = self._select_fold(video_paths, annotation_path, fold, train)
+ self.video_clips = video_clips.subset(self.indices)
+ self.transform = transform
+
+ @property
+ def metadata(self):
+ return self.video_clips_metadata
+
+ def _select_fold(self, video_list, annotations_dir, fold, train):
+ target_tag = self.TRAIN_TAG if train else self.TEST_TAG
+ split_pattern_name = "*test_split{}.txt".format(fold)
+ split_pattern_path = os.path.join(annotations_dir, split_pattern_name)
+ annotation_paths = glob.glob(split_pattern_path)
+ selected_files = []
+ for filepath in annotation_paths:
+ with open(filepath) as fid:
+ lines = fid.readlines()
+ for line in lines:
+ video_filename, tag_string = line.split()
+ tag = int(tag_string)
+ if tag == target_tag:
+ selected_files.append(video_filename)
+ selected_files = set(selected_files)
+
+ indices = []
+ for video_index, video_path in enumerate(video_list):
+ if os.path.basename(video_path) in selected_files:
+ indices.append(video_index)
+
+ return indices
+
+ def __len__(self):
+ return self.video_clips.num_clips()
+
+ def __getitem__(self, idx):
+ video, audio, _, video_idx = self.video_clips.get_clip(idx)
+ sample_index = self.indices[video_idx]
+ _, class_index = self.samples[sample_index]
+
+ if self.transform is not None:
+ video = self.transform(video)
+
+ return video, audio, class_index
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/imagenet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/imagenet.py
new file mode 100644
index 0000000000..a45ff3cd44
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/imagenet.py
@@ -0,0 +1,218 @@
+import warnings
+from contextlib import contextmanager
+import os
+import shutil
+import tempfile
+import torch
+from .folder import ImageFolder
+from .utils import check_integrity, extract_archive, verify_str_arg
+
+ARCHIVE_META = {
+ 'train': ('ILSVRC2012_img_train.tar', '1d675b47d978889d74fa0da5fadfb00e'),
+ 'val': ('ILSVRC2012_img_val.tar', '29b22e2961454d5413ddabcf34fc5622'),
+ 'devkit': ('ILSVRC2012_devkit_t12.tar.gz', 'fa75699e90414af021442c21a62c3abf')
+}
+
+META_FILE = "meta.bin"
+
+
+class ImageNet(ImageFolder):
+ """`ImageNet `_ 2012 Classification Dataset.
+
+ Args:
+ root (string): Root directory of the ImageNet Dataset.
+ split (string, optional): The dataset split, supports ``train``, or ``val``.
+        transform (callable, optional): A function/transform that takes in a PIL image
+ and returns a transformed version. E.g, ``transforms.RandomCrop``
+ target_transform (callable, optional): A function/transform that takes in the
+ target and transforms it.
+ loader (callable, optional): A function to load an image given its path.
+
+ Attributes:
+ classes (list): List of the class name tuples.
+ class_to_idx (dict): Dict with items (class_name, class_index).
+ wnids (list): List of the WordNet IDs.
+ wnid_to_idx (dict): Dict with items (wordnet_id, class_index).
+ imgs (list): List of (image path, class_index) tuples
+ targets (list): The class_index value for each image in the dataset
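+
+    Example (an illustrative sketch; it assumes the archives listed in
+    ``ARCHIVE_META`` have already been placed in ``path/to/imagenet``):
+
+        .. code:: python
+
+            import torchvision.datasets as dset
+            import torchvision.transforms as transforms
+            val = dset.ImageNet(root='path/to/imagenet', split='val',
+                                transform=transforms.ToTensor())
+            img, class_index = val[0]
+            print(val.classes[class_index])  # tuple of human-readable names
+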
+ """
+
+ def __init__(self, root, split='train', download=None, **kwargs):
+ if download is True:
+ msg = ("The dataset is no longer publicly accessible. You need to "
+ "download the archives externally and place them in the root "
+ "directory.")
+ raise RuntimeError(msg)
+ elif download is False:
+ msg = ("The use of the download flag is deprecated, since the dataset "
+ "is no longer publicly accessible.")
+ warnings.warn(msg, RuntimeWarning)
+
+ root = self.root = os.path.expanduser(root)
+ self.split = verify_str_arg(split, "split", ("train", "val"))
+
+ self.parse_archives()
+ wnid_to_classes = load_meta_file(self.root)[0]
+
+ super(ImageNet, self).__init__(self.split_folder, **kwargs)
+ self.root = root
+
+ self.wnids = self.classes
+ self.wnid_to_idx = self.class_to_idx
+ self.classes = [wnid_to_classes[wnid] for wnid in self.wnids]
+ self.class_to_idx = {cls: idx
+ for idx, clss in enumerate(self.classes)
+ for cls in clss}
+
+ def parse_archives(self):
+ if not check_integrity(os.path.join(self.root, META_FILE)):
+ parse_devkit_archive(self.root)
+
+ if not os.path.isdir(self.split_folder):
+ if self.split == 'train':
+ parse_train_archive(self.root)
+ elif self.split == 'val':
+ parse_val_archive(self.root)
+
+ @property
+ def split_folder(self):
+ return os.path.join(self.root, self.split)
+
+ def extra_repr(self):
+ return "Split: {split}".format(**self.__dict__)
+
+
+def load_meta_file(root, file=None):
+ if file is None:
+ file = META_FILE
+ file = os.path.join(root, file)
+
+ if check_integrity(file):
+ return torch.load(file)
+ else:
+ msg = ("The meta file {} is not present in the root directory or is corrupted. "
+ "This file is automatically created by the ImageNet dataset.")
+ raise RuntimeError(msg.format(file, root))
+
+
+def _verify_archive(root, file, md5):
+ if not check_integrity(os.path.join(root, file), md5):
+ msg = ("The archive {} is not present in the root directory or is corrupted. "
+ "You need to download it externally and place it in {}.")
+ raise RuntimeError(msg.format(file, root))
+
+
+def parse_devkit_archive(root, file=None):
+ """Parse the devkit archive of the ImageNet2012 classification dataset and save
+ the meta information in a binary file.
+
+ Args:
+ root (str): Root directory containing the devkit archive
+ file (str, optional): Name of devkit archive. Defaults to
+ 'ILSVRC2012_devkit_t12.tar.gz'
+ """
+ import scipy.io as sio
+
+ def parse_meta_mat(devkit_root):
+ metafile = os.path.join(devkit_root, "data", "meta.mat")
+ meta = sio.loadmat(metafile, squeeze_me=True)['synsets']
+ nums_children = list(zip(*meta))[4]
+ meta = [meta[idx] for idx, num_children in enumerate(nums_children)
+ if num_children == 0]
+ idcs, wnids, classes = list(zip(*meta))[:3]
+ classes = [tuple(clss.split(', ')) for clss in classes]
+ idx_to_wnid = {idx: wnid for idx, wnid in zip(idcs, wnids)}
+ wnid_to_classes = {wnid: clss for wnid, clss in zip(wnids, classes)}
+ return idx_to_wnid, wnid_to_classes
+
+ def parse_val_groundtruth_txt(devkit_root):
+ file = os.path.join(devkit_root, "data",
+ "ILSVRC2012_validation_ground_truth.txt")
+ with open(file, 'r') as txtfh:
+ val_idcs = txtfh.readlines()
+ return [int(val_idx) for val_idx in val_idcs]
+
+ @contextmanager
+ def get_tmp_dir():
+ tmp_dir = tempfile.mkdtemp()
+ try:
+ yield tmp_dir
+ finally:
+ shutil.rmtree(tmp_dir)
+
+ archive_meta = ARCHIVE_META["devkit"]
+ if file is None:
+ file = archive_meta[0]
+ md5 = archive_meta[1]
+
+ _verify_archive(root, file, md5)
+
+ with get_tmp_dir() as tmp_dir:
+ extract_archive(os.path.join(root, file), tmp_dir)
+
+ devkit_root = os.path.join(tmp_dir, "ILSVRC2012_devkit_t12")
+ idx_to_wnid, wnid_to_classes = parse_meta_mat(devkit_root)
+ val_idcs = parse_val_groundtruth_txt(devkit_root)
+ val_wnids = [idx_to_wnid[idx] for idx in val_idcs]
+
+ torch.save((wnid_to_classes, val_wnids), os.path.join(root, META_FILE))
+
+
+def parse_train_archive(root, file=None, folder="train"):
+ """Parse the train images archive of the ImageNet2012 classification dataset and
+ prepare it for usage with the ImageNet dataset.
+
+ Args:
+ root (str): Root directory containing the train images archive
+ file (str, optional): Name of train images archive. Defaults to
+ 'ILSVRC2012_img_train.tar'
+ folder (str, optional): Optional name for train images folder. Defaults to
+ 'train'
+ """
+ archive_meta = ARCHIVE_META["train"]
+ if file is None:
+ file = archive_meta[0]
+ md5 = archive_meta[1]
+
+ _verify_archive(root, file, md5)
+
+ train_root = os.path.join(root, folder)
+ extract_archive(os.path.join(root, file), train_root)
+
+ archives = [os.path.join(train_root, archive) for archive in os.listdir(train_root)]
+ for archive in archives:
+ extract_archive(archive, os.path.splitext(archive)[0], remove_finished=True)
+
+
+def parse_val_archive(root, file=None, wnids=None, folder="val"):
+ """Parse the validation images archive of the ImageNet2012 classification dataset
+ and prepare it for usage with the ImageNet dataset.
+
+ Args:
+ root (str): Root directory containing the validation images archive
+ file (str, optional): Name of validation images archive. Defaults to
+ 'ILSVRC2012_img_val.tar'
+ wnids (list, optional): List of WordNet IDs of the validation images. If None
+ is given, the IDs are loaded from the meta file in the root directory
+ folder (str, optional): Optional name for validation images folder. Defaults to
+ 'val'
+ """
+ archive_meta = ARCHIVE_META["val"]
+ if file is None:
+ file = archive_meta[0]
+ md5 = archive_meta[1]
+ if wnids is None:
+ wnids = load_meta_file(root)[1]
+
+ _verify_archive(root, file, md5)
+
+ val_root = os.path.join(root, folder)
+ extract_archive(os.path.join(root, file), val_root)
+
+ images = sorted([os.path.join(val_root, image) for image in os.listdir(val_root)])
+
+ for wnid in set(wnids):
+ os.mkdir(os.path.join(val_root, wnid))
+
+ for wnid, img_file in zip(wnids, images):
+ shutil.move(img_file, os.path.join(val_root, wnid, os.path.basename(img_file)))
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/kinetics.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/kinetics.py
new file mode 100644
index 0000000000..07db91cc19
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/kinetics.py
@@ -0,0 +1,79 @@
+from .utils import list_dir
+from .folder import make_dataset
+from .video_utils import VideoClips
+from .vision import VisionDataset
+
+
+class Kinetics400(VisionDataset):
+ """
+ `Kinetics-400 `_
+ dataset.
+
+ Kinetics-400 is an action recognition video dataset.
+    This dataset considers every video as a collection of video clips of fixed size, specified
+ by ``frames_per_clip``, where the step in frames between each clip is given by
+ ``step_between_clips``.
+
+ To give an example, for 2 videos with 10 and 15 frames respectively, if ``frames_per_clip=5``
+ and ``step_between_clips=5``, the dataset size will be (2 + 3) = 5, where the first two
+ elements will come from video 1, and the next three elements from video 2.
+ Note that we drop clips which do not have exactly ``frames_per_clip`` elements, so not all
+ frames in a video might be present.
+
+ Internally, it uses a VideoClips object to handle clip creation.
+
+ Args:
+ root (string): Root directory of the Kinetics-400 Dataset.
+ frames_per_clip (int): number of frames in a clip
+ step_between_clips (int): number of frames between each clip
+ transform (callable, optional): A function/transform that takes in a TxHxWxC video
+ and returns a transformed version.
+
+ Returns:
+ video (Tensor[T, H, W, C]): the `T` video frames
+ audio(Tensor[K, L]): the audio frames, where `K` is the number of channels
+ and `L` is the number of points
+ label (int): class of the video clip
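+
+    Example (a sketch only; the root path is a placeholder, and decoding the clips
+    requires a working video backend such as PyAV):
+
+        .. code:: python
+
+            import torchvision.datasets as dset
+            data = dset.Kinetics400(root='path/to/kinetics400/train',
+                                    frames_per_clip=16, step_between_clips=16,
+                                    extensions=('mp4',))
+            video, audio, label = data[0]
+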
+ """
+
+ def __init__(self, root, frames_per_clip, step_between_clips=1, frame_rate=None,
+ extensions=('avi',), transform=None, _precomputed_metadata=None,
+ num_workers=1, _video_width=0, _video_height=0,
+ _video_min_dimension=0, _audio_samples=0, _audio_channels=0):
+ super(Kinetics400, self).__init__(root)
+
+ classes = list(sorted(list_dir(root)))
+ class_to_idx = {classes[i]: i for i in range(len(classes))}
+ self.samples = make_dataset(self.root, class_to_idx, extensions, is_valid_file=None)
+ self.classes = classes
+ video_list = [x[0] for x in self.samples]
+ self.video_clips = VideoClips(
+ video_list,
+ frames_per_clip,
+ step_between_clips,
+ frame_rate,
+ _precomputed_metadata,
+ num_workers=num_workers,
+ _video_width=_video_width,
+ _video_height=_video_height,
+ _video_min_dimension=_video_min_dimension,
+ _audio_samples=_audio_samples,
+ _audio_channels=_audio_channels,
+ )
+ self.transform = transform
+
+ @property
+ def metadata(self):
+ return self.video_clips.metadata
+
+ def __len__(self):
+ return self.video_clips.num_clips()
+
+ def __getitem__(self, idx):
+ video, audio, info, video_idx = self.video_clips.get_clip(idx)
+ label = self.samples[video_idx][1]
+
+ if self.transform is not None:
+ video = self.transform(video)
+
+ return video, audio, label
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/lsun.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/lsun.py
new file mode 100644
index 0000000000..fc67f8f024
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/lsun.py
@@ -0,0 +1,152 @@
+from .vision import VisionDataset
+from PIL import Image
+import os
+import os.path
+import io
+import string
+from collections.abc import Iterable
+import pickle
+from .utils import verify_str_arg, iterable_to_str
+
+
+class LSUNClass(VisionDataset):
+ def __init__(self, root, transform=None, target_transform=None):
+ import lmdb
+ super(LSUNClass, self).__init__(root, transform=transform,
+ target_transform=target_transform)
+
+ self.env = lmdb.open(root, max_readers=1, readonly=True, lock=False,
+ readahead=False, meminit=False)
+ with self.env.begin(write=False) as txn:
+ self.length = txn.stat()['entries']
+ cache_file = '_cache_' + ''.join(c for c in root if c in string.ascii_letters)
+ if os.path.isfile(cache_file):
+ self.keys = pickle.load(open(cache_file, "rb"))
+ else:
+ with self.env.begin(write=False) as txn:
+ self.keys = [key for key, _ in txn.cursor()]
+ pickle.dump(self.keys, open(cache_file, "wb"))
+
+ def __getitem__(self, index):
+ img, target = None, None
+ env = self.env
+ with env.begin(write=False) as txn:
+ imgbuf = txn.get(self.keys[index])
+
+ buf = io.BytesIO()
+ buf.write(imgbuf)
+ buf.seek(0)
+ img = Image.open(buf).convert('RGB')
+
+ if self.transform is not None:
+ img = self.transform(img)
+
+ if self.target_transform is not None:
+ target = self.target_transform(target)
+
+ return img, target
+
+ def __len__(self):
+ return self.length
+
+
+class LSUN(VisionDataset):
+ """
+ `LSUN `_ dataset.
+
+ Args:
+ root (string): Root directory for the database files.
+ classes (string or list): One of {'train', 'val', 'test'} or a list of
+            categories to load, e.g. ['bedroom_train', 'church_outdoor_train'].
+        transform (callable, optional): A function/transform that takes in a PIL image
+ and returns a transformed version. E.g, ``transforms.RandomCrop``
+ target_transform (callable, optional): A function/transform that takes in the
+ target and transforms it.
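+
+    Example (a sketch; the root is a placeholder, and ``lmdb`` must be installed
+    because each category is read from its ``<category>_lmdb`` database):
+
+        .. code:: python
+
+            import torchvision.datasets as dset
+            data = dset.LSUN(root='path/to/lsun',
+                             classes=['bedroom_train', 'church_outdoor_train'])
+            img, target = data[0]  # target indexes into data.classes
+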
+ """
+
+ def __init__(self, root, classes='train', transform=None, target_transform=None):
+ super(LSUN, self).__init__(root, transform=transform,
+ target_transform=target_transform)
+ self.classes = self._verify_classes(classes)
+
+ # for each class, create an LSUNClassDataset
+ self.dbs = []
+ for c in self.classes:
+ self.dbs.append(LSUNClass(
+ root=root + '/' + c + '_lmdb',
+ transform=transform))
+
+ self.indices = []
+ count = 0
+ for db in self.dbs:
+ count += len(db)
+ self.indices.append(count)
+
+ self.length = count
+
+ def _verify_classes(self, classes):
+ categories = ['bedroom', 'bridge', 'church_outdoor', 'classroom',
+ 'conference_room', 'dining_room', 'kitchen',
+ 'living_room', 'restaurant', 'tower']
+ dset_opts = ['train', 'val', 'test']
+
+ try:
+ verify_str_arg(classes, "classes", dset_opts)
+ if classes == 'test':
+ classes = [classes]
+ else:
+ classes = [c + '_' + classes for c in categories]
+ except ValueError:
+ if not isinstance(classes, Iterable):
+ msg = ("Expected type str or Iterable for argument classes, "
+ "but got type {}.")
+ raise ValueError(msg.format(type(classes)))
+
+ classes = list(classes)
+ msg_fmtstr = ("Expected type str for elements in argument classes, "
+ "but got type {}.")
+ for c in classes:
+ verify_str_arg(c, custom_msg=msg_fmtstr.format(type(c)))
+ c_short = c.split('_')
+ category, dset_opt = '_'.join(c_short[:-1]), c_short[-1]
+
+ msg_fmtstr = "Unknown value '{}' for {}. Valid values are {{{}}}."
+ msg = msg_fmtstr.format(category, "LSUN class",
+ iterable_to_str(categories))
+ verify_str_arg(category, valid_values=categories, custom_msg=msg)
+
+ msg = msg_fmtstr.format(dset_opt, "postfix", iterable_to_str(dset_opts))
+ verify_str_arg(dset_opt, valid_values=dset_opts, custom_msg=msg)
+
+ return classes
+
+ def __getitem__(self, index):
+ """
+ Args:
+ index (int): Index
+
+ Returns:
+ tuple: Tuple (image, target) where target is the index of the target category.
+ """
+ target = 0
+ sub = 0
+ for ind in self.indices:
+ if index < ind:
+ break
+ target += 1
+ sub = ind
+
+ db = self.dbs[target]
+ index = index - sub
+
+ if self.target_transform is not None:
+ target = self.target_transform(target)
+
+ img, _ = db[index]
+ return img, target
+
+ def __len__(self):
+ return self.length
+
+ def extra_repr(self):
+ return "Classes: {classes}".format(**self.__dict__)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/mnist.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/mnist.py
new file mode 100644
index 0000000000..74bc0c16aa
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/mnist.py
@@ -0,0 +1,485 @@
+from .vision import VisionDataset
+import warnings
+from PIL import Image
+import os
+import os.path
+import numpy as np
+import torch
+import codecs
+import string
+from .utils import download_url, download_and_extract_archive, extract_archive, \
+ verify_str_arg
+
+
+class MNIST(VisionDataset):
+ """`MNIST `_ Dataset.
+
+ Args:
+ root (string): Root directory of dataset where ``MNIST/processed/training.pt``
+ and ``MNIST/processed/test.pt`` exist.
+ train (bool, optional): If True, creates dataset from ``training.pt``,
+ otherwise from ``test.pt``.
+ download (bool, optional): If true, downloads the dataset from the internet and
+ puts it in root directory. If dataset is already downloaded, it is not
+ downloaded again.
+        transform (callable, optional): A function/transform that takes in a PIL image
+ and returns a transformed version. E.g, ``transforms.RandomCrop``
+ target_transform (callable, optional): A function/transform that takes in the
+ target and transforms it.
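+
+    Example (a minimal sketch; ``path/to/data`` is a placeholder, and
+    ``download=True`` fetches the archives listed in ``resources``):
+
+        .. code:: python
+
+            import torchvision.datasets as dset
+            import torchvision.transforms as transforms
+            train = dset.MNIST(root='path/to/data', train=True, download=True,
+                               transform=transforms.ToTensor())
+            img, target = train[0]  # img is a 1x28x28 tensor, target an int
+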
+ """
+
+ resources = [
+ ("http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz", "f68b3c2dcbeaaa9fbdd348bbdeb94873"),
+ ("http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz", "d53e105ee54ea40749a09fcbcd1e9432"),
+ ("http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz", "9fb629c4189551a2d022fa330f9573f3"),
+ ("http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz", "ec29112dd5afa0611ce80d1b7f02629c")
+ ]
+
+ training_file = 'training.pt'
+ test_file = 'test.pt'
+ classes = ['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four',
+ '5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine']
+
+ @property
+ def train_labels(self):
+ warnings.warn("train_labels has been renamed targets")
+ return self.targets
+
+ @property
+ def test_labels(self):
+ warnings.warn("test_labels has been renamed targets")
+ return self.targets
+
+ @property
+ def train_data(self):
+ warnings.warn("train_data has been renamed data")
+ return self.data
+
+ @property
+ def test_data(self):
+ warnings.warn("test_data has been renamed data")
+ return self.data
+
+ def __init__(self, root, train=True, transform=None, target_transform=None,
+ download=False):
+ super(MNIST, self).__init__(root, transform=transform,
+ target_transform=target_transform)
+ self.train = train # training set or test set
+
+ if download:
+ self.download()
+
+ if not self._check_exists():
+ raise RuntimeError('Dataset not found.' +
+ ' You can use download=True to download it')
+
+ if self.train:
+ data_file = self.training_file
+ else:
+ data_file = self.test_file
+ self.data, self.targets = torch.load(os.path.join(self.processed_folder, data_file))
+
+ def __getitem__(self, index):
+ """
+ Args:
+ index (int): Index
+
+ Returns:
+ tuple: (image, target) where target is index of the target class.
+ """
+ img, target = self.data[index], int(self.targets[index])
+
+ # doing this so that it is consistent with all other datasets
+ # to return a PIL Image
+ img = Image.fromarray(img.numpy(), mode='L')
+
+ if self.transform is not None:
+ img = self.transform(img)
+
+ if self.target_transform is not None:
+ target = self.target_transform(target)
+
+ return img, target
+
+ def __len__(self):
+ return len(self.data)
+
+ @property
+ def raw_folder(self):
+ return os.path.join(self.root, self.__class__.__name__, 'raw')
+
+ @property
+ def processed_folder(self):
+ return os.path.join(self.root, self.__class__.__name__, 'processed')
+
+ @property
+ def class_to_idx(self):
+ return {_class: i for i, _class in enumerate(self.classes)}
+
+ def _check_exists(self):
+ return (os.path.exists(os.path.join(self.processed_folder,
+ self.training_file)) and
+ os.path.exists(os.path.join(self.processed_folder,
+ self.test_file)))
+
+ def download(self):
+ """Download the MNIST data if it doesn't exist in processed_folder already."""
+
+ if self._check_exists():
+ return
+
+ os.makedirs(self.raw_folder, exist_ok=True)
+ os.makedirs(self.processed_folder, exist_ok=True)
+
+ # download files
+ for url, md5 in self.resources:
+ filename = url.rpartition('/')[2]
+ download_and_extract_archive(url, download_root=self.raw_folder, filename=filename, md5=md5)
+
+ # process and save as torch files
+ print('Processing...')
+
+ training_set = (
+ read_image_file(os.path.join(self.raw_folder, 'train-images-idx3-ubyte')),
+ read_label_file(os.path.join(self.raw_folder, 'train-labels-idx1-ubyte'))
+ )
+ test_set = (
+ read_image_file(os.path.join(self.raw_folder, 't10k-images-idx3-ubyte')),
+ read_label_file(os.path.join(self.raw_folder, 't10k-labels-idx1-ubyte'))
+ )
+ with open(os.path.join(self.processed_folder, self.training_file), 'wb') as f:
+ torch.save(training_set, f)
+ with open(os.path.join(self.processed_folder, self.test_file), 'wb') as f:
+ torch.save(test_set, f)
+
+ print('Done!')
+
+ def extra_repr(self):
+ return "Split: {}".format("Train" if self.train is True else "Test")
+
+
+class FashionMNIST(MNIST):
+ """`Fashion-MNIST `_ Dataset.
+
+ Args:
+ root (string): Root directory of dataset where ``Fashion-MNIST/processed/training.pt``
+ and ``Fashion-MNIST/processed/test.pt`` exist.
+ train (bool, optional): If True, creates dataset from ``training.pt``,
+ otherwise from ``test.pt``.
+ download (bool, optional): If true, downloads the dataset from the internet and
+ puts it in root directory. If dataset is already downloaded, it is not
+ downloaded again.
+        transform (callable, optional): A function/transform that takes in a PIL image
+ and returns a transformed version. E.g, ``transforms.RandomCrop``
+ target_transform (callable, optional): A function/transform that takes in the
+ target and transforms it.
+ """
+ resources = [
+ ("http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz",
+ "8d4fb7e6c68d591d4c3dfef9ec88bf0d"),
+ ("http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz",
+ "25c81989df183df01b3e8a0aad5dffbe"),
+ ("http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz",
+ "bef4ecab320f06d8554ea6380940ec79"),
+ ("http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz",
+ "bb300cfdad3c16e7a12a480ee83cd310")
+ ]
+ classes = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal',
+ 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
+
+
+class KMNIST(MNIST):
+ """`Kuzushiji-MNIST `_ Dataset.
+
+ Args:
+ root (string): Root directory of dataset where ``KMNIST/processed/training.pt``
+ and ``KMNIST/processed/test.pt`` exist.
+ train (bool, optional): If True, creates dataset from ``training.pt``,
+ otherwise from ``test.pt``.
+ download (bool, optional): If true, downloads the dataset from the internet and
+ puts it in root directory. If dataset is already downloaded, it is not
+ downloaded again.
+        transform (callable, optional): A function/transform that takes in a PIL image
+ and returns a transformed version. E.g, ``transforms.RandomCrop``
+ target_transform (callable, optional): A function/transform that takes in the
+ target and transforms it.
+ """
+ resources = [
+ ("http://codh.rois.ac.jp/kmnist/dataset/kmnist/train-images-idx3-ubyte.gz", "bdb82020997e1d708af4cf47b453dcf7"),
+ ("http://codh.rois.ac.jp/kmnist/dataset/kmnist/train-labels-idx1-ubyte.gz", "e144d726b3acfaa3e44228e80efcd344"),
+ ("http://codh.rois.ac.jp/kmnist/dataset/kmnist/t10k-images-idx3-ubyte.gz", "5c965bf0a639b31b8f53240b1b52f4d7"),
+ ("http://codh.rois.ac.jp/kmnist/dataset/kmnist/t10k-labels-idx1-ubyte.gz", "7320c461ea6c1c855c0b718fb2a4b134")
+ ]
+ classes = ['o', 'ki', 'su', 'tsu', 'na', 'ha', 'ma', 'ya', 're', 'wo']
+
+
+class EMNIST(MNIST):
+ """`EMNIST `_ Dataset.
+
+ Args:
+ root (string): Root directory of dataset where ``EMNIST/processed/training.pt``
+ and ``EMNIST/processed/test.pt`` exist.
+ split (string): The dataset has 6 different splits: ``byclass``, ``bymerge``,
+ ``balanced``, ``letters``, ``digits`` and ``mnist``. This argument specifies
+ which one to use.
+ train (bool, optional): If True, creates dataset from ``training.pt``,
+ otherwise from ``test.pt``.
+ download (bool, optional): If true, downloads the dataset from the internet and
+ puts it in root directory. If dataset is already downloaded, it is not
+ downloaded again.
+        transform (callable, optional): A function/transform that takes in a PIL image
+ and returns a transformed version. E.g, ``transforms.RandomCrop``
+ target_transform (callable, optional): A function/transform that takes in the
+ target and transforms it.
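+
+    Example (a sketch; the root is a placeholder, and the first call downloads and
+    processes all six splits, which can take a while):
+
+        .. code:: python
+
+            import torchvision.datasets as dset
+            letters = dset.EMNIST(root='path/to/data', split='letters',
+                                  train=True, download=True)
+            img, target = letters[0]  # target is the class index
+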
+ """
+ # Updated URL from https://www.nist.gov/node/1298471/emnist-dataset since the
+ # _official_ download link
+ # https://cloudstor.aarnet.edu.au/plus/s/ZNmuFiuQTqZlu9W/download
+ # is (currently) unavailable
+ url = 'http://www.itl.nist.gov/iaui/vip/cs_links/EMNIST/gzip.zip'
+ md5 = "58c8d27c78d21e728a6bc7b3cc06412e"
+ splits = ('byclass', 'bymerge', 'balanced', 'letters', 'digits', 'mnist')
+ # Merged Classes assumes Same structure for both uppercase and lowercase version
+ _merged_classes = set(['C', 'I', 'J', 'K', 'L', 'M', 'O', 'P', 'S', 'U', 'V', 'W', 'X', 'Y', 'Z'])
+ _all_classes = set(list(string.digits + string.ascii_letters))
+ classes_split_dict = {
+ 'byclass': list(_all_classes),
+ 'bymerge': sorted(list(_all_classes - _merged_classes)),
+ 'balanced': sorted(list(_all_classes - _merged_classes)),
+ 'letters': list(string.ascii_lowercase),
+ 'digits': list(string.digits),
+ 'mnist': list(string.digits),
+ }
+
+ def __init__(self, root, split, **kwargs):
+ self.split = verify_str_arg(split, "split", self.splits)
+ self.training_file = self._training_file(split)
+ self.test_file = self._test_file(split)
+ super(EMNIST, self).__init__(root, **kwargs)
+ self.classes = self.classes_split_dict[self.split]
+
+ @staticmethod
+ def _training_file(split):
+ return 'training_{}.pt'.format(split)
+
+ @staticmethod
+ def _test_file(split):
+ return 'test_{}.pt'.format(split)
+
+ def download(self):
+ """Download the EMNIST data if it doesn't exist in processed_folder already."""
+ import shutil
+
+ if self._check_exists():
+ return
+
+ os.makedirs(self.raw_folder, exist_ok=True)
+ os.makedirs(self.processed_folder, exist_ok=True)
+
+ # download files
+ print('Downloading and extracting zip archive')
+ download_and_extract_archive(self.url, download_root=self.raw_folder, filename="emnist.zip",
+ remove_finished=True, md5=self.md5)
+ gzip_folder = os.path.join(self.raw_folder, 'gzip')
+ for gzip_file in os.listdir(gzip_folder):
+ if gzip_file.endswith('.gz'):
+ extract_archive(os.path.join(gzip_folder, gzip_file), gzip_folder)
+
+ # process and save as torch files
+ for split in self.splits:
+ print('Processing ' + split)
+ training_set = (
+ read_image_file(os.path.join(gzip_folder, 'emnist-{}-train-images-idx3-ubyte'.format(split))),
+ read_label_file(os.path.join(gzip_folder, 'emnist-{}-train-labels-idx1-ubyte'.format(split)))
+ )
+ test_set = (
+ read_image_file(os.path.join(gzip_folder, 'emnist-{}-test-images-idx3-ubyte'.format(split))),
+ read_label_file(os.path.join(gzip_folder, 'emnist-{}-test-labels-idx1-ubyte'.format(split)))
+ )
+ with open(os.path.join(self.processed_folder, self._training_file(split)), 'wb') as f:
+ torch.save(training_set, f)
+ with open(os.path.join(self.processed_folder, self._test_file(split)), 'wb') as f:
+ torch.save(test_set, f)
+ shutil.rmtree(gzip_folder)
+
+ print('Done!')
+
+
+class QMNIST(MNIST):
+ """`QMNIST `_ Dataset.
+
+ Args:
+        root (string): Root directory of dataset whose ``processed``
+ subdir contains torch binary files with the datasets.
+        what (string, optional): Can be 'train', 'test', 'test10k',
+ 'test50k', or 'nist' for respectively the mnist compatible
+ training set, the 60k qmnist testing set, the 10k qmnist
+ examples that match the mnist testing set, the 50k
+ remaining qmnist testing examples, or all the nist
+ digits. The default is to select 'train' or 'test'
+ according to the compatibility argument 'train'.
+        compat (bool, optional): A boolean that says whether the target
+ for each example is class number (for compatibility with
+ the MNIST dataloader) or a torch vector containing the
+ full qmnist information. Default=True.
+ download (bool, optional): If true, downloads the dataset from
+ the internet and puts it in root directory. If dataset is
+ already downloaded, it is not downloaded again.
+ transform (callable, optional): A function/transform that
+            takes in a PIL image and returns a transformed
+ version. E.g, ``transforms.RandomCrop``
+ target_transform (callable, optional): A function/transform
+ that takes in the target and transforms it.
+        train (bool, optional, compatibility): When argument 'what' is
+            not specified, this boolean decides whether to load the
+            training set or the testing set. Default: True.
+
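+    Example (a sketch; the root is a placeholder, and ``compat=False`` is shown to
+    illustrate the extended label vector described above):
+
+        .. code:: python
+
+            import torchvision.datasets as dset
+            test10k = dset.QMNIST(root='path/to/data', what='test10k',
+                                  compat=False, download=True)
+            img, target = test10k[0]  # target is the full QMNIST label vector
+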
+ """
+
+ subsets = {
+ 'train': 'train',
+ 'test': 'test',
+ 'test10k': 'test',
+ 'test50k': 'test',
+ 'nist': 'nist'
+ }
+ resources = {
+ 'train': [('https://raw.githubusercontent.com/facebookresearch/qmnist/master/qmnist-train-images-idx3-ubyte.gz',
+ 'ed72d4157d28c017586c42bc6afe6370'),
+ ('https://raw.githubusercontent.com/facebookresearch/qmnist/master/qmnist-train-labels-idx2-int.gz',
+ '0058f8dd561b90ffdd0f734c6a30e5e4')],
+ 'test': [('https://raw.githubusercontent.com/facebookresearch/qmnist/master/qmnist-test-images-idx3-ubyte.gz',
+ '1394631089c404de565df7b7aeaf9412'),
+ ('https://raw.githubusercontent.com/facebookresearch/qmnist/master/qmnist-test-labels-idx2-int.gz',
+ '5b5b05890a5e13444e108efe57b788aa')],
+ 'nist': [('https://raw.githubusercontent.com/facebookresearch/qmnist/master/xnist-images-idx3-ubyte.xz',
+ '7f124b3b8ab81486c9d8c2749c17f834'),
+ ('https://raw.githubusercontent.com/facebookresearch/qmnist/master/xnist-labels-idx2-int.xz',
+ '5ed0e788978e45d4a8bd4b7caec3d79d')]
+ }
+ classes = ['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four',
+ '5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine']
+
+ def __init__(self, root, what=None, compat=True, train=True, **kwargs):
+ if what is None:
+ what = 'train' if train else 'test'
+ self.what = verify_str_arg(what, "what", tuple(self.subsets.keys()))
+ self.compat = compat
+ self.data_file = what + '.pt'
+ self.training_file = self.data_file
+ self.test_file = self.data_file
+ super(QMNIST, self).__init__(root, train, **kwargs)
+
+ def download(self):
+ """Download the QMNIST data if it doesn't exist in processed_folder already.
+ Note that we only download what has been asked for (argument 'what').
+ """
+ if self._check_exists():
+ return
+ os.makedirs(self.raw_folder, exist_ok=True)
+ os.makedirs(self.processed_folder, exist_ok=True)
+ split = self.resources[self.subsets[self.what]]
+ files = []
+
+ # download data files if not already there
+ for url, md5 in split:
+ filename = url.rpartition('/')[2]
+ file_path = os.path.join(self.raw_folder, filename)
+ if not os.path.isfile(file_path):
+ download_url(url, root=self.raw_folder, filename=filename, md5=md5)
+ files.append(file_path)
+
+ # process and save as torch files
+ print('Processing...')
+ data = read_sn3_pascalvincent_tensor(files[0])
+ assert(data.dtype == torch.uint8)
+ assert(data.ndimension() == 3)
+ targets = read_sn3_pascalvincent_tensor(files[1]).long()
+ assert(targets.ndimension() == 2)
+ if self.what == 'test10k':
+ data = data[0:10000, :, :].clone()
+ targets = targets[0:10000, :].clone()
+ if self.what == 'test50k':
+ data = data[10000:, :, :].clone()
+ targets = targets[10000:, :].clone()
+ with open(os.path.join(self.processed_folder, self.data_file), 'wb') as f:
+ torch.save((data, targets), f)
+
+ def __getitem__(self, index):
+ # redefined to handle the compat flag
+ img, target = self.data[index], self.targets[index]
+ img = Image.fromarray(img.numpy(), mode='L')
+ if self.transform is not None:
+ img = self.transform(img)
+ if self.compat:
+ target = int(target[0])
+ if self.target_transform is not None:
+ target = self.target_transform(target)
+ return img, target
+
+ def extra_repr(self):
+ return "Split: {}".format(self.what)
+
+
+def get_int(b):
+ return int(codecs.encode(b, 'hex'), 16)
+
+
+def open_maybe_compressed_file(path):
+ """Return a file object that possibly decompresses 'path' on the fly.
+ Decompression occurs when argument `path` is a string and ends with '.gz' or '.xz'.
+ """
+ if not isinstance(path, torch._six.string_classes):
+ return path
+ if path.endswith('.gz'):
+ import gzip
+ return gzip.open(path, 'rb')
+ if path.endswith('.xz'):
+ import lzma
+ return lzma.open(path, 'rb')
+ return open(path, 'rb')
+
+
+def read_sn3_pascalvincent_tensor(path, strict=True):
+ """Read a SN3 file in "Pascal Vincent" format (Lush file 'libidx/idx-io.lsh').
+ Argument may be a filename, compressed filename, or file object.
+ """
+ # typemap
+ if not hasattr(read_sn3_pascalvincent_tensor, 'typemap'):
+ read_sn3_pascalvincent_tensor.typemap = {
+ 8: (torch.uint8, np.uint8, np.uint8),
+ 9: (torch.int8, np.int8, np.int8),
+ 11: (torch.int16, np.dtype('>i2'), 'i2'),
+ 12: (torch.int32, np.dtype('>i4'), 'i4'),
+ 13: (torch.float32, np.dtype('>f4'), 'f4'),
+ 14: (torch.float64, np.dtype('>f8'), 'f8')}
+ # read
+ with open_maybe_compressed_file(path) as f:
+ data = f.read()
+ # parse
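+    # The 4-byte big-endian magic word encodes the layout: its low byte is the
+    # number of dimensions (1-3) and the next byte is the element-type code that
+    # indexes ``typemap``; the dimension sizes follow as 4-byte big-endian ints.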
+ magic = get_int(data[0:4])
+ nd = magic % 256
+ ty = magic // 256
+ assert nd >= 1 and nd <= 3
+ assert ty >= 8 and ty <= 14
+ m = read_sn3_pascalvincent_tensor.typemap[ty]
+ s = [get_int(data[4 * (i + 1): 4 * (i + 2)]) for i in range(nd)]
+ parsed = np.frombuffer(data, dtype=m[1], offset=(4 * (nd + 1)))
+ assert parsed.shape[0] == np.prod(s) or not strict
+ return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)
+
+
+def read_label_file(path):
+ with open(path, 'rb') as f:
+ x = read_sn3_pascalvincent_tensor(f, strict=False)
+ assert(x.dtype == torch.uint8)
+ assert(x.ndimension() == 1)
+ return x.long()
+
+
+def read_image_file(path):
+ with open(path, 'rb') as f:
+ x = read_sn3_pascalvincent_tensor(f, strict=False)
+ assert(x.dtype == torch.uint8)
+ assert(x.ndimension() == 3)
+ return x
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/omniglot.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/omniglot.py
new file mode 100644
index 0000000000..dd86128488
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/omniglot.py
@@ -0,0 +1,91 @@
+from PIL import Image
+from os.path import join
+import os
+from .vision import VisionDataset
+from .utils import download_and_extract_archive, check_integrity, list_dir, list_files
+
+
+class Omniglot(VisionDataset):
+ """`Omniglot `_ Dataset.
+ Args:
+ root (string): Root directory of dataset where directory
+ ``omniglot-py`` exists.
+ background (bool, optional): If True, creates dataset from the "background" set, otherwise
+ creates from the "evaluation" set. This terminology is defined by the authors.
+        transform (callable, optional): A function/transform that takes in a PIL image
+ and returns a transformed version. E.g, ``transforms.RandomCrop``
+ target_transform (callable, optional): A function/transform that takes in the
+ target and transforms it.
+ download (bool, optional): If true, downloads the dataset zip files from the internet and
+ puts it in root directory. If the zip files are already downloaded, they are not
+ downloaded again.
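+
+    Example (a sketch; the root is a placeholder):
+
+        .. code:: python
+
+            import torchvision.datasets as dset
+            background = dset.Omniglot(root='path/to/data', background=True,
+                                       download=True)
+            img, character_class = background[0]  # img is a greyscale PIL Image
+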
+ """
+ folder = 'omniglot-py'
+ download_url_prefix = 'https://github.com/brendenlake/omniglot/raw/master/python'
+ zips_md5 = {
+ 'images_background': '68d2efa1b9178cc56df9314c21c6e718',
+ 'images_evaluation': '6b91aef0f799c5bb55b94e3f2daec811'
+ }
+
+ def __init__(self, root, background=True, transform=None, target_transform=None,
+ download=False):
+ super(Omniglot, self).__init__(join(root, self.folder), transform=transform,
+ target_transform=target_transform)
+ self.background = background
+
+ if download:
+ self.download()
+
+ if not self._check_integrity():
+ raise RuntimeError('Dataset not found or corrupted.' +
+ ' You can use download=True to download it')
+
+ self.target_folder = join(self.root, self._get_target_folder())
+ self._alphabets = list_dir(self.target_folder)
+ self._characters = sum([[join(a, c) for c in list_dir(join(self.target_folder, a))]
+ for a in self._alphabets], [])
+ self._character_images = [[(image, idx) for image in list_files(join(self.target_folder, character), '.png')]
+ for idx, character in enumerate(self._characters)]
+ self._flat_character_images = sum(self._character_images, [])
+
+ def __len__(self):
+ return len(self._flat_character_images)
+
+ def __getitem__(self, index):
+ """
+ Args:
+ index (int): Index
+
+ Returns:
+ tuple: (image, target) where target is index of the target character class.
+ """
+ image_name, character_class = self._flat_character_images[index]
+ image_path = join(self.target_folder, self._characters[character_class], image_name)
+ image = Image.open(image_path, mode='r').convert('L')
+
+ if self.transform:
+ image = self.transform(image)
+
+ if self.target_transform:
+ character_class = self.target_transform(character_class)
+
+ return image, character_class
+
+ def _check_integrity(self):
+ zip_filename = self._get_target_folder()
+ if not check_integrity(join(self.root, zip_filename + '.zip'), self.zips_md5[zip_filename]):
+ return False
+ return True
+
+ def download(self):
+ if self._check_integrity():
+ print('Files already downloaded and verified')
+ return
+
+ filename = self._get_target_folder()
+ zip_filename = filename + '.zip'
+ url = self.download_url_prefix + '/' + zip_filename
+ download_and_extract_archive(url, self.root, filename=zip_filename, md5=self.zips_md5[filename])
+
+ def _get_target_folder(self):
+ return 'images_background' if self.background else 'images_evaluation'
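
A minimal usage sketch for the Omniglot class above; the ``./data`` root and ``download=True`` are illustrative assumptions:

    # Hedged example: iterate the background split of Omniglot.
    from torchvision.datasets.omniglot import Omniglot

    ds = Omniglot(root='./data', background=True, download=True)
    image, character_class = ds[0]   # 105x105 PIL 'L' image, int class index
    print(len(ds), image.size, character_class)
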
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/phototour.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/phototour.py
new file mode 100644
index 0000000000..47591e3db8
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/phototour.py
@@ -0,0 +1,209 @@
+import os
+import numpy as np
+from PIL import Image
+
+import torch
+from .vision import VisionDataset
+
+from .utils import download_url
+
+
+class PhotoTour(VisionDataset):
+ """`Learning Local Image Descriptors Data `_ Dataset.
+
+
+ Args:
+ root (string): Root directory where images are.
+ name (string): Name of the dataset to load.
+        transform (callable, optional): A function/transform that takes in a PIL image
+ and returns a transformed version.
+ download (bool, optional): If true, downloads the dataset from the internet and
+ puts it in root directory. If dataset is already downloaded, it is not
+ downloaded again.
+
+ """
+ urls = {
+ 'notredame_harris': [
+ 'http://matthewalunbrown.com/patchdata/notredame_harris.zip',
+ 'notredame_harris.zip',
+ '69f8c90f78e171349abdf0307afefe4d'
+ ],
+ 'yosemite_harris': [
+ 'http://matthewalunbrown.com/patchdata/yosemite_harris.zip',
+ 'yosemite_harris.zip',
+ 'a73253d1c6fbd3ba2613c45065c00d46'
+ ],
+ 'liberty_harris': [
+ 'http://matthewalunbrown.com/patchdata/liberty_harris.zip',
+ 'liberty_harris.zip',
+ 'c731fcfb3abb4091110d0ae8c7ba182c'
+ ],
+ 'notredame': [
+ 'http://icvl.ee.ic.ac.uk/vbalnt/notredame.zip',
+ 'notredame.zip',
+ '509eda8535847b8c0a90bbb210c83484'
+ ],
+ 'yosemite': [
+ 'http://icvl.ee.ic.ac.uk/vbalnt/yosemite.zip',
+ 'yosemite.zip',
+ '533b2e8eb7ede31be40abc317b2fd4f0'
+ ],
+ 'liberty': [
+ 'http://icvl.ee.ic.ac.uk/vbalnt/liberty.zip',
+ 'liberty.zip',
+ 'fdd9152f138ea5ef2091746689176414'
+ ],
+ }
+ mean = {'notredame': 0.4854, 'yosemite': 0.4844, 'liberty': 0.4437,
+ 'notredame_harris': 0.4854, 'yosemite_harris': 0.4844, 'liberty_harris': 0.4437}
+ std = {'notredame': 0.1864, 'yosemite': 0.1818, 'liberty': 0.2019,
+ 'notredame_harris': 0.1864, 'yosemite_harris': 0.1818, 'liberty_harris': 0.2019}
+ lens = {'notredame': 468159, 'yosemite': 633587, 'liberty': 450092,
+ 'liberty_harris': 379587, 'yosemite_harris': 450912, 'notredame_harris': 325295}
+ image_ext = 'bmp'
+ info_file = 'info.txt'
+ matches_files = 'm50_100000_100000_0.txt'
+
+ def __init__(self, root, name, train=True, transform=None, download=False):
+ super(PhotoTour, self).__init__(root, transform=transform)
+ self.name = name
+ self.data_dir = os.path.join(self.root, name)
+ self.data_down = os.path.join(self.root, '{}.zip'.format(name))
+ self.data_file = os.path.join(self.root, '{}.pt'.format(name))
+
+ self.train = train
+ self.mean = self.mean[name]
+ self.std = self.std[name]
+
+ if download:
+ self.download()
+
+ if not self._check_datafile_exists():
+ raise RuntimeError('Dataset not found.' +
+ ' You can use download=True to download it')
+
+ # load the serialized data
+ self.data, self.labels, self.matches = torch.load(self.data_file)
+
+ def __getitem__(self, index):
+ """
+ Args:
+ index (int): Index
+
+ Returns:
+ tuple: (data1, data2, matches)
+ """
+ if self.train:
+ data = self.data[index]
+ if self.transform is not None:
+ data = self.transform(data)
+ return data
+ m = self.matches[index]
+ data1, data2 = self.data[m[0]], self.data[m[1]]
+ if self.transform is not None:
+ data1 = self.transform(data1)
+ data2 = self.transform(data2)
+ return data1, data2, m[2]
+
+ def __len__(self):
+ if self.train:
+ return self.lens[self.name]
+ return len(self.matches)
+
+ def _check_datafile_exists(self):
+ return os.path.exists(self.data_file)
+
+ def _check_downloaded(self):
+ return os.path.exists(self.data_dir)
+
+ def download(self):
+ if self._check_datafile_exists():
+ print('# Found cached data {}'.format(self.data_file))
+ return
+
+ if not self._check_downloaded():
+ # download files
+ url = self.urls[self.name][0]
+ filename = self.urls[self.name][1]
+ md5 = self.urls[self.name][2]
+ fpath = os.path.join(self.root, filename)
+
+ download_url(url, self.root, filename, md5)
+
+ print('# Extracting data {}\n'.format(self.data_down))
+
+ import zipfile
+ with zipfile.ZipFile(fpath, 'r') as z:
+ z.extractall(self.data_dir)
+
+ os.unlink(fpath)
+
+ # process and save as torch files
+ print('# Caching data {}'.format(self.data_file))
+
+ dataset = (
+ read_image_file(self.data_dir, self.image_ext, self.lens[self.name]),
+ read_info_file(self.data_dir, self.info_file),
+ read_matches_files(self.data_dir, self.matches_files)
+ )
+
+ with open(self.data_file, 'wb') as f:
+ torch.save(dataset, f)
+
+ def extra_repr(self):
+ return "Split: {}".format("Train" if self.train is True else "Test")
+
+
+def read_image_file(data_dir, image_ext, n):
+ """Return a Tensor containing the patches
+ """
+
+ def PIL2array(_img):
+ """Convert PIL image type to numpy 2D array
+ """
+ return np.array(_img.getdata(), dtype=np.uint8).reshape(64, 64)
+
+ def find_files(_data_dir, _image_ext):
+ """Return a list with the file names of the images containing the patches
+ """
+ files = []
+ # find those files with the specified extension
+ for file_dir in os.listdir(_data_dir):
+ if file_dir.endswith(_image_ext):
+ files.append(os.path.join(_data_dir, file_dir))
+ return sorted(files) # sort files in ascend order to keep relations
+
+ patches = []
+ list_files = find_files(data_dir, image_ext)
+
+ for fpath in list_files:
+ img = Image.open(fpath)
+ for y in range(0, 1024, 64):
+ for x in range(0, 1024, 64):
+ patch = img.crop((x, y, x + 64, y + 64))
+ patches.append(PIL2array(patch))
+ return torch.ByteTensor(np.array(patches[:n]))
+
+
+def read_info_file(data_dir, info_file):
+ """Return a Tensor containing the list of labels
+ Read the file and keep only the ID of the 3D point.
+ """
+ labels = []
+ with open(os.path.join(data_dir, info_file), 'r') as f:
+ labels = [int(line.split()[0]) for line in f]
+ return torch.LongTensor(labels)
+
+
+def read_matches_files(data_dir, matches_file):
+ """Return a Tensor containing the ground truth matches
+ Read the file and keep only 3D point ID.
+ Matches are represented with a 1, non matches with a 0.
+ """
+ matches = []
+ with open(os.path.join(data_dir, matches_file), 'r') as f:
+ for line in f:
+ line_split = line.split()
+ matches.append([int(line_split[0]), int(line_split[3]),
+ int(line_split[1] == line_split[4])])
+ return torch.LongTensor(matches)
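
A usage sketch for PhotoTour; note the split-dependent return types. The ``notredame`` subset and ``./data`` root are assumptions:

    # Hedged example: train mode yields single patches, test mode yields match triplets.
    from torchvision.datasets.phototour import PhotoTour

    train_ds = PhotoTour(root='./data', name='notredame', train=True, download=True)
    patch = train_ds[0]                        # 64x64 uint8 patch tensor
    test_ds = PhotoTour(root='./data', name='notredame', train=False, download=True)
    p1, p2, is_match = test_ds[0]              # two patches and a 0/1 match label
    print(patch.shape, p1.shape, int(is_match))
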
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/samplers/__init__.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/samplers/__init__.py
new file mode 100644
index 0000000000..870322d39b
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/samplers/__init__.py
@@ -0,0 +1,3 @@
+from .clip_sampler import DistributedSampler, UniformClipSampler, RandomClipSampler
+
+__all__ = ('DistributedSampler', 'UniformClipSampler', 'RandomClipSampler')
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/samplers/clip_sampler.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/samplers/clip_sampler.py
new file mode 100644
index 0000000000..2432a6d20d
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/samplers/clip_sampler.py
@@ -0,0 +1,174 @@
+import math
+import torch
+from torch.utils.data import Sampler
+import torch.distributed as dist
+from torchvision.datasets.video_utils import VideoClips
+
+
+class DistributedSampler(Sampler):
+ """
+ Extension of DistributedSampler, as discussed in
+ https://github.com/pytorch/pytorch/issues/23430
+
+ Example:
+ dataset: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
+ num_replicas: 4
+ shuffle: False
+
+ when group_size = 1
+ RANK | shard_dataset
+ =========================
+ rank_0 | [0, 4, 8, 12]
+ rank_1 | [1, 5, 9, 13]
+ rank_2 | [2, 6, 10, 0]
+ rank_3 | [3, 7, 11, 1]
+
+ when group_size = 2
+
+ RANK | shard_dataset
+ =========================
+ rank_0 | [0, 1, 8, 9]
+ rank_1 | [2, 3, 10, 11]
+ rank_2 | [4, 5, 12, 13]
+ rank_3 | [6, 7, 0, 1]
+
+ """
+
+ def __init__(self, dataset, num_replicas=None, rank=None, shuffle=False, group_size=1):
+ if num_replicas is None:
+ if not dist.is_available():
+ raise RuntimeError("Requires distributed package to be available")
+ num_replicas = dist.get_world_size()
+ if rank is None:
+ if not dist.is_available():
+ raise RuntimeError("Requires distributed package to be available")
+ rank = dist.get_rank()
+ assert len(dataset) % group_size == 0, (
+ "dataset length must be a multiplier of group size"
+ "dataset length: %d, group size: %d" % (len(dataset), group_size)
+ )
+ self.dataset = dataset
+ self.group_size = group_size
+ self.num_replicas = num_replicas
+ self.rank = rank
+ self.epoch = 0
+ dataset_group_length = len(dataset) // group_size
+ self.num_group_samples = int(
+ math.ceil(dataset_group_length * 1.0 / self.num_replicas)
+ )
+ self.num_samples = self.num_group_samples * group_size
+ self.total_size = self.num_samples * self.num_replicas
+ self.shuffle = shuffle
+
+ def __iter__(self):
+ # deterministically shuffle based on epoch
+ g = torch.Generator()
+ g.manual_seed(self.epoch)
+ if self.shuffle:
+ indices = torch.randperm(len(self.dataset), generator=g).tolist()
+ else:
+ indices = list(range(len(self.dataset)))
+
+ # add extra samples to make it evenly divisible
+ indices += indices[:(self.total_size - len(indices))]
+ assert len(indices) == self.total_size
+
+ total_group_size = self.total_size // self.group_size
+ indices = torch.reshape(
+ torch.LongTensor(indices), (total_group_size, self.group_size)
+ )
+
+ # subsample
+ indices = indices[self.rank:total_group_size:self.num_replicas, :]
+ indices = torch.reshape(indices, (-1,)).tolist()
+ assert len(indices) == self.num_samples
+
+ if isinstance(self.dataset, Sampler):
+ orig_indices = list(iter(self.dataset))
+ indices = [orig_indices[i] for i in indices]
+
+ return iter(indices)
+
+ def __len__(self):
+ return self.num_samples
+
+ def set_epoch(self, epoch):
+ self.epoch = epoch
+
+
+class UniformClipSampler(Sampler):
+ """
+    Sample `num_clips_per_video` clips for each video, equally spaced.
+    When the number of unique clips in the video is fewer than `num_clips_per_video`,
+    the clips are repeated until `num_clips_per_video` clips are collected.
+
+ Arguments:
+ video_clips (VideoClips): video clips to sample from
+ num_clips_per_video (int): number of clips to be sampled per video
+ """
+ def __init__(self, video_clips, num_clips_per_video):
+ if not isinstance(video_clips, VideoClips):
+ raise TypeError("Expected video_clips to be an instance of VideoClips, "
+ "got {}".format(type(video_clips)))
+ self.video_clips = video_clips
+ self.num_clips_per_video = num_clips_per_video
+
+ def __iter__(self):
+ idxs = []
+ s = 0
+ # select num_clips_per_video for each video, uniformly spaced
+ for c in self.video_clips.clips:
+ length = len(c)
+ if length == 0:
+ # corner case where video decoding fails
+ continue
+
+ sampled = (
+ torch.linspace(s, s + length - 1, steps=self.num_clips_per_video)
+ .floor()
+ .to(torch.int64)
+ )
+ s += length
+ idxs.append(sampled)
+ idxs = torch.cat(idxs).tolist()
+ return iter(idxs)
+
+ def __len__(self):
+ return sum(
+ self.num_clips_per_video for c in self.video_clips.clips if len(c) > 0
+ )
+
+
+class RandomClipSampler(Sampler):
+ """
+    Samples at most `max_clips_per_video` clips for each video, randomly.
+
+ Arguments:
+ video_clips (VideoClips): video clips to sample from
+ max_clips_per_video (int): maximum number of clips to be sampled per video
+ """
+ def __init__(self, video_clips, max_clips_per_video):
+ if not isinstance(video_clips, VideoClips):
+ raise TypeError("Expected video_clips to be an instance of VideoClips, "
+ "got {}".format(type(video_clips)))
+ self.video_clips = video_clips
+ self.max_clips_per_video = max_clips_per_video
+
+ def __iter__(self):
+ idxs = []
+ s = 0
+ # select at most max_clips_per_video for each video, randomly
+ for c in self.video_clips.clips:
+ length = len(c)
+ size = min(length, self.max_clips_per_video)
+ sampled = torch.randperm(length)[:size] + s
+ s += length
+ idxs.append(sampled)
+ idxs = torch.cat(idxs)
+ # shuffle all clips randomly
+ perm = torch.randperm(len(idxs))
+ idxs = idxs[perm].tolist()
+ return iter(idxs)
+
+ def __len__(self):
+ return sum(min(len(c), self.max_clips_per_video) for c in self.video_clips.clips)
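
A small sketch that reproduces the ``group_size=2`` sharding table from the DistributedSampler docstring without starting an actual process group (``num_replicas`` and ``rank`` are passed explicitly):

    # Hedged example: grouped sharding over a 14-element dummy dataset.
    from torchvision.datasets.samplers import DistributedSampler

    data = list(range(14))
    for rank in range(4):
        sampler = DistributedSampler(data, num_replicas=4, rank=rank,
                                     shuffle=False, group_size=2)
        print(rank, list(sampler))   # e.g. rank 0 -> [0, 1, 8, 9]
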
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbd.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbd.py
new file mode 100644
index 0000000000..c4713f7257
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbd.py
@@ -0,0 +1,124 @@
+import os
+import shutil
+from .vision import VisionDataset
+
+import numpy as np
+
+from PIL import Image
+from .utils import download_url, verify_str_arg
+from .voc import download_extract
+
+
+class SBDataset(VisionDataset):
+ """`Semantic Boundaries Dataset `_
+
+ The SBD currently contains annotations from 11355 images taken from the PASCAL VOC 2011 dataset.
+
+ .. note ::
+
+ Please note that the train and val splits included with this dataset are different from
+ the splits in the PASCAL VOC dataset. In particular some "train" images might be part of
+ VOC2012 val.
+ If you are interested in testing on VOC 2012 val, then use `image_set='train_noval'`,
+ which excludes all val images.
+
+ .. warning::
+
+        This class needs `scipy <https://docs.scipy.org/doc/>`_ to load target files from `.mat` format.
+
+ Args:
+ root (string): Root directory of the Semantic Boundaries Dataset
+ image_set (string, optional): Select the image_set to use, ``train``, ``val`` or ``train_noval``.
+ Image set ``train_noval`` excludes VOC 2012 val images.
+ mode (string, optional): Select target type. Possible values 'boundaries' or 'segmentation'.
+ In case of 'boundaries', the target is an array of shape `[num_classes, H, W]`,
+ where `num_classes=20`.
+ download (bool, optional): If true, downloads the dataset from the internet and
+ puts it in root directory. If dataset is already downloaded, it is not
+ downloaded again.
+ transforms (callable, optional): A function/transform that takes input sample and its target as entry
+ and returns a transformed version. Input sample is PIL image and target is a numpy array
+ if `mode='boundaries'` or PIL image if `mode='segmentation'`.
+ """
+
+ url = "http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz"
+ md5 = "82b4d87ceb2ed10f6038a1cba92111cb"
+ filename = "benchmark.tgz"
+
+ voc_train_url = "http://home.bharathh.info/pubs/codes/SBD/train_noval.txt"
+ voc_split_filename = "train_noval.txt"
+ voc_split_md5 = "79bff800c5f0b1ec6b21080a3c066722"
+
+ def __init__(self,
+ root,
+ image_set='train',
+ mode='boundaries',
+ download=False,
+ transforms=None):
+
+ try:
+ from scipy.io import loadmat
+ self._loadmat = loadmat
+ except ImportError:
+ raise RuntimeError("Scipy is not found. This dataset needs to have scipy installed: "
+ "pip install scipy")
+
+ super(SBDataset, self).__init__(root, transforms)
+ self.image_set = verify_str_arg(image_set, "image_set",
+ ("train", "val", "train_noval"))
+ self.mode = verify_str_arg(mode, "mode", ("segmentation", "boundaries"))
+ self.num_classes = 20
+
+ sbd_root = self.root
+ image_dir = os.path.join(sbd_root, 'img')
+ mask_dir = os.path.join(sbd_root, 'cls')
+
+ if download:
+ download_extract(self.url, self.root, self.filename, self.md5)
+ extracted_ds_root = os.path.join(self.root, "benchmark_RELEASE", "dataset")
+ for f in ["cls", "img", "inst", "train.txt", "val.txt"]:
+ old_path = os.path.join(extracted_ds_root, f)
+ shutil.move(old_path, sbd_root)
+ download_url(self.voc_train_url, sbd_root, self.voc_split_filename,
+ self.voc_split_md5)
+
+ if not os.path.isdir(sbd_root):
+ raise RuntimeError('Dataset not found or corrupted.' +
+ ' You can use download=True to download it')
+
+ split_f = os.path.join(sbd_root, image_set.rstrip('\n') + '.txt')
+
+ with open(os.path.join(split_f), "r") as f:
+ file_names = [x.strip() for x in f.readlines()]
+
+ self.images = [os.path.join(image_dir, x + ".jpg") for x in file_names]
+ self.masks = [os.path.join(mask_dir, x + ".mat") for x in file_names]
+ assert (len(self.images) == len(self.masks))
+
+ self._get_target = self._get_segmentation_target \
+ if self.mode == "segmentation" else self._get_boundaries_target
+
+ def _get_segmentation_target(self, filepath):
+ mat = self._loadmat(filepath)
+ return Image.fromarray(mat['GTcls'][0]['Segmentation'][0])
+
+ def _get_boundaries_target(self, filepath):
+ mat = self._loadmat(filepath)
+ return np.concatenate([np.expand_dims(mat['GTcls'][0]['Boundaries'][0][i][0].toarray(), axis=0)
+ for i in range(self.num_classes)], axis=0)
+
+ def __getitem__(self, index):
+ img = Image.open(self.images[index]).convert('RGB')
+ target = self._get_target(self.masks[index])
+
+ if self.transforms is not None:
+ img, target = self.transforms(img, target)
+
+ return img, target
+
+ def __len__(self):
+ return len(self.images)
+
+ def extra_repr(self):
+ lines = ["Image set: {image_set}", "Mode: {mode}"]
+ return '\n'.join(lines).format(**self.__dict__)
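
A usage sketch for SBDataset in segmentation mode; the ``./data/sbd`` root is an assumption and scipy must be installed:

    # Hedged example: (PIL image, PIL mask) pairs from the train_noval split.
    from torchvision.datasets.sbd import SBDataset

    ds = SBDataset(root='./data/sbd', image_set='train_noval',
                   mode='segmentation', download=True)
    img, mask = ds[0]
    print(len(ds), img.size, mask.size)
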
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbu.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbu.py
new file mode 100644
index 0000000000..70cb68344b
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbu.py
@@ -0,0 +1,107 @@
+from PIL import Image
+from .utils import download_url, check_integrity
+
+import os
+from .vision import VisionDataset
+
+
+class SBU(VisionDataset):
+ """`SBU Captioned Photo `_ Dataset.
+
+ Args:
+ root (string): Root directory of dataset where tarball
+ ``SBUCaptionedPhotoDataset.tar.gz`` exists.
+ transform (callable, optional): A function/transform that takes in a PIL image
+            and returns a transformed version. E.g., ``transforms.RandomCrop``
+ target_transform (callable, optional): A function/transform that takes in the
+ target and transforms it.
+ download (bool, optional): If True, downloads the dataset from the internet and
+ puts it in root directory. If dataset is already downloaded, it is not
+ downloaded again.
+ """
+ url = "http://www.cs.virginia.edu/~vicente/sbucaptions/SBUCaptionedPhotoDataset.tar.gz"
+ filename = "SBUCaptionedPhotoDataset.tar.gz"
+ md5_checksum = '9aec147b3488753cf758b4d493422285'
+
+ def __init__(self, root, transform=None, target_transform=None, download=True):
+ super(SBU, self).__init__(root, transform=transform,
+ target_transform=target_transform)
+
+ if download:
+ self.download()
+
+ if not self._check_integrity():
+ raise RuntimeError('Dataset not found or corrupted.' +
+ ' You can use download=True to download it')
+
+ # Read the caption for each photo
+ self.photos = []
+ self.captions = []
+
+ file1 = os.path.join(self.root, 'dataset', 'SBU_captioned_photo_dataset_urls.txt')
+ file2 = os.path.join(self.root, 'dataset', 'SBU_captioned_photo_dataset_captions.txt')
+
+ for line1, line2 in zip(open(file1), open(file2)):
+ url = line1.rstrip()
+ photo = os.path.basename(url)
+ filename = os.path.join(self.root, 'dataset', photo)
+ if os.path.exists(filename):
+ caption = line2.rstrip()
+ self.photos.append(photo)
+ self.captions.append(caption)
+
+ def __getitem__(self, index):
+ """
+ Args:
+ index (int): Index
+
+ Returns:
+ tuple: (image, target) where target is a caption for the photo.
+ """
+ filename = os.path.join(self.root, 'dataset', self.photos[index])
+ img = Image.open(filename).convert('RGB')
+ if self.transform is not None:
+ img = self.transform(img)
+
+ target = self.captions[index]
+ if self.target_transform is not None:
+ target = self.target_transform(target)
+
+ return img, target
+
+ def __len__(self):
+ """The number of photos in the dataset."""
+ return len(self.photos)
+
+ def _check_integrity(self):
+ """Check the md5 checksum of the downloaded tarball."""
+ root = self.root
+ fpath = os.path.join(root, self.filename)
+ if not check_integrity(fpath, self.md5_checksum):
+ return False
+ return True
+
+ def download(self):
+ """Download and extract the tarball, and download each individual photo."""
+ import tarfile
+
+ if self._check_integrity():
+ print('Files already downloaded and verified')
+ return
+
+ download_url(self.url, self.root, self.filename, self.md5_checksum)
+
+ # Extract file
+ with tarfile.open(os.path.join(self.root, self.filename), 'r:gz') as tar:
+ tar.extractall(path=self.root)
+
+ # Download individual photos
+ with open(os.path.join(self.root, 'dataset', 'SBU_captioned_photo_dataset_urls.txt')) as fh:
+ for line in fh:
+ url = line.rstrip()
+ try:
+ download_url(url, os.path.join(self.root, 'dataset'))
+ except OSError:
+ # The images point to public images on Flickr.
+ # Note: Images might be removed by users at anytime.
+ pass
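
A usage sketch for SBU; ``./data/sbu`` is an assumed root, and ``download=True`` fetches each photo individually, which is slow:

    # Hedged example: (PIL image, caption string) pairs.
    from torchvision.datasets.sbu import SBU

    ds = SBU(root='./data/sbu', download=True)
    img, caption = ds[0]
    print(len(ds), img.size, caption)
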
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/semeion.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/semeion.py
new file mode 100644
index 0000000000..12c92c4a35
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/semeion.py
@@ -0,0 +1,84 @@
+from PIL import Image
+import os
+import os.path
+import numpy as np
+from .vision import VisionDataset
+from .utils import download_url, check_integrity
+
+
+class SEMEION(VisionDataset):
+ """`SEMEION `_ Dataset.
+ Args:
+        root (string): Root directory of dataset where the data file
+            ``semeion.data`` exists.
+        transform (callable, optional): A function/transform that takes in a PIL image
+            and returns a transformed version. E.g., ``transforms.RandomCrop``
+ target_transform (callable, optional): A function/transform that takes in the
+ target and transforms it.
+ download (bool, optional): If true, downloads the dataset from the internet and
+ puts it in root directory. If dataset is already downloaded, it is not
+ downloaded again.
+ """
+ url = "http://archive.ics.uci.edu/ml/machine-learning-databases/semeion/semeion.data"
+ filename = "semeion.data"
+ md5_checksum = 'cb545d371d2ce14ec121470795a77432'
+
+ def __init__(self, root, transform=None, target_transform=None, download=True):
+ super(SEMEION, self).__init__(root, transform=transform,
+ target_transform=target_transform)
+
+ if download:
+ self.download()
+
+ if not self._check_integrity():
+ raise RuntimeError('Dataset not found or corrupted.' +
+ ' You can use download=True to download it')
+
+ self.data = []
+ self.labels = []
+ fp = os.path.join(self.root, self.filename)
+ data = np.loadtxt(fp)
+ # convert value to 8 bit unsigned integer
+ # color (white #255) the pixels
+ self.data = (data[:, :256] * 255).astype('uint8')
+ self.data = np.reshape(self.data, (-1, 16, 16))
+ self.labels = np.nonzero(data[:, 256:])[1]
+
+ def __getitem__(self, index):
+ """
+ Args:
+ index (int): Index
+ Returns:
+ tuple: (image, target) where target is index of the target class.
+ """
+ img, target = self.data[index], int(self.labels[index])
+
+ # doing this so that it is consistent with all other datasets
+ # to return a PIL Image
+ img = Image.fromarray(img, mode='L')
+
+ if self.transform is not None:
+ img = self.transform(img)
+
+ if self.target_transform is not None:
+ target = self.target_transform(target)
+
+ return img, target
+
+ def __len__(self):
+ return len(self.data)
+
+ def _check_integrity(self):
+ root = self.root
+ fpath = os.path.join(root, self.filename)
+ if not check_integrity(fpath, self.md5_checksum):
+ return False
+ return True
+
+ def download(self):
+ if self._check_integrity():
+ print('Files already downloaded and verified')
+ return
+
+ root = self.root
+ download_url(self.url, root, self.filename, self.md5_checksum)
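
A usage sketch for SEMEION; the ``./data/semeion`` root is an assumption:

    # Hedged example: 16x16 grayscale digits as PIL images with int labels.
    from torchvision.datasets.semeion import SEMEION

    ds = SEMEION(root='./data/semeion', download=True)
    img, label = ds[0]
    print(len(ds), img.size, label)   # (16, 16) image, label in [0, 9]
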
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/stl10.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/stl10.py
new file mode 100644
index 0000000000..6bec45afe2
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/stl10.py
@@ -0,0 +1,176 @@
+from PIL import Image
+import os
+import os.path
+import numpy as np
+
+from .vision import VisionDataset
+from .utils import check_integrity, download_and_extract_archive, verify_str_arg
+
+
+class STL10(VisionDataset):
+ """`STL10 `_ Dataset.
+
+ Args:
+ root (string): Root directory of dataset where directory
+ ``stl10_binary`` exists.
+ split (string): One of {'train', 'test', 'unlabeled', 'train+unlabeled'}.
+            The dataset split is selected accordingly.
+ folds (int, optional): One of {0-9} or None.
+ For training, loads one of the 10 pre-defined folds of 1k samples for the
+ standard evaluation procedure. If no value is passed, loads the 5k samples.
+        transform (callable, optional): A function/transform that takes in a PIL image
+            and returns a transformed version. E.g., ``transforms.RandomCrop``
+ target_transform (callable, optional): A function/transform that takes in the
+ target and transforms it.
+ download (bool, optional): If true, downloads the dataset from the internet and
+ puts it in root directory. If dataset is already downloaded, it is not
+ downloaded again.
+
+ """
+ base_folder = 'stl10_binary'
+ url = "http://ai.stanford.edu/~acoates/stl10/stl10_binary.tar.gz"
+ filename = "stl10_binary.tar.gz"
+ tgz_md5 = '91f7769df0f17e558f3565bffb0c7dfb'
+ class_names_file = 'class_names.txt'
+ folds_list_file = 'fold_indices.txt'
+ train_list = [
+ ['train_X.bin', '918c2871b30a85fa023e0c44e0bee87f'],
+ ['train_y.bin', '5a34089d4802c674881badbb80307741'],
+ ['unlabeled_X.bin', '5242ba1fed5e4be9e1e742405eb56ca4']
+ ]
+
+ test_list = [
+ ['test_X.bin', '7f263ba9f9e0b06b93213547f721ac82'],
+ ['test_y.bin', '36f9794fa4beb8a2c72628de14fa638e']
+ ]
+ splits = ('train', 'train+unlabeled', 'unlabeled', 'test')
+
+ def __init__(self, root, split='train', folds=None, transform=None,
+ target_transform=None, download=False):
+ super(STL10, self).__init__(root, transform=transform,
+ target_transform=target_transform)
+ self.split = verify_str_arg(split, "split", self.splits)
+ self.folds = self._verify_folds(folds)
+
+ if download:
+ self.download()
+ elif not self._check_integrity():
+ raise RuntimeError(
+ 'Dataset not found or corrupted. '
+ 'You can use download=True to download it')
+
+ # now load the picked numpy arrays
+ if self.split == 'train':
+ self.data, self.labels = self.__loadfile(
+ self.train_list[0][0], self.train_list[1][0])
+ self.__load_folds(folds)
+
+ elif self.split == 'train+unlabeled':
+ self.data, self.labels = self.__loadfile(
+ self.train_list[0][0], self.train_list[1][0])
+ self.__load_folds(folds)
+ unlabeled_data, _ = self.__loadfile(self.train_list[2][0])
+ self.data = np.concatenate((self.data, unlabeled_data))
+ self.labels = np.concatenate(
+ (self.labels, np.asarray([-1] * unlabeled_data.shape[0])))
+
+ elif self.split == 'unlabeled':
+ self.data, _ = self.__loadfile(self.train_list[2][0])
+ self.labels = np.asarray([-1] * self.data.shape[0])
+ else: # self.split == 'test':
+ self.data, self.labels = self.__loadfile(
+ self.test_list[0][0], self.test_list[1][0])
+
+ class_file = os.path.join(
+ self.root, self.base_folder, self.class_names_file)
+ if os.path.isfile(class_file):
+ with open(class_file) as f:
+ self.classes = f.read().splitlines()
+
+ def _verify_folds(self, folds):
+ if folds is None:
+ return folds
+ elif isinstance(folds, int):
+ if folds in range(10):
+ return folds
+ msg = ("Value for argument folds should be in the range [0, 10), "
+ "but got {}.")
+ raise ValueError(msg.format(folds))
+ else:
+ msg = "Expected type None or int for argument folds, but got type {}."
+ raise ValueError(msg.format(type(folds)))
+
+ def __getitem__(self, index):
+ """
+ Args:
+ index (int): Index
+
+ Returns:
+ tuple: (image, target) where target is index of the target class.
+ """
+ if self.labels is not None:
+ img, target = self.data[index], int(self.labels[index])
+ else:
+ img, target = self.data[index], None
+
+ # doing this so that it is consistent with all other datasets
+ # to return a PIL Image
+ img = Image.fromarray(np.transpose(img, (1, 2, 0)))
+
+ if self.transform is not None:
+ img = self.transform(img)
+
+ if self.target_transform is not None:
+ target = self.target_transform(target)
+
+ return img, target
+
+ def __len__(self):
+ return self.data.shape[0]
+
+ def __loadfile(self, data_file, labels_file=None):
+ labels = None
+ if labels_file:
+ path_to_labels = os.path.join(
+ self.root, self.base_folder, labels_file)
+ with open(path_to_labels, 'rb') as f:
+ labels = np.fromfile(f, dtype=np.uint8) - 1 # 0-based
+
+ path_to_data = os.path.join(self.root, self.base_folder, data_file)
+ with open(path_to_data, 'rb') as f:
+ # read whole file in uint8 chunks
+ everything = np.fromfile(f, dtype=np.uint8)
+ images = np.reshape(everything, (-1, 3, 96, 96))
+ images = np.transpose(images, (0, 1, 3, 2))
+
+ return images, labels
+
+ def _check_integrity(self):
+ root = self.root
+ for fentry in (self.train_list + self.test_list):
+ filename, md5 = fentry[0], fentry[1]
+ fpath = os.path.join(root, self.base_folder, filename)
+ if not check_integrity(fpath, md5):
+ return False
+ return True
+
+ def download(self):
+ if self._check_integrity():
+ print('Files already downloaded and verified')
+ return
+ download_and_extract_archive(self.url, self.root, filename=self.filename, md5=self.tgz_md5)
+ self._check_integrity()
+
+ def extra_repr(self):
+ return "Split: {split}".format(**self.__dict__)
+
+ def __load_folds(self, folds):
+ # loads one of the folds if specified
+ if folds is None:
+ return
+ path_to_folds = os.path.join(
+ self.root, self.base_folder, self.folds_list_file)
+ with open(path_to_folds, 'r') as f:
+ str_idx = f.read().splitlines()[folds]
+            list_idx = np.fromstring(str_idx, dtype=np.int64, sep=' ')  # int64: fold indices exceed the uint8 range
+ self.data, self.labels = self.data[list_idx, :, :, :], self.labels[list_idx]
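
A usage sketch for STL10 showing the fold and unlabeled behaviour described above; the ``./data`` root is an assumption:

    # Hedged example: fold 0 of the train split plus the unlabeled split.
    from torchvision.datasets.stl10 import STL10

    train_fold0 = STL10(root='./data', split='train', folds=0, download=True)
    unlabeled = STL10(root='./data', split='unlabeled', download=True)
    img, label = unlabeled[0]
    print(len(train_fold0), len(unlabeled), label)   # unlabeled samples carry label -1
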
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/svhn.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/svhn.py
new file mode 100644
index 0000000000..d96d0f3f43
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/svhn.py
@@ -0,0 +1,114 @@
+from .vision import VisionDataset
+from PIL import Image
+import os
+import os.path
+import numpy as np
+from .utils import download_url, check_integrity, verify_str_arg
+
+
+class SVHN(VisionDataset):
+ """`SVHN `_ Dataset.
+ Note: The SVHN dataset assigns the label `10` to the digit `0`. However, in this Dataset,
+ we assign the label `0` to the digit `0` to be compatible with PyTorch loss functions which
+ expect the class labels to be in the range `[0, C-1]`
+
+ .. warning::
+
+        This class needs `scipy <https://docs.scipy.org/doc/>`_ to load data from `.mat` format.
+
+ Args:
+ root (string): Root directory of dataset where directory
+ ``SVHN`` exists.
+ split (string): One of {'train', 'test', 'extra'}.
+            The dataset split is selected accordingly; 'extra' is the extra training set.
+        transform (callable, optional): A function/transform that takes in a PIL image
+            and returns a transformed version. E.g., ``transforms.RandomCrop``
+ target_transform (callable, optional): A function/transform that takes in the
+ target and transforms it.
+ download (bool, optional): If true, downloads the dataset from the internet and
+ puts it in root directory. If dataset is already downloaded, it is not
+ downloaded again.
+
+ """
+
+ split_list = {
+ 'train': ["http://ufldl.stanford.edu/housenumbers/train_32x32.mat",
+ "train_32x32.mat", "e26dedcc434d2e4c54c9b2d4a06d8373"],
+ 'test': ["http://ufldl.stanford.edu/housenumbers/test_32x32.mat",
+ "test_32x32.mat", "eb5a983be6a315427106f1b164d9cef3"],
+ 'extra': ["http://ufldl.stanford.edu/housenumbers/extra_32x32.mat",
+ "extra_32x32.mat", "a93ce644f1a588dc4d68dda5feec44a7"]}
+
+ def __init__(self, root, split='train', transform=None, target_transform=None,
+ download=False):
+ super(SVHN, self).__init__(root, transform=transform,
+ target_transform=target_transform)
+ self.split = verify_str_arg(split, "split", tuple(self.split_list.keys()))
+ self.url = self.split_list[split][0]
+ self.filename = self.split_list[split][1]
+ self.file_md5 = self.split_list[split][2]
+
+ if download:
+ self.download()
+
+ if not self._check_integrity():
+ raise RuntimeError('Dataset not found or corrupted.' +
+ ' You can use download=True to download it')
+
+ # import here rather than at top of file because this is
+ # an optional dependency for torchvision
+ import scipy.io as sio
+
+ # reading(loading) mat file as array
+ loaded_mat = sio.loadmat(os.path.join(self.root, self.filename))
+
+ self.data = loaded_mat['X']
+ # loading from the .mat file gives an np array of type np.uint8
+ # converting to np.int64, so that we have a LongTensor after
+ # the conversion from the numpy array
+ # the squeeze is needed to obtain a 1D tensor
+ self.labels = loaded_mat['y'].astype(np.int64).squeeze()
+
+ # the svhn dataset assigns the class label "10" to the digit 0
+ # this makes it inconsistent with several loss functions
+ # which expect the class labels to be in the range [0, C-1]
+ np.place(self.labels, self.labels == 10, 0)
+ self.data = np.transpose(self.data, (3, 2, 0, 1))
+
+ def __getitem__(self, index):
+ """
+ Args:
+ index (int): Index
+
+ Returns:
+ tuple: (image, target) where target is index of the target class.
+ """
+ img, target = self.data[index], int(self.labels[index])
+
+ # doing this so that it is consistent with all other datasets
+ # to return a PIL Image
+ img = Image.fromarray(np.transpose(img, (1, 2, 0)))
+
+ if self.transform is not None:
+ img = self.transform(img)
+
+ if self.target_transform is not None:
+ target = self.target_transform(target)
+
+ return img, target
+
+ def __len__(self):
+ return len(self.data)
+
+ def _check_integrity(self):
+ root = self.root
+ md5 = self.split_list[self.split][2]
+ fpath = os.path.join(root, self.filename)
+ return check_integrity(fpath, md5)
+
+ def download(self):
+ md5 = self.split_list[self.split][2]
+ download_url(self.url, self.root, self.filename, md5)
+
+ def extra_repr(self):
+ return "Split: {split}".format(**self.__dict__)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/ucf101.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/ucf101.py
new file mode 100644
index 0000000000..43d8124bd4
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/ucf101.py
@@ -0,0 +1,107 @@
+import glob
+import os
+
+from .utils import list_dir
+from .folder import make_dataset
+from .video_utils import VideoClips
+from .vision import VisionDataset
+
+
+class UCF101(VisionDataset):
+ """
+    `UCF101 <https://www.crcv.ucf.edu/data/UCF101.php>`_ dataset.
+
+ UCF101 is an action recognition video dataset.
+    This dataset considers every video as a collection of video clips of fixed size, specified
+ by ``frames_per_clip``, where the step in frames between each clip is given by
+ ``step_between_clips``.
+
+ To give an example, for 2 videos with 10 and 15 frames respectively, if ``frames_per_clip=5``
+ and ``step_between_clips=5``, the dataset size will be (2 + 3) = 5, where the first two
+ elements will come from video 1, and the next three elements from video 2.
+ Note that we drop clips which do not have exactly ``frames_per_clip`` elements, so not all
+ frames in a video might be present.
+
+ Internally, it uses a VideoClips object to handle clip creation.
+
+ Args:
+ root (string): Root directory of the UCF101 Dataset.
+ annotation_path (str): path to the folder containing the split files
+ frames_per_clip (int): number of frames in a clip.
+ step_between_clips (int, optional): number of frames between each clip.
+ fold (int, optional): which fold to use. Should be between 1 and 3.
+ train (bool, optional): if ``True``, creates a dataset from the train split,
+ otherwise from the ``test`` split.
+ transform (callable, optional): A function/transform that takes in a TxHxWxC video
+ and returns a transformed version.
+
+ Returns:
+ video (Tensor[T, H, W, C]): the `T` video frames
+ audio(Tensor[K, L]): the audio frames, where `K` is the number of channels
+ and `L` is the number of points
+ label (int): class of the video clip
+ """
+
+ def __init__(self, root, annotation_path, frames_per_clip, step_between_clips=1,
+ frame_rate=None, fold=1, train=True, transform=None,
+ _precomputed_metadata=None, num_workers=1, _video_width=0,
+ _video_height=0, _video_min_dimension=0, _audio_samples=0):
+ super(UCF101, self).__init__(root)
+ if not 1 <= fold <= 3:
+ raise ValueError("fold should be between 1 and 3, got {}".format(fold))
+
+ extensions = ('avi',)
+ self.fold = fold
+ self.train = train
+
+ classes = list(sorted(list_dir(root)))
+ class_to_idx = {classes[i]: i for i in range(len(classes))}
+ self.samples = make_dataset(self.root, class_to_idx, extensions, is_valid_file=None)
+ self.classes = classes
+ video_list = [x[0] for x in self.samples]
+ video_clips = VideoClips(
+ video_list,
+ frames_per_clip,
+ step_between_clips,
+ frame_rate,
+ _precomputed_metadata,
+ num_workers=num_workers,
+ _video_width=_video_width,
+ _video_height=_video_height,
+ _video_min_dimension=_video_min_dimension,
+ _audio_samples=_audio_samples,
+ )
+ self.video_clips_metadata = video_clips.metadata
+ self.indices = self._select_fold(video_list, annotation_path, fold, train)
+ self.video_clips = video_clips.subset(self.indices)
+ self.transform = transform
+
+ @property
+ def metadata(self):
+ return self.video_clips_metadata
+
+ def _select_fold(self, video_list, annotation_path, fold, train):
+ name = "train" if train else "test"
+ name = "{}list{:02d}.txt".format(name, fold)
+ f = os.path.join(annotation_path, name)
+ selected_files = []
+ with open(f, "r") as fid:
+ data = fid.readlines()
+ data = [x.strip().split(" ") for x in data]
+ data = [x[0] for x in data]
+ selected_files.extend(data)
+ selected_files = set(selected_files)
+ indices = [i for i in range(len(video_list)) if video_list[i][len(self.root) + 1:] in selected_files]
+ return indices
+
+ def __len__(self):
+ return self.video_clips.num_clips()
+
+ def __getitem__(self, idx):
+ video, audio, info, video_idx = self.video_clips.get_clip(idx)
+ label = self.samples[self.indices[video_idx]][1]
+
+ if self.transform is not None:
+ video = self.transform(video)
+
+ return video, audio, label
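
A usage sketch for UCF101; the dataset root, the annotation folder, and the clip parameters below are assumptions, and building the clip index decodes every video, so the first run is slow:

    # Hedged example: index fixed-size clips and read one of them.
    from torchvision.datasets.ucf101 import UCF101

    ds = UCF101(root='./data/UCF-101', annotation_path='./data/ucfTrainTestlist',
                frames_per_clip=16, step_between_clips=8, fold=1, train=True)
    video, audio, label = ds[0]
    print(len(ds), video.shape, label)   # video is a [16, H, W, C] uint8 tensor
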
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/usps.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/usps.py
new file mode 100644
index 0000000000..06f1fd0596
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/usps.py
@@ -0,0 +1,84 @@
+from PIL import Image
+import os
+import numpy as np
+
+from .utils import download_url
+from .vision import VisionDataset
+
+
+class USPS(VisionDataset):
+ """`USPS `_ Dataset.
+ The data-format is : [label [index:value ]*256 \\n] * num_lines, where ``label`` lies in ``[1, 10]``.
+ The value for each pixel lies in ``[-1, 1]``. Here we transform the ``label`` into ``[0, 9]``
+ and make pixel values in ``[0, 255]``.
+
+ Args:
+        root (string): Root directory of dataset to store ``USPS`` data files.
+ train (bool, optional): If True, creates dataset from ``usps.bz2``,
+ otherwise from ``usps.t.bz2``.
+        transform (callable, optional): A function/transform that takes in a PIL image
+            and returns a transformed version. E.g., ``transforms.RandomCrop``
+ target_transform (callable, optional): A function/transform that takes in the
+ target and transforms it.
+ download (bool, optional): If true, downloads the dataset from the internet and
+ puts it in root directory. If dataset is already downloaded, it is not
+ downloaded again.
+
+ """
+ split_list = {
+ 'train': [
+ "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/usps.bz2",
+ "usps.bz2", 'ec16c51db3855ca6c91edd34d0e9b197'
+ ],
+ 'test': [
+ "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/usps.t.bz2",
+ "usps.t.bz2", '8ea070ee2aca1ac39742fdd1ef5ed118'
+ ],
+ }
+
+ def __init__(self, root, train=True, transform=None, target_transform=None,
+ download=False):
+ super(USPS, self).__init__(root, transform=transform,
+ target_transform=target_transform)
+ split = 'train' if train else 'test'
+ url, filename, checksum = self.split_list[split]
+ full_path = os.path.join(self.root, filename)
+
+ if download and not os.path.exists(full_path):
+ download_url(url, self.root, filename, md5=checksum)
+
+ import bz2
+ with bz2.open(full_path) as fp:
+ raw_data = [l.decode().split() for l in fp.readlines()]
+ imgs = [[x.split(':')[-1] for x in data[1:]] for data in raw_data]
+ imgs = np.asarray(imgs, dtype=np.float32).reshape((-1, 16, 16))
+ imgs = ((imgs + 1) / 2 * 255).astype(dtype=np.uint8)
+ targets = [int(d[0]) - 1 for d in raw_data]
+
+ self.data = imgs
+ self.targets = targets
+
+ def __getitem__(self, index):
+ """
+ Args:
+ index (int): Index
+
+ Returns:
+ tuple: (image, target) where target is index of the target class.
+ """
+ img, target = self.data[index], int(self.targets[index])
+
+ # doing this so that it is consistent with all other datasets
+ # to return a PIL Image
+ img = Image.fromarray(img, mode='L')
+
+ if self.transform is not None:
+ img = self.transform(img)
+
+ if self.target_transform is not None:
+ target = self.target_transform(target)
+
+ return img, target
+
+ def __len__(self):
+ return len(self.data)
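
A usage sketch for USPS; the ``./data/usps`` root is an assumption:

    # Hedged example: 16x16 grayscale PIL images with labels shifted to [0, 9].
    from torchvision.datasets.usps import USPS

    ds = USPS(root='./data/usps', train=True, download=True)
    img, label = ds[0]
    print(len(ds), img.size, label)
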
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/utils.py
new file mode 100644
index 0000000000..6689eef649
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/utils.py
@@ -0,0 +1,282 @@
+import os
+import os.path
+import hashlib
+import gzip
+import errno
+import tarfile
+import zipfile
+
+import torch
+from torch.utils.model_zoo import tqdm
+
+
+def gen_bar_updater():
+ pbar = tqdm(total=None)
+
+ def bar_update(count, block_size, total_size):
+ if pbar.total is None and total_size:
+ pbar.total = total_size
+ progress_bytes = count * block_size
+ pbar.update(progress_bytes - pbar.n)
+
+ return bar_update
+
+
+def calculate_md5(fpath, chunk_size=1024 * 1024):
+ md5 = hashlib.md5()
+ with open(fpath, 'rb') as f:
+ for chunk in iter(lambda: f.read(chunk_size), b''):
+ md5.update(chunk)
+ return md5.hexdigest()
+
+
+def check_md5(fpath, md5, **kwargs):
+ return md5 == calculate_md5(fpath, **kwargs)
+
+
+def check_integrity(fpath, md5=None):
+ if not os.path.isfile(fpath):
+ return False
+ if md5 is None:
+ return True
+ return check_md5(fpath, md5)
+
+
+def download_url(url, root, filename=None, md5=None):
+ """Download a file from a url and place it in root.
+
+ Args:
+ url (str): URL to download file from
+ root (str): Directory to place downloaded file in
+ filename (str, optional): Name to save the file under. If None, use the basename of the URL
+ md5 (str, optional): MD5 checksum of the download. If None, do not check
+ """
+ import urllib
+
+ root = os.path.expanduser(root)
+ if not filename:
+ filename = os.path.basename(url)
+ fpath = os.path.join(root, filename)
+
+ os.makedirs(root, exist_ok=True)
+
+ # check if file is already present locally
+ if check_integrity(fpath, md5):
+ print('Using downloaded and verified file: ' + fpath)
+ else: # download the file
+ try:
+ print('Downloading ' + url + ' to ' + fpath)
+ urllib.request.urlretrieve(
+ url, fpath,
+ reporthook=gen_bar_updater()
+ )
+ except (urllib.error.URLError, IOError) as e:
+ if url[:5] == 'https':
+ url = url.replace('https:', 'http:')
+ print('Failed download. Trying https -> http instead.'
+ ' Downloading ' + url + ' to ' + fpath)
+ urllib.request.urlretrieve(
+ url, fpath,
+ reporthook=gen_bar_updater()
+ )
+ else:
+ raise e
+ # check integrity of downloaded file
+ if not check_integrity(fpath, md5):
+ raise RuntimeError("File not found or corrupted.")
+
+
+def list_dir(root, prefix=False):
+ """List all directories at a given root
+
+ Args:
+ root (str): Path to directory whose folders need to be listed
+ prefix (bool, optional): If true, prepends the path to each result, otherwise
+ only returns the name of the directories found
+ """
+ root = os.path.expanduser(root)
+ directories = list(
+ filter(
+ lambda p: os.path.isdir(os.path.join(root, p)),
+ os.listdir(root)
+ )
+ )
+
+ if prefix is True:
+ directories = [os.path.join(root, d) for d in directories]
+
+ return directories
+
+
+def list_files(root, suffix, prefix=False):
+ """List all files ending with a suffix at a given root
+
+ Args:
+ root (str): Path to directory whose folders need to be listed
+ suffix (str or tuple): Suffix of the files to match, e.g. '.png' or ('.jpg', '.png').
+ It uses the Python "str.endswith" method and is passed directly
+ prefix (bool, optional): If true, prepends the path to each result, otherwise
+ only returns the name of the files found
+ """
+ root = os.path.expanduser(root)
+ files = list(
+ filter(
+ lambda p: os.path.isfile(os.path.join(root, p)) and p.endswith(suffix),
+ os.listdir(root)
+ )
+ )
+
+ if prefix is True:
+ files = [os.path.join(root, d) for d in files]
+
+ return files
+
+
+def download_file_from_google_drive(file_id, root, filename=None, md5=None):
+ """Download a Google Drive file from and place it in root.
+
+ Args:
+ file_id (str): id of file to be downloaded
+ root (str): Directory to place downloaded file in
+ filename (str, optional): Name to save the file under. If None, use the id of the file.
+ md5 (str, optional): MD5 checksum of the download. If None, do not check
+ """
+ # Based on https://stackoverflow.com/questions/38511444/python-download-files-from-google-drive-using-url
+ import requests
+ url = "https://docs.google.com/uc?export=download"
+
+ root = os.path.expanduser(root)
+ if not filename:
+ filename = file_id
+ fpath = os.path.join(root, filename)
+
+ os.makedirs(root, exist_ok=True)
+
+ if os.path.isfile(fpath) and check_integrity(fpath, md5):
+ print('Using downloaded and verified file: ' + fpath)
+ else:
+ session = requests.Session()
+
+ response = session.get(url, params={'id': file_id}, stream=True)
+ token = _get_confirm_token(response)
+
+ if token:
+ params = {'id': file_id, 'confirm': token}
+ response = session.get(url, params=params, stream=True)
+
+ _save_response_content(response, fpath)
+
+
+def _get_confirm_token(response):
+ for key, value in response.cookies.items():
+ if key.startswith('download_warning'):
+ return value
+
+ return None
+
+
+def _save_response_content(response, destination, chunk_size=32768):
+ with open(destination, "wb") as f:
+ pbar = tqdm(total=None)
+ progress = 0
+ for chunk in response.iter_content(chunk_size):
+ if chunk: # filter out keep-alive new chunks
+ f.write(chunk)
+ progress += len(chunk)
+ pbar.update(progress - pbar.n)
+ pbar.close()
+
+
+def _is_tarxz(filename):
+ return filename.endswith(".tar.xz")
+
+
+def _is_tar(filename):
+ return filename.endswith(".tar")
+
+
+def _is_targz(filename):
+ return filename.endswith(".tar.gz")
+
+
+def _is_tgz(filename):
+ return filename.endswith(".tgz")
+
+
+def _is_gzip(filename):
+ return filename.endswith(".gz") and not filename.endswith(".tar.gz")
+
+
+def _is_zip(filename):
+ return filename.endswith(".zip")
+
+
+def extract_archive(from_path, to_path=None, remove_finished=False):
+ if to_path is None:
+ to_path = os.path.dirname(from_path)
+
+ if _is_tar(from_path):
+ with tarfile.open(from_path, 'r') as tar:
+ tar.extractall(path=to_path)
+ elif _is_targz(from_path) or _is_tgz(from_path):
+ with tarfile.open(from_path, 'r:gz') as tar:
+ tar.extractall(path=to_path)
+ elif _is_tarxz(from_path):
+ with tarfile.open(from_path, 'r:xz') as tar:
+ tar.extractall(path=to_path)
+ elif _is_gzip(from_path):
+ to_path = os.path.join(to_path, os.path.splitext(os.path.basename(from_path))[0])
+ with open(to_path, "wb") as out_f, gzip.GzipFile(from_path) as zip_f:
+ out_f.write(zip_f.read())
+ elif _is_zip(from_path):
+ with zipfile.ZipFile(from_path, 'r') as z:
+ z.extractall(to_path)
+ else:
+ raise ValueError("Extraction of {} not supported".format(from_path))
+
+ if remove_finished:
+ os.remove(from_path)
+
+
+def download_and_extract_archive(url, download_root, extract_root=None, filename=None,
+ md5=None, remove_finished=False):
+ download_root = os.path.expanduser(download_root)
+ if extract_root is None:
+ extract_root = download_root
+ if not filename:
+ filename = os.path.basename(url)
+
+ download_url(url, download_root, filename, md5)
+
+ archive = os.path.join(download_root, filename)
+ print("Extracting {} to {}".format(archive, extract_root))
+ extract_archive(archive, extract_root, remove_finished)
+
+
+def iterable_to_str(iterable):
+ return "'" + "', '".join([str(item) for item in iterable]) + "'"
+
+
+def verify_str_arg(value, arg=None, valid_values=None, custom_msg=None):
+ if not isinstance(value, torch._six.string_classes):
+ if arg is None:
+ msg = "Expected type str, but got type {type}."
+ else:
+ msg = "Expected type str for argument {arg}, but got type {type}."
+ msg = msg.format(type=type(value), arg=arg)
+ raise ValueError(msg)
+
+ if valid_values is None:
+ return value
+
+ if value not in valid_values:
+ if custom_msg is not None:
+ msg = custom_msg
+ else:
+ msg = ("Unknown value '{value}' for argument {arg}. "
+ "Valid values are {{{valid_values}}}.")
+ msg = msg.format(value=value, arg=arg,
+ valid_values=iterable_to_str(valid_values))
+ raise ValueError(msg)
+
+ return value
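
A small sketch of the helpers above; ``verify_str_arg`` validates string options, and ``download_and_extract_archive`` chains download, optional MD5 verification, and extraction. The URL below is a placeholder, not a real archive:

    # Hedged example with placeholder inputs.
    from torchvision.datasets.utils import download_and_extract_archive, verify_str_arg

    split = verify_str_arg('train', 'split', ('train', 'val', 'test'))
    download_and_extract_archive(url='https://example.com/dataset.tar.gz',  # placeholder
                                 download_root='./data',
                                 filename='dataset.tar.gz',
                                 md5=None,                # skip checksum verification
                                 remove_finished=True)    # delete the archive after extraction
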
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/video_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/video_utils.py
new file mode 100644
index 0000000000..5c9244e545
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/video_utils.py
@@ -0,0 +1,367 @@
+import bisect
+import math
+from fractions import Fraction
+
+import torch
+from torchvision.io import (
+ _probe_video_from_file,
+ _read_video_from_file,
+ _read_video_timestamps_from_file,
+ read_video,
+ read_video_timestamps,
+)
+
+from .utils import tqdm
+
+
+def pts_convert(pts, timebase_from, timebase_to, round_func=math.floor):
+ """convert pts between different time bases
+ Args:
+ pts: presentation timestamp, float
+ timebase_from: original timebase. Fraction
+ timebase_to: new timebase. Fraction
+ round_func: rounding function.
+ """
+ new_pts = Fraction(pts, 1) * timebase_from / timebase_to
+ return round_func(new_pts)
+
+
+def unfold(tensor, size, step, dilation=1):
+ """
+ similar to tensor.unfold, but with the dilation
+ and specialized for 1d tensors
+
+ Returns all consecutive windows of `size` elements, with
+ `step` between windows. The distance between each element
+ in a window is given by `dilation`.
+ """
+ assert tensor.dim() == 1
+ o_stride = tensor.stride(0)
+ numel = tensor.numel()
+ new_stride = (step * o_stride, dilation * o_stride)
+ new_size = ((numel - (dilation * (size - 1) + 1)) // step + 1, size)
+ if new_size[0] < 1:
+ new_size = (0, size)
+ return torch.as_strided(tensor, new_size, new_stride)
+
+
+class _DummyDataset(object):
+ """
+ Dummy dataset used for DataLoader in VideoClips.
+ Defined at top level so it can be pickled when forking.
+ """
+
+ def __init__(self, x):
+ self.x = x
+
+ def __len__(self):
+ return len(self.x)
+
+ def __getitem__(self, idx):
+ return read_video_timestamps(self.x[idx])
+
+
+class VideoClips(object):
+ """
+ Given a list of video files, computes all consecutive subvideos of size
+ `clip_length_in_frames`, where the distance between each subvideo in the
+ same video is defined by `frames_between_clips`.
+ If `frame_rate` is specified, it will also resample all the videos to have
+ the same frame rate, and the clips will refer to this frame rate.
+
+ Creating this instance the first time is time-consuming, as it needs to
+ decode all the videos in `video_paths`. It is recommended that you
+ cache the results after instantiation of the class.
+
+ Recreating the clips for different clip lengths is fast, and can be done
+ with the `compute_clips` method.
+
+ Arguments:
+ video_paths (List[str]): paths to the video files
+ clip_length_in_frames (int): size of a clip in number of frames
+ frames_between_clips (int): step (in frames) between each clip
+ frame_rate (int, optional): if specified, it will resample the video
+ so that it has `frame_rate`, and then the clips will be defined
+ on the resampled video
+ num_workers (int): how many subprocesses to use for data loading.
+ 0 means that the data will be loaded in the main process. (default: 0)
+ """
+
+ def __init__(
+ self,
+ video_paths,
+ clip_length_in_frames=16,
+ frames_between_clips=1,
+ frame_rate=None,
+ _precomputed_metadata=None,
+ num_workers=0,
+ _video_width=0,
+ _video_height=0,
+ _video_min_dimension=0,
+ _video_max_dimension=0,
+ _audio_samples=0,
+ _audio_channels=0,
+ ):
+
+ self.video_paths = video_paths
+ self.num_workers = num_workers
+
+ # these options are not valid for pyav backend
+ self._video_width = _video_width
+ self._video_height = _video_height
+ self._video_min_dimension = _video_min_dimension
+ self._video_max_dimension = _video_max_dimension
+ self._audio_samples = _audio_samples
+ self._audio_channels = _audio_channels
+
+ if _precomputed_metadata is None:
+ self._compute_frame_pts()
+ else:
+ self._init_from_metadata(_precomputed_metadata)
+ self.compute_clips(clip_length_in_frames, frames_between_clips, frame_rate)
+
+ def _collate_fn(self, x):
+ return x
+
+ def _compute_frame_pts(self):
+ self.video_pts = []
+ self.video_fps = []
+
+ # strategy: use a DataLoader to parallelize read_video_timestamps
+ # so need to create a dummy dataset first
+ import torch.utils.data
+
+ dl = torch.utils.data.DataLoader(
+ _DummyDataset(self.video_paths),
+ batch_size=16,
+ num_workers=self.num_workers,
+ collate_fn=self._collate_fn,
+ )
+
+ with tqdm(total=len(dl)) as pbar:
+ for batch in dl:
+ pbar.update(1)
+ clips, fps = list(zip(*batch))
+ clips = [torch.as_tensor(c) for c in clips]
+ self.video_pts.extend(clips)
+ self.video_fps.extend(fps)
+
+ def _init_from_metadata(self, metadata):
+ self.video_paths = metadata["video_paths"]
+ assert len(self.video_paths) == len(metadata["video_pts"])
+ self.video_pts = metadata["video_pts"]
+ assert len(self.video_paths) == len(metadata["video_fps"])
+ self.video_fps = metadata["video_fps"]
+
+ @property
+ def metadata(self):
+ _metadata = {
+ "video_paths": self.video_paths,
+ "video_pts": self.video_pts,
+ "video_fps": self.video_fps,
+ }
+ return _metadata
+
+ def subset(self, indices):
+ video_paths = [self.video_paths[i] for i in indices]
+ video_pts = [self.video_pts[i] for i in indices]
+ video_fps = [self.video_fps[i] for i in indices]
+ metadata = {
+ "video_paths": video_paths,
+ "video_pts": video_pts,
+ "video_fps": video_fps,
+ }
+ return type(self)(
+ video_paths,
+ self.num_frames,
+ self.step,
+ self.frame_rate,
+ _precomputed_metadata=metadata,
+ num_workers=self.num_workers,
+ _video_width=self._video_width,
+ _video_height=self._video_height,
+ _video_min_dimension=self._video_min_dimension,
+ _video_max_dimension=self._video_max_dimension,
+ _audio_samples=self._audio_samples,
+ _audio_channels=self._audio_channels,
+ )
+
+ @staticmethod
+ def compute_clips_for_video(video_pts, num_frames, step, fps, frame_rate):
+ if fps is None:
+            # if for some reason the video doesn't have an fps (because it doesn't have a video stream)
+            # set the fps to 1. The value doesn't matter, because video_pts is empty anyway
+ fps = 1
+ if frame_rate is None:
+ frame_rate = fps
+ total_frames = len(video_pts) * (float(frame_rate) / fps)
+ idxs = VideoClips._resample_video_idx(
+ int(math.floor(total_frames)), fps, frame_rate
+ )
+ video_pts = video_pts[idxs]
+ clips = unfold(video_pts, num_frames, step)
+ if isinstance(idxs, slice):
+ idxs = [idxs] * len(clips)
+ else:
+ idxs = unfold(idxs, num_frames, step)
+ return clips, idxs
+
+ def compute_clips(self, num_frames, step, frame_rate=None):
+ """
+ Compute all consecutive sequences of clips from video_pts.
+ Always returns clips of size `num_frames`, meaning that the
+ last few frames in a video can potentially be dropped.
+
+ Arguments:
+ num_frames (int): number of frames for the clip
+ step (int): distance between two clips
+ """
+ self.num_frames = num_frames
+ self.step = step
+ self.frame_rate = frame_rate
+ self.clips = []
+ self.resampling_idxs = []
+ for video_pts, fps in zip(self.video_pts, self.video_fps):
+ clips, idxs = self.compute_clips_for_video(
+ video_pts, num_frames, step, fps, frame_rate
+ )
+ self.clips.append(clips)
+ self.resampling_idxs.append(idxs)
+ clip_lengths = torch.as_tensor([len(v) for v in self.clips])
+ self.cumulative_sizes = clip_lengths.cumsum(0).tolist()
+
+ def __len__(self):
+ return self.num_clips()
+
+ def num_videos(self):
+ return len(self.video_paths)
+
+ def num_clips(self):
+ """
+ Number of subclips that are available in the video list.
+ """
+ return self.cumulative_sizes[-1]
+
+ def get_clip_location(self, idx):
+ """
+ Converts a flattened representation of the indices into a video_idx, clip_idx
+ representation.
+ """
+ video_idx = bisect.bisect_right(self.cumulative_sizes, idx)
+ if video_idx == 0:
+ clip_idx = idx
+ else:
+ clip_idx = idx - self.cumulative_sizes[video_idx - 1]
+ return video_idx, clip_idx
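+
+    # A worked example (assumed values): with cumulative_sizes == [3, 5, 9],
+    # get_clip_location(4) returns (1, 1) and get_clip_location(2) returns (0, 2).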
+
+ @staticmethod
+ def _resample_video_idx(num_frames, original_fps, new_fps):
+ step = float(original_fps) / new_fps
+ if step.is_integer():
+ # optimization: if step is integer, don't need to perform
+ # advanced indexing
+ step = int(step)
+ return slice(None, None, step)
+ idxs = torch.arange(num_frames, dtype=torch.float32) * step
+ idxs = idxs.floor().to(torch.int64)
+ return idxs
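+
+    # Worked examples (assumed values):
+    #   _resample_video_idx(3, original_fps=30, new_fps=10) -> slice(None, None, 3)
+    #   _resample_video_idx(5, original_fps=25, new_fps=10) -> tensor([0, 2, 5, 7, 10])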
+
+ def get_clip(self, idx):
+ """
+ Gets a subclip from a list of videos.
+
+ Arguments:
+ idx (int): index of the subclip. Must be between 0 and num_clips().
+
+ Returns:
+ video (Tensor)
+ audio (Tensor)
+ info (Dict)
+ video_idx (int): index of the video in `video_paths`
+ """
+ if idx >= self.num_clips():
+ raise IndexError(
+ "Index {} out of range "
+                "({} clips available)".format(idx, self.num_clips())
+ )
+ video_idx, clip_idx = self.get_clip_location(idx)
+ video_path = self.video_paths[video_idx]
+ clip_pts = self.clips[video_idx][clip_idx]
+
+ from torchvision import get_video_backend
+
+ backend = get_video_backend()
+
+ if backend == "pyav":
+ # check for invalid options
+ if self._video_width != 0:
+ raise ValueError("pyav backend doesn't support _video_width != 0")
+ if self._video_height != 0:
+ raise ValueError("pyav backend doesn't support _video_height != 0")
+ if self._video_min_dimension != 0:
+ raise ValueError(
+ "pyav backend doesn't support _video_min_dimension != 0"
+ )
+ if self._video_max_dimension != 0:
+ raise ValueError(
+ "pyav backend doesn't support _video_max_dimension != 0"
+ )
+ if self._audio_samples != 0:
+ raise ValueError("pyav backend doesn't support _audio_samples != 0")
+
+ if backend == "pyav":
+ start_pts = clip_pts[0].item()
+ end_pts = clip_pts[-1].item()
+ video, audio, info = read_video(video_path, start_pts, end_pts)
+ else:
+ info = _probe_video_from_file(video_path)
+ video_fps = info.video_fps
+ audio_fps = None
+
+ video_start_pts = clip_pts[0].item()
+ video_end_pts = clip_pts[-1].item()
+
+ audio_start_pts, audio_end_pts = 0, -1
+ audio_timebase = Fraction(0, 1)
+ video_timebase = Fraction(
+ info.video_timebase.numerator, info.video_timebase.denominator
+ )
+ if info.has_audio:
+ audio_timebase = Fraction(
+ info.audio_timebase.numerator, info.audio_timebase.denominator
+ )
+ audio_start_pts = pts_convert(
+ video_start_pts, video_timebase, audio_timebase, math.floor
+ )
+ audio_end_pts = pts_convert(
+ video_end_pts, video_timebase, audio_timebase, math.ceil
+ )
+ audio_fps = info.audio_sample_rate
+ video, audio, info = _read_video_from_file(
+ video_path,
+ video_width=self._video_width,
+ video_height=self._video_height,
+ video_min_dimension=self._video_min_dimension,
+ video_max_dimension=self._video_max_dimension,
+ video_pts_range=(video_start_pts, video_end_pts),
+ video_timebase=video_timebase,
+ audio_samples=self._audio_samples,
+ audio_channels=self._audio_channels,
+ audio_pts_range=(audio_start_pts, audio_end_pts),
+ audio_timebase=audio_timebase,
+ )
+
+ info = {"video_fps": video_fps}
+ if audio_fps is not None:
+ info["audio_fps"] = audio_fps
+
+ if self.frame_rate is not None:
+ resampling_idx = self.resampling_idxs[video_idx][clip_idx]
+ if isinstance(resampling_idx, torch.Tensor):
+ resampling_idx = resampling_idx - resampling_idx[0]
+ video = video[resampling_idx]
+ info["video_fps"] = self.frame_rate
+ assert len(video) == self.num_frames, "{} x {}".format(
+ video.shape, self.num_frames
+ )
+ return video, audio, info, video_idx
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/vision.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/vision.py
new file mode 100644
index 0000000000..7ee5a84dfc
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/vision.py
@@ -0,0 +1,80 @@
+import os
+import torch
+import torch.utils.data as data
+
+
+class VisionDataset(data.Dataset):
+ _repr_indent = 4
+
+ def __init__(self, root, transforms=None, transform=None, target_transform=None):
+ if isinstance(root, torch._six.string_classes):
+ root = os.path.expanduser(root)
+ self.root = root
+
+ has_transforms = transforms is not None
+ has_separate_transform = transform is not None or target_transform is not None
+ if has_transforms and has_separate_transform:
+ raise ValueError("Only transforms or transform/target_transform can "
+                             "be passed as arguments")
+
+ # for backwards-compatibility
+ self.transform = transform
+ self.target_transform = target_transform
+
+ if has_separate_transform:
+ transforms = StandardTransform(transform, target_transform)
+ self.transforms = transforms
+
+ def __getitem__(self, index):
+ raise NotImplementedError
+
+ def __len__(self):
+ raise NotImplementedError
+
+ def __repr__(self):
+ head = "Dataset " + self.__class__.__name__
+ body = ["Number of datapoints: {}".format(self.__len__())]
+ if self.root is not None:
+ body.append("Root location: {}".format(self.root))
+ body += self.extra_repr().splitlines()
+ if hasattr(self, "transforms") and self.transforms is not None:
+ body += [repr(self.transforms)]
+ lines = [head] + [" " * self._repr_indent + line for line in body]
+ return '\n'.join(lines)
+
+ def _format_transform_repr(self, transform, head):
+ lines = transform.__repr__().splitlines()
+ return (["{}{}".format(head, lines[0])] +
+ ["{}{}".format(" " * len(head), line) for line in lines[1:]])
+
+ def extra_repr(self):
+ return ""
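+
+
+# A minimal subclassing sketch (hypothetical ``MyDataset``; illustrative only):
+#
+# class MyDataset(VisionDataset):
+#     def __init__(self, root, transforms=None):
+#         super(MyDataset, self).__init__(root, transforms=transforms)
+#         self.samples = []  # populate with (image, target) pairs loaded from ``root``
+#
+#     def __getitem__(self, index):
+#         img, target = self.samples[index]
+#         if self.transforms is not None:
+#             img, target = self.transforms(img, target)
+#         return img, target
+#
+#     def __len__(self):
+#         return len(self.samples)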
+
+
+class StandardTransform(object):
+ def __init__(self, transform=None, target_transform=None):
+ self.transform = transform
+ self.target_transform = target_transform
+
+ def __call__(self, input, target):
+ if self.transform is not None:
+ input = self.transform(input)
+ if self.target_transform is not None:
+ target = self.target_transform(target)
+ return input, target
+
+ def _format_transform_repr(self, transform, head):
+ lines = transform.__repr__().splitlines()
+ return (["{}{}".format(head, lines[0])] +
+ ["{}{}".format(" " * len(head), line) for line in lines[1:]])
+
+ def __repr__(self):
+ body = [self.__class__.__name__]
+ if self.transform is not None:
+ body += self._format_transform_repr(self.transform,
+ "Transform: ")
+ if self.target_transform is not None:
+ body += self._format_transform_repr(self.target_transform,
+ "Target transform: ")
+
+ return '\n'.join(body)
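+
+
+# A minimal illustration of StandardTransform (toy callables, illustrative only):
+#
+# t = StandardTransform(transform=str.lower, target_transform=int)
+# t("ABC", "3")  # -> ("abc", 3)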
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/voc.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/voc.py
new file mode 100644
index 0000000000..2be53c4fcc
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/voc.py
@@ -0,0 +1,242 @@
+import os
+import tarfile
+import collections
+from .vision import VisionDataset
+import xml.etree.ElementTree as ET
+from PIL import Image
+from .utils import download_url, check_integrity, verify_str_arg
+
+DATASET_YEAR_DICT = {
+ '2012': {
+ 'url': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar',
+ 'filename': 'VOCtrainval_11-May-2012.tar',
+ 'md5': '6cd6e144f989b92b3379bac3b3de84fd',
+ 'base_dir': os.path.join('VOCdevkit', 'VOC2012')
+ },
+ '2011': {
+ 'url': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2011/VOCtrainval_25-May-2011.tar',
+ 'filename': 'VOCtrainval_25-May-2011.tar',
+ 'md5': '6c3384ef61512963050cb5d687e5bf1e',
+ 'base_dir': os.path.join('TrainVal', 'VOCdevkit', 'VOC2011')
+ },
+ '2010': {
+ 'url': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2010/VOCtrainval_03-May-2010.tar',
+ 'filename': 'VOCtrainval_03-May-2010.tar',
+ 'md5': 'da459979d0c395079b5c75ee67908abb',
+ 'base_dir': os.path.join('VOCdevkit', 'VOC2010')
+ },
+ '2009': {
+ 'url': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2009/VOCtrainval_11-May-2009.tar',
+ 'filename': 'VOCtrainval_11-May-2009.tar',
+ 'md5': '59065e4b188729180974ef6572f6a212',
+ 'base_dir': os.path.join('VOCdevkit', 'VOC2009')
+ },
+ '2008': {
+ 'url': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2008/VOCtrainval_14-Jul-2008.tar',
+ 'filename': 'VOCtrainval_11-May-2012.tar',
+ 'md5': '2629fa636546599198acfcfbfcf1904a',
+ 'base_dir': os.path.join('VOCdevkit', 'VOC2008')
+ },
+ '2007': {
+ 'url': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar',
+ 'filename': 'VOCtrainval_06-Nov-2007.tar',
+ 'md5': 'c52e279531787c972589f7e41ab4ae64',
+ 'base_dir': os.path.join('VOCdevkit', 'VOC2007')
+ },
+ '2007-test': {
+ 'url': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar',
+ 'filename': 'VOCtest_06-Nov-2007.tar',
+ 'md5': 'b6e924de25625d8de591ea690078ad9f',
+ 'base_dir': os.path.join('VOCdevkit', 'VOC2007')
+ }
+}
+
+
+class VOCSegmentation(VisionDataset):
+    """`Pascal VOC <http://host.robots.ox.ac.uk/pascal/VOC/>`_ Segmentation Dataset.
+
+ Args:
+ root (string): Root directory of the VOC Dataset.
+ year (string, optional): The dataset year, supports years 2007 to 2012.
+ image_set (string, optional): Select the image_set to use, ``train``, ``trainval`` or ``val``
+ download (bool, optional): If true, downloads the dataset from the internet and
+ puts it in root directory. If dataset is already downloaded, it is not
+ downloaded again.
+        transform (callable, optional): A function/transform that takes in a PIL image
+            and returns a transformed version. E.g., ``transforms.RandomCrop``
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+        transforms (callable, optional): A function/transform that takes an input sample
+            and its target as entry and returns a transformed version.
+ """
+
+ def __init__(self,
+ root,
+ year='2012',
+ image_set='train',
+ download=False,
+ transform=None,
+ target_transform=None,
+ transforms=None):
+ super(VOCSegmentation, self).__init__(root, transforms, transform, target_transform)
+ self.year = year
+ if year == "2007" and image_set == "test":
+ year = "2007-test"
+ self.url = DATASET_YEAR_DICT[year]['url']
+ self.filename = DATASET_YEAR_DICT[year]['filename']
+ self.md5 = DATASET_YEAR_DICT[year]['md5']
+ valid_sets = ["train", "trainval", "val"]
+ if year == "2007-test":
+ valid_sets.append("test")
+ self.image_set = verify_str_arg(image_set, "image_set", valid_sets)
+ base_dir = DATASET_YEAR_DICT[year]['base_dir']
+ voc_root = os.path.join(self.root, base_dir)
+ image_dir = os.path.join(voc_root, 'JPEGImages')
+ mask_dir = os.path.join(voc_root, 'SegmentationClass')
+
+ if download:
+ download_extract(self.url, self.root, self.filename, self.md5)
+
+ if not os.path.isdir(voc_root):
+ raise RuntimeError('Dataset not found or corrupted.' +
+ ' You can use download=True to download it')
+
+ splits_dir = os.path.join(voc_root, 'ImageSets/Segmentation')
+
+ split_f = os.path.join(splits_dir, image_set.rstrip('\n') + '.txt')
+
+ with open(os.path.join(split_f), "r") as f:
+ file_names = [x.strip() for x in f.readlines()]
+
+ self.images = [os.path.join(image_dir, x + ".jpg") for x in file_names]
+ self.masks = [os.path.join(mask_dir, x + ".png") for x in file_names]
+ assert (len(self.images) == len(self.masks))
+
+ def __getitem__(self, index):
+ """
+ Args:
+ index (int): Index
+
+ Returns:
+ tuple: (image, target) where target is the image segmentation.
+ """
+ img = Image.open(self.images[index]).convert('RGB')
+ target = Image.open(self.masks[index])
+
+ if self.transforms is not None:
+ img, target = self.transforms(img, target)
+
+ return img, target
+
+ def __len__(self):
+ return len(self.images)
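+
+
+# A hedged usage sketch (assumes the VOC2012 trainval archive has already been
+# downloaded and extracted under ``./data``):
+#
+# dataset = VOCSegmentation("data", year="2012", image_set="train")
+# img, mask = dataset[0]  # PIL RGB image and PIL segmentation mask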
+
+
+class VOCDetection(VisionDataset):
+    """`Pascal VOC <http://host.robots.ox.ac.uk/pascal/VOC/>`_ Detection Dataset.
+
+ Args:
+ root (string): Root directory of the VOC Dataset.
+ year (string, optional): The dataset year, supports years 2007 to 2012.
+ image_set (string, optional): Select the image_set to use, ``train``, ``trainval`` or ``val``
+ download (bool, optional): If true, downloads the dataset from the internet and
+ puts it in root directory. If dataset is already downloaded, it is not
+ downloaded again.
+        transform (callable, optional): A function/transform that takes in a PIL image
+            and returns a transformed version. E.g., ``transforms.RandomCrop``
+        target_transform (callable, optional): A function/transform that takes in the
+            target and transforms it.
+        transforms (callable, optional): A function/transform that takes an input sample
+            and its target as entry and returns a transformed version.
+ """
+
+ def __init__(self,
+ root,
+ year='2012',
+ image_set='train',
+ download=False,
+ transform=None,
+ target_transform=None,
+ transforms=None):
+ super(VOCDetection, self).__init__(root, transforms, transform, target_transform)
+ self.year = year
+ if year == "2007" and image_set == "test":
+ year = "2007-test"
+ self.url = DATASET_YEAR_DICT[year]['url']
+ self.filename = DATASET_YEAR_DICT[year]['filename']
+ self.md5 = DATASET_YEAR_DICT[year]['md5']
+ valid_sets = ["train", "trainval", "val"]
+ if year == "2007-test":
+ valid_sets.append("test")
+ self.image_set = verify_str_arg(image_set, "image_set", valid_sets)
+
+ base_dir = DATASET_YEAR_DICT[year]['base_dir']
+ voc_root = os.path.join(self.root, base_dir)
+ image_dir = os.path.join(voc_root, 'JPEGImages')
+ annotation_dir = os.path.join(voc_root, 'Annotations')
+
+ if download:
+ download_extract(self.url, self.root, self.filename, self.md5)
+
+ if not os.path.isdir(voc_root):
+ raise RuntimeError('Dataset not found or corrupted.' +
+ ' You can use download=True to download it')
+
+ splits_dir = os.path.join(voc_root, 'ImageSets/Main')
+
+ split_f = os.path.join(splits_dir, image_set.rstrip('\n') + '.txt')
+
+ with open(os.path.join(split_f), "r") as f:
+ file_names = [x.strip() for x in f.readlines()]
+
+ self.images = [os.path.join(image_dir, x + ".jpg") for x in file_names]
+ self.annotations = [os.path.join(annotation_dir, x + ".xml") for x in file_names]
+ assert (len(self.images) == len(self.annotations))
+
+ def __getitem__(self, index):
+ """
+ Args:
+ index (int): Index
+
+ Returns:
+ tuple: (image, target) where target is a dictionary of the XML tree.
+ """
+ img = Image.open(self.images[index]).convert('RGB')
+ target = self.parse_voc_xml(
+ ET.parse(self.annotations[index]).getroot())
+
+ if self.transforms is not None:
+ img, target = self.transforms(img, target)
+
+ return img, target
+
+ def __len__(self):
+ return len(self.images)
+
+ def parse_voc_xml(self, node):
+ voc_dict = {}
+ children = list(node)
+ if children:
+ def_dic = collections.defaultdict(list)
+ for dc in map(self.parse_voc_xml, children):
+ for ind, v in dc.items():
+ def_dic[ind].append(v)
+ if node.tag == 'annotation':
+ def_dic['object'] = [def_dic['object']]
+ voc_dict = {
+ node.tag:
+ {ind: v[0] if len(v) == 1 else v
+ for ind, v in def_dic.items()}
+ }
+ if node.text:
+ text = node.text.strip()
+ if not children:
+ voc_dict[node.tag] = text
+ return voc_dict
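+
+    # A worked example (toy annotation, illustrative only):
+    #   <annotation><object><name>dog</name></object></annotation>
+    # is parsed into
+    #   {'annotation': {'object': [{'name': 'dog'}]}}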
+
+
+def download_extract(url, root, filename, md5):
+ download_url(url, root, filename, md5)
+ with tarfile.open(os.path.join(root, filename), "r") as tar:
+ tar.extractall(path=root)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/extension.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/extension.py
new file mode 100644
index 0000000000..db3356aa67
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/extension.py
@@ -0,0 +1,58 @@
+_HAS_OPS = False
+
+
+def _register_extensions():
+ import os
+ import importlib
+ import torch
+
+ # load the custom_op_library and register the custom ops
+ lib_dir = os.path.dirname(__file__)
+ loader_details = (
+ importlib.machinery.ExtensionFileLoader,
+ importlib.machinery.EXTENSION_SUFFIXES
+ )
+
+ extfinder = importlib.machinery.FileFinder(lib_dir, loader_details)
+ ext_specs = extfinder.find_spec("_C")
+ if ext_specs is None:
+ raise ImportError
+ torch.ops.load_library(ext_specs.origin)
+
+
+try:
+ _register_extensions()
+ _HAS_OPS = True
+except (ImportError, OSError):
+ pass
+
+
+def _check_cuda_version():
+ """
+ Make sure that CUDA versions match between the pytorch install and torchvision install
+ """
+ if not _HAS_OPS:
+ return -1
+ import torch
+ _version = torch.ops.torchvision._cuda_version()
+ if _version != -1 and torch.version.cuda is not None:
+ tv_version = str(_version)
+ if int(tv_version) < 10000:
+ tv_major = int(tv_version[0])
+ tv_minor = int(tv_version[2])
+ else:
+ tv_major = int(tv_version[0:2])
+ tv_minor = int(tv_version[3])
+ t_version = torch.version.cuda
+ t_version = t_version.split('.')
+ t_major = int(t_version[0])
+ t_minor = int(t_version[1])
+ if t_major != tv_major or t_minor != tv_minor:
+ raise RuntimeError("Detected that PyTorch and torchvision were compiled with different CUDA versions. "
+ "PyTorch has CUDA Version={}.{} and torchvision has CUDA Version={}.{}. "
+ "Please reinstall the torchvision that matches your PyTorch install."
+ .format(t_major, t_minor, tv_major, tv_minor))
+ return _version
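+
+# A worked example of the decoding above (assumed values): a compiled value of
+# 10020 is parsed as CUDA 10.2, and 9020 as CUDA 9.2.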
+
+
+_check_cuda_version()
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/__init__.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/__init__.py
new file mode 100644
index 0000000000..cbbf560412
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/__init__.py
@@ -0,0 +1,34 @@
+from ._video_opt import (
+ Timebase,
+ VideoMetaData,
+ _HAS_VIDEO_OPT,
+ _probe_video_from_file,
+ _probe_video_from_memory,
+ _read_video_from_file,
+ _read_video_from_memory,
+ _read_video_timestamps_from_file,
+ _read_video_timestamps_from_memory,
+)
+from .video import (
+ read_video,
+ read_video_timestamps,
+ write_video,
+)
+
+
+__all__ = [
+ "write_video",
+ "read_video",
+ "read_video_timestamps",
+ "_read_video_from_file",
+ "_read_video_timestamps_from_file",
+ "_probe_video_from_file",
+ "_read_video_from_memory",
+ "_read_video_timestamps_from_memory",
+ "_probe_video_from_memory",
+ "_HAS_VIDEO_OPT",
+ "VideoMetaData",
+ "Timebase",
+]
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/_video_opt.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/_video_opt.py
new file mode 100644
index 0000000000..da37c66cfa
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/_video_opt.py
@@ -0,0 +1,551 @@
+
+import importlib
+import math
+import os
+import warnings
+from fractions import Fraction
+from typing import List, Tuple
+
+import numpy as np
+import torch
+
+
+_HAS_VIDEO_OPT = False
+
+try:
+ lib_dir = os.path.join(os.path.dirname(__file__), "..")
+
+ loader_details = (
+ importlib.machinery.ExtensionFileLoader,
+ importlib.machinery.EXTENSION_SUFFIXES
+ )
+
+ extfinder = importlib.machinery.FileFinder(lib_dir, loader_details)
+ ext_specs = extfinder.find_spec("video_reader")
+ if ext_specs is not None:
+ torch.ops.load_library(ext_specs.origin)
+ _HAS_VIDEO_OPT = True
+except (ImportError, OSError):
+ pass
+
+
+default_timebase = Fraction(0, 1)
+
+
+# simple class for torch scripting
+# the complex Fraction class from fractions module is not scriptable
+@torch.jit.script
+class Timebase(object):
+ __annotations__ = {"numerator": int, "denominator": int}
+ __slots__ = ["numerator", "denominator"]
+
+ def __init__(
+ self,
+ numerator, # type: int
+ denominator, # type: int
+ ):
+ # type: (...) -> None
+ self.numerator = numerator
+ self.denominator = denominator
+
+
+@torch.jit.script
+class VideoMetaData(object):
+ __annotations__ = {
+ "has_video": bool,
+ "video_timebase": Timebase,
+ "video_duration": float,
+ "video_fps": float,
+ "has_audio": bool,
+ "audio_timebase": Timebase,
+ "audio_duration": float,
+ "audio_sample_rate": float,
+ }
+ __slots__ = [
+ "has_video",
+ "video_timebase",
+ "video_duration",
+ "video_fps",
+ "has_audio",
+ "audio_timebase",
+ "audio_duration",
+ "audio_sample_rate",
+ ]
+
+ def __init__(self):
+ self.has_video = False
+ self.video_timebase = Timebase(0, 1)
+ self.video_duration = 0.0
+ self.video_fps = 0.0
+ self.has_audio = False
+ self.audio_timebase = Timebase(0, 1)
+ self.audio_duration = 0.0
+ self.audio_sample_rate = 0.0
+
+
+def _validate_pts(pts_range):
+    # type: (List[int]) -> None
+    if pts_range[1] > 0:
+        assert (
+            pts_range[0] <= pts_range[1]
+        ), """Start pts should not be larger than end pts, got
+        start pts: %d and end pts: %d""" % (
+            pts_range[0],
+            pts_range[1],
+        )
+
+
+def _fill_info(vtimebase, vfps, vduration, atimebase, asample_rate, aduration):
+ # type: (torch.Tensor,torch.Tensor,torch.Tensor,torch.Tensor,torch.Tensor,torch.Tensor) -> VideoMetaData
+ """
+    Build a VideoMetaData struct with info about the video
+ """
+ meta = VideoMetaData()
+ if vtimebase.numel() > 0:
+ meta.video_timebase = Timebase(
+ int(vtimebase[0].item()), int(vtimebase[1].item())
+ )
+ timebase = vtimebase[0].item() / float(vtimebase[1].item())
+ if vduration.numel() > 0:
+ meta.has_video = True
+ meta.video_duration = float(vduration.item()) * timebase
+ if vfps.numel() > 0:
+ meta.video_fps = float(vfps.item())
+ if atimebase.numel() > 0:
+ meta.audio_timebase = Timebase(
+ int(atimebase[0].item()), int(atimebase[1].item())
+ )
+ timebase = atimebase[0].item() / float(atimebase[1].item())
+ if aduration.numel() > 0:
+ meta.has_audio = True
+ meta.audio_duration = float(aduration.item()) * timebase
+ if asample_rate.numel() > 0:
+ meta.audio_sample_rate = float(asample_rate.item())
+
+ return meta
+
+
+def _align_audio_frames(aframes, aframe_pts, audio_pts_range):
+ # type: (torch.Tensor, torch.Tensor, List[int]) -> torch.Tensor
+ start, end = aframe_pts[0], aframe_pts[-1]
+ num_samples = aframes.size(0)
+ step_per_aframe = float(end - start + 1) / float(num_samples)
+ s_idx = 0
+ e_idx = num_samples
+ if start < audio_pts_range[0]:
+ s_idx = int((audio_pts_range[0] - start) / step_per_aframe)
+ if end > audio_pts_range[1]:
+ e_idx = int((audio_pts_range[1] - end) / step_per_aframe)
+ return aframes[s_idx:e_idx, :]
+
+
+def _read_video_from_file(
+ filename,
+ seek_frame_margin=0.25,
+ read_video_stream=True,
+ video_width=0,
+ video_height=0,
+ video_min_dimension=0,
+ video_max_dimension=0,
+ video_pts_range=(0, -1),
+ video_timebase=default_timebase,
+ read_audio_stream=True,
+ audio_samples=0,
+ audio_channels=0,
+ audio_pts_range=(0, -1),
+ audio_timebase=default_timebase,
+):
+ """
+ Reads a video from a file, returning both the video frames as well as
+ the audio frames
+
+ Args
+ ----------
+ filename : str
+ path to the video file
+ seek_frame_margin: double, optional
+        seeking a frame in the stream is imprecise. Thus, when video_start_pts
+        is specified, we seek the pts earlier by seek_frame_margin seconds
+    read_video_stream: int, optional
+        whether to read the video stream. If yes, set to 1. Otherwise, 0
+ video_width/video_height/video_min_dimension/video_max_dimension: int
+ together decide the size of decoded frames
+ - When video_width = 0, video_height = 0, video_min_dimension = 0,
+            and video_max_dimension = 0, keep the original frame resolution
+ - When video_width = 0, video_height = 0, video_min_dimension != 0,
+ and video_max_dimension = 0, keep the aspect ratio and resize the
+ frame so that shorter edge size is video_min_dimension
+ - When video_width = 0, video_height = 0, video_min_dimension = 0,
+ and video_max_dimension != 0, keep the aspect ratio and resize
+ the frame so that longer edge size is video_max_dimension
+ - When video_width = 0, video_height = 0, video_min_dimension != 0,
+ and video_max_dimension != 0, resize the frame so that shorter
+ edge size is video_min_dimension, and longer edge size is
+ video_max_dimension. The aspect ratio may not be preserved
+ - When video_width = 0, video_height != 0, video_min_dimension = 0,
+ and video_max_dimension = 0, keep the aspect ratio and resize
+ the frame so that frame video_height is $video_height
+ - When video_width != 0, video_height == 0, video_min_dimension = 0,
+ and video_max_dimension = 0, keep the aspect ratio and resize
+ the frame so that frame video_width is $video_width
+ - When video_width != 0, video_height != 0, video_min_dimension = 0,
+ and video_max_dimension = 0, resize the frame so that frame
+ video_width and video_height are set to $video_width and
+ $video_height, respectively
+ video_pts_range : list(int), optional
+ the start and end presentation timestamp of video stream
+ video_timebase: Fraction, optional
+ a Fraction rational number which denotes timebase in video stream
+ read_audio_stream: int, optional
+        whether to read the audio stream. If yes, set to 1. Otherwise, 0
+ audio_samples: int, optional
+ audio sampling rate
+    audio_channels: int, optional
+        number of audio channels
+ audio_pts_range : list(int), optional
+ the start and end presentation timestamp of audio stream
+ audio_timebase: Fraction, optional
+ a Fraction rational number which denotes time base in audio stream
+
+ Returns
+ -------
+ vframes : Tensor[T, H, W, C]
+ the `T` video frames
+ aframes : Tensor[L, K]
+ the audio frames, where `L` is the number of points and
+ `K` is the number of audio_channels
+ info : Dict
+ metadata for the video and audio. Can contain the fields video_fps (float)
+ and audio_fps (int)
+ """
+ _validate_pts(video_pts_range)
+ _validate_pts(audio_pts_range)
+
+ result = torch.ops.video_reader.read_video_from_file(
+ filename,
+ seek_frame_margin,
+ 0, # getPtsOnly
+ read_video_stream,
+ video_width,
+ video_height,
+ video_min_dimension,
+ video_max_dimension,
+ video_pts_range[0],
+ video_pts_range[1],
+ video_timebase.numerator,
+ video_timebase.denominator,
+ read_audio_stream,
+ audio_samples,
+ audio_channels,
+ audio_pts_range[0],
+ audio_pts_range[1],
+ audio_timebase.numerator,
+ audio_timebase.denominator,
+ )
+ vframes, _vframe_pts, vtimebase, vfps, vduration, \
+ aframes, aframe_pts, atimebase, asample_rate, aduration = (
+ result
+ )
+ info = _fill_info(vtimebase, vfps, vduration, atimebase, asample_rate, aduration)
+ if aframes.numel() > 0:
+ # when audio stream is found
+ aframes = _align_audio_frames(aframes, aframe_pts, audio_pts_range)
+ return vframes, aframes, info
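+
+
+# A hedged usage sketch (``some_clip.mp4`` is a placeholder path; requires the
+# ``video_reader`` extension to be loaded):
+#
+# vframes, aframes, info = _read_video_from_file(
+#     "some_clip.mp4",
+#     video_min_dimension=256,        # keep aspect ratio, shorter edge -> 256
+#     video_pts_range=(0, -1),        # decode the full video stream
+# )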
+
+
+def _read_video_timestamps_from_file(filename):
+ """
+    Decode all video and audio frames in the video. Only the pts
+    (presentation timestamps) are returned. The actual frame pixel data is not
+    copied. Thus, it is much faster than read_video(...)
+ """
+ result = torch.ops.video_reader.read_video_from_file(
+ filename,
+ 0, # seek_frame_margin
+ 1, # getPtsOnly
+ 1, # read_video_stream
+ 0, # video_width
+ 0, # video_height
+ 0, # video_min_dimension
+ 0, # video_max_dimension
+ 0, # video_start_pts
+ -1, # video_end_pts
+ 0, # video_timebase_num
+ 1, # video_timebase_den
+ 1, # read_audio_stream
+ 0, # audio_samples
+ 0, # audio_channels
+ 0, # audio_start_pts
+ -1, # audio_end_pts
+ 0, # audio_timebase_num
+ 1, # audio_timebase_den
+ )
+ _vframes, vframe_pts, vtimebase, vfps, vduration, \
+ _aframes, aframe_pts, atimebase, asample_rate, aduration = (result)
+ info = _fill_info(vtimebase, vfps, vduration, atimebase, asample_rate, aduration)
+
+ vframe_pts = vframe_pts.numpy().tolist()
+ aframe_pts = aframe_pts.numpy().tolist()
+ return vframe_pts, aframe_pts, info
+
+
+def _probe_video_from_file(filename):
+ """
+ Probe a video file and return VideoMetaData with info about the video
+ """
+ result = torch.ops.video_reader.probe_video_from_file(filename)
+ vtimebase, vfps, vduration, atimebase, asample_rate, aduration = result
+ info = _fill_info(vtimebase, vfps, vduration, atimebase, asample_rate, aduration)
+ return info
+
+
+def _read_video_from_memory(
+ video_data, # type: torch.Tensor
+ seek_frame_margin=0.25, # type: float
+ read_video_stream=1, # type: int
+ video_width=0, # type: int
+ video_height=0, # type: int
+ video_min_dimension=0, # type: int
+ video_max_dimension=0, # type: int
+ video_pts_range=(0, -1), # type: List[int]
+ video_timebase_numerator=0, # type: int
+ video_timebase_denominator=1, # type: int
+ read_audio_stream=1, # type: int
+ audio_samples=0, # type: int
+ audio_channels=0, # type: int
+ audio_pts_range=(0, -1), # type: List[int]
+ audio_timebase_numerator=0, # type: int
+ audio_timebase_denominator=1, # type: int
+):
+ # type: (...) -> Tuple[torch.Tensor, torch.Tensor]
+ """
+ Reads a video from memory, returning both the video frames as well as
+ the audio frames
+ This function is torchscriptable.
+
+ Args
+ ----------
+ video_data : data type could be 1) torch.Tensor, dtype=torch.int8 or 2) python bytes
+ compressed video content stored in either 1) torch.Tensor 2) python bytes
+ seek_frame_margin: double, optional
+        seeking a frame in the stream is imprecise. Thus, when video_start_pts is specified,
+        we seek the pts earlier by seek_frame_margin seconds
+    read_video_stream: int, optional
+        whether to read the video stream. If yes, set to 1. Otherwise, 0
+ video_width/video_height/video_min_dimension/video_max_dimension: int
+ together decide the size of decoded frames
+ - When video_width = 0, video_height = 0, video_min_dimension = 0,
+            and video_max_dimension = 0, keep the original frame resolution
+ - When video_width = 0, video_height = 0, video_min_dimension != 0,
+ and video_max_dimension = 0, keep the aspect ratio and resize the
+ frame so that shorter edge size is video_min_dimension
+ - When video_width = 0, video_height = 0, video_min_dimension = 0,
+ and video_max_dimension != 0, keep the aspect ratio and resize
+ the frame so that longer edge size is video_max_dimension
+ - When video_width = 0, video_height = 0, video_min_dimension != 0,
+ and video_max_dimension != 0, resize the frame so that shorter
+ edge size is video_min_dimension, and longer edge size is
+ video_max_dimension. The aspect ratio may not be preserved
+ - When video_width = 0, video_height != 0, video_min_dimension = 0,
+ and video_max_dimension = 0, keep the aspect ratio and resize
+ the frame so that frame video_height is $video_height
+ - When video_width != 0, video_height == 0, video_min_dimension = 0,
+ and video_max_dimension = 0, keep the aspect ratio and resize
+ the frame so that frame video_width is $video_width
+ - When video_width != 0, video_height != 0, video_min_dimension = 0,
+ and video_max_dimension = 0, resize the frame so that frame
+ video_width and video_height are set to $video_width and
+ $video_height, respectively
+ video_pts_range : list(int), optional
+ the start and end presentation timestamp of video stream
+ video_timebase_numerator / video_timebase_denominator: optional
+ a rational number which denotes timebase in video stream
+ read_audio_stream: int, optional
+        whether to read the audio stream. If yes, set to 1. Otherwise, 0
+ audio_samples: int, optional
+ audio sampling rate
+    audio_channels: int, optional
+        number of audio channels
+ audio_pts_range : list(int), optional
+ the start and end presentation timestamp of audio stream
+ audio_timebase_numerator / audio_timebase_denominator: optional
+ a rational number which denotes time base in audio stream
+
+ Returns
+ -------
+ vframes : Tensor[T, H, W, C]
+ the `T` video frames
+ aframes : Tensor[L, K]
+ the audio frames, where `L` is the number of points and
+ `K` is the number of channels
+ """
+
+ _validate_pts(video_pts_range)
+ _validate_pts(audio_pts_range)
+
+ result = torch.ops.video_reader.read_video_from_memory(
+ video_data,
+ seek_frame_margin,
+ 0, # getPtsOnly
+ read_video_stream,
+ video_width,
+ video_height,
+ video_min_dimension,
+ video_max_dimension,
+ video_pts_range[0],
+ video_pts_range[1],
+ video_timebase_numerator,
+ video_timebase_denominator,
+ read_audio_stream,
+ audio_samples,
+ audio_channels,
+ audio_pts_range[0],
+ audio_pts_range[1],
+ audio_timebase_numerator,
+ audio_timebase_denominator,
+ )
+
+ vframes, _vframe_pts, vtimebase, vfps, vduration, \
+ aframes, aframe_pts, atimebase, asample_rate, aduration = (
+ result
+ )
+
+ if aframes.numel() > 0:
+ # when audio stream is found
+ aframes = _align_audio_frames(aframes, aframe_pts, audio_pts_range)
+
+ return vframes, aframes
+
+
+def _read_video_timestamps_from_memory(video_data):
+ """
+ Decode all frames in the video. Only pts (presentation timestamp) is returned.
+ The actual frame pixel data is not copied. Thus, read_video_timestamps(...)
+ is much faster than read_video(...)
+ """
+ if not isinstance(video_data, torch.Tensor):
+ video_data = torch.from_numpy(np.frombuffer(video_data, dtype=np.uint8))
+ result = torch.ops.video_reader.read_video_from_memory(
+ video_data,
+ 0, # seek_frame_margin
+ 1, # getPtsOnly
+ 1, # read_video_stream
+ 0, # video_width
+ 0, # video_height
+ 0, # video_min_dimension
+ 0, # video_max_dimension
+ 0, # video_start_pts
+ -1, # video_end_pts
+ 0, # video_timebase_num
+ 1, # video_timebase_den
+ 1, # read_audio_stream
+ 0, # audio_samples
+ 0, # audio_channels
+ 0, # audio_start_pts
+ -1, # audio_end_pts
+ 0, # audio_timebase_num
+ 1, # audio_timebase_den
+ )
+ _vframes, vframe_pts, vtimebase, vfps, vduration, \
+ _aframes, aframe_pts, atimebase, asample_rate, aduration = (
+ result
+ )
+ info = _fill_info(vtimebase, vfps, vduration, atimebase, asample_rate, aduration)
+
+ vframe_pts = vframe_pts.numpy().tolist()
+ aframe_pts = aframe_pts.numpy().tolist()
+ return vframe_pts, aframe_pts, info
+
+
+def _probe_video_from_memory(video_data):
+ # type: (torch.Tensor) -> VideoMetaData
+ """
+ Probe a video in memory and return VideoMetaData with info about the video
+ This function is torchscriptable
+ """
+ if not isinstance(video_data, torch.Tensor):
+ video_data = torch.from_numpy(np.frombuffer(video_data, dtype=np.uint8))
+ result = torch.ops.video_reader.probe_video_from_memory(video_data)
+ vtimebase, vfps, vduration, atimebase, asample_rate, aduration = result
+ info = _fill_info(vtimebase, vfps, vduration, atimebase, asample_rate, aduration)
+ return info
+
+
+def _read_video(filename, start_pts=0, end_pts=None, pts_unit="pts"):
+ if end_pts is None:
+ end_pts = float("inf")
+
+ if pts_unit == "pts":
+ warnings.warn(
+ "The pts_unit 'pts' gives wrong results and will be removed in a "
+ + "follow-up version. Please use pts_unit 'sec'."
+ )
+
+ info = _probe_video_from_file(filename)
+
+ has_video = info.has_video
+ has_audio = info.has_audio
+
+ def get_pts(time_base):
+ start_offset = start_pts
+ end_offset = end_pts
+ if pts_unit == "sec":
+ start_offset = int(math.floor(start_pts * (1 / time_base)))
+ if end_offset != float("inf"):
+ end_offset = int(math.ceil(end_pts * (1 / time_base)))
+ if end_offset == float("inf"):
+ end_offset = -1
+ return start_offset, end_offset
+
+ video_pts_range = (0, -1)
+ video_timebase = default_timebase
+ if has_video:
+ video_timebase = Fraction(
+ info.video_timebase.numerator, info.video_timebase.denominator
+ )
+ video_pts_range = get_pts(video_timebase)
+
+ audio_pts_range = (0, -1)
+ audio_timebase = default_timebase
+ if has_audio:
+ audio_timebase = Fraction(
+ info.audio_timebase.numerator, info.audio_timebase.denominator
+ )
+ audio_pts_range = get_pts(audio_timebase)
+
+ vframes, aframes, info = _read_video_from_file(
+ filename,
+ read_video_stream=True,
+ video_pts_range=video_pts_range,
+ video_timebase=video_timebase,
+ read_audio_stream=True,
+ audio_pts_range=audio_pts_range,
+ audio_timebase=audio_timebase,
+ )
+ _info = {}
+ if has_video:
+ _info["video_fps"] = info.video_fps
+ if has_audio:
+ _info["audio_fps"] = info.audio_sample_rate
+
+ return vframes, aframes, _info
+
+
+def _read_video_timestamps(filename, pts_unit="pts"):
+ if pts_unit == "pts":
+ warnings.warn(
+ "The pts_unit 'pts' gives wrong results and will be removed in a "
+ + "follow-up version. Please use pts_unit 'sec'."
+ )
+
+ pts, _, info = _read_video_timestamps_from_file(filename)
+
+ if pts_unit == "sec":
+ video_time_base = Fraction(
+ info.video_timebase.numerator, info.video_timebase.denominator
+ )
+ pts = [x * video_time_base for x in pts]
+
+ video_fps = info.video_fps if info.has_video else None
+
+ return pts, video_fps
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/video.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/video.py
new file mode 100644
index 0000000000..40d1cfeed8
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/video.py
@@ -0,0 +1,349 @@
+import gc
+import math
+import re
+import warnings
+from typing import Tuple, List
+
+import numpy as np
+import torch
+
+from . import _video_opt
+from ._video_opt import VideoMetaData
+
+
+try:
+ import av
+
+ av.logging.set_level(av.logging.ERROR)
+ if not hasattr(av.video.frame.VideoFrame, "pict_type"):
+ av = ImportError(
+ """\
+Your version of PyAV is too old for the necessary video operations in torchvision.
+If you are on Python 3.5, you will have to build from source (the conda-forge
+packages are not up-to-date). See
+https://github.com/mikeboers/PyAV#installation for instructions on how to
+install PyAV on your system.
+"""
+ )
+except ImportError:
+ av = ImportError(
+ """\
+PyAV is not installed, and is necessary for the video operations in torchvision.
+See https://github.com/mikeboers/PyAV#installation for instructions on how to
+install PyAV on your system.
+"""
+ )
+
+
+def _check_av_available():
+ if isinstance(av, Exception):
+ raise av
+
+
+def _av_available():
+ return not isinstance(av, Exception)
+
+
+# PyAV has some reference cycles
+_CALLED_TIMES = 0
+_GC_COLLECTION_INTERVAL = 10
+
+
+def write_video(filename, video_array, fps, video_codec="libx264", options=None):
+ """
+    Writes a 4d tensor in [T, H, W, C] format to a video file
+
+ Parameters
+ ----------
+ filename : str
+ path where the video will be saved
+ video_array : Tensor[T, H, W, C]
+ tensor containing the individual frames, as a uint8 tensor in [T, H, W, C] format
+ fps : Number
+ frames per second
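+
+    Example (a minimal sketch writing a random clip; ``out.mp4`` is a placeholder path)::
+
+        >>> frames = torch.randint(0, 256, (16, 240, 320, 3), dtype=torch.uint8)
+        >>> write_video("out.mp4", frames, fps=30)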
+ """
+ _check_av_available()
+ video_array = torch.as_tensor(video_array, dtype=torch.uint8).numpy()
+
+ container = av.open(filename, mode="w")
+
+ stream = container.add_stream(video_codec, rate=fps)
+ stream.width = video_array.shape[2]
+ stream.height = video_array.shape[1]
+ stream.pix_fmt = "yuv420p" if video_codec != "libx264rgb" else "rgb24"
+ stream.options = options or {}
+
+ for img in video_array:
+ frame = av.VideoFrame.from_ndarray(img, format="rgb24")
+ frame.pict_type = "NONE"
+ for packet in stream.encode(frame):
+ container.mux(packet)
+
+ # Flush stream
+ for packet in stream.encode():
+ container.mux(packet)
+
+ # Close the file
+ container.close()
+
+
+def _read_from_stream(
+ container, start_offset, end_offset, pts_unit, stream, stream_name
+):
+ global _CALLED_TIMES, _GC_COLLECTION_INTERVAL
+ _CALLED_TIMES += 1
+ if _CALLED_TIMES % _GC_COLLECTION_INTERVAL == _GC_COLLECTION_INTERVAL - 1:
+ gc.collect()
+
+ if pts_unit == "sec":
+ start_offset = int(math.floor(start_offset * (1 / stream.time_base)))
+ if end_offset != float("inf"):
+ end_offset = int(math.ceil(end_offset * (1 / stream.time_base)))
+ else:
+ warnings.warn(
+ "The pts_unit 'pts' gives wrong results and will be removed in a "
+ + "follow-up version. Please use pts_unit 'sec'."
+ )
+
+ frames = {}
+ should_buffer = False
+ max_buffer_size = 5
+ if stream.type == "video":
+ # DivX-style packed B-frames can have out-of-order pts (2 frames in a single pkt)
+ # so need to buffer some extra frames to sort everything
+ # properly
+ extradata = stream.codec_context.extradata
+ # overly complicated way of finding if `divx_packed` is set, following
+ # https://github.com/FFmpeg/FFmpeg/commit/d5a21172283572af587b3d939eba0091484d3263
+ if extradata and b"DivX" in extradata:
+ # can't use regex directly because of some weird characters sometimes...
+ pos = extradata.find(b"DivX")
+ d = extradata[pos:]
+ o = re.search(br"DivX(\d+)Build(\d+)(\w)", d)
+ if o is None:
+ o = re.search(br"DivX(\d+)b(\d+)(\w)", d)
+ if o is not None:
+ should_buffer = o.group(3) == b"p"
+ seek_offset = start_offset
+ # some files don't seek to the right location, so better be safe here
+ seek_offset = max(seek_offset - 1, 0)
+ if should_buffer:
+ # FIXME this is kind of a hack, but we will jump to the previous keyframe
+ # so this will be safe
+ seek_offset = max(seek_offset - max_buffer_size, 0)
+ try:
+ # TODO check if stream needs to always be the video stream here or not
+ container.seek(seek_offset, any_frame=False, backward=True, stream=stream)
+ except av.AVError:
+ # TODO add some warnings in this case
+ # print("Corrupted file?", container.name)
+ return []
+ buffer_count = 0
+ try:
+ for _idx, frame in enumerate(container.decode(**stream_name)):
+ frames[frame.pts] = frame
+ if frame.pts >= end_offset:
+ if should_buffer and buffer_count < max_buffer_size:
+ buffer_count += 1
+ continue
+ break
+ except av.AVError:
+ # TODO add a warning
+ pass
+ # ensure that the results are sorted wrt the pts
+ result = [
+ frames[i] for i in sorted(frames) if start_offset <= frames[i].pts <= end_offset
+ ]
+ if len(frames) > 0 and start_offset > 0 and start_offset not in frames:
+ # if there is no frame that exactly matches the pts of start_offset
+ # add the last frame smaller than start_offset, to guarantee that
+ # we will have all the necessary data. This is most useful for audio
+ preceding_frames = [i for i in frames if i < start_offset]
+ if len(preceding_frames) > 0:
+ first_frame_pts = max(preceding_frames)
+ result.insert(0, frames[first_frame_pts])
+ return result
+
+
+def _align_audio_frames(aframes, audio_frames, ref_start, ref_end):
+ start, end = audio_frames[0].pts, audio_frames[-1].pts
+ total_aframes = aframes.shape[1]
+ step_per_aframe = (end - start + 1) / total_aframes
+ s_idx = 0
+ e_idx = total_aframes
+ if start < ref_start:
+ s_idx = int((ref_start - start) / step_per_aframe)
+ if end > ref_end:
+ e_idx = int((ref_end - end) / step_per_aframe)
+ return aframes[:, s_idx:e_idx]
+
+
+def read_video(filename, start_pts=0, end_pts=None, pts_unit="pts"):
+ """
+ Reads a video from a file, returning both the video frames as well as
+ the audio frames
+
+ Parameters
+ ----------
+ filename : str
+ path to the video file
+ start_pts : int if pts_unit = 'pts', optional
+ float / Fraction if pts_unit = 'sec', optional
+ the start presentation time of the video
+ end_pts : int if pts_unit = 'pts', optional
+ float / Fraction if pts_unit = 'sec', optional
+ the end presentation time
+ pts_unit : str, optional
+ unit in which start_pts and end_pts values will be interpreted, either 'pts' or 'sec'. Defaults to 'pts'.
+
+ Returns
+ -------
+ vframes : Tensor[T, H, W, C]
+ the `T` video frames
+ aframes : Tensor[K, L]
+ the audio frames, where `K` is the number of channels and `L` is the
+ number of points
+ info : Dict
+ metadata for the video and audio. Can contain the fields video_fps (float)
+ and audio_fps (int)
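+
+    Example (a minimal sketch; ``video.mp4`` is a placeholder path)::
+
+        >>> vframes, aframes, info = read_video("video.mp4", pts_unit="sec")
+        >>> vframes.shape  # torch.Size([T, H, W, 3])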
+ """
+
+ from torchvision import get_video_backend
+
+ if get_video_backend() != "pyav":
+ return _video_opt._read_video(filename, start_pts, end_pts, pts_unit)
+
+ _check_av_available()
+
+ if end_pts is None:
+ end_pts = float("inf")
+
+ if end_pts < start_pts:
+ raise ValueError(
+ "end_pts should be larger than start_pts, got "
+ "start_pts={} and end_pts={}".format(start_pts, end_pts)
+ )
+
+ info = {}
+ video_frames = []
+ audio_frames = []
+
+ try:
+ container = av.open(filename, metadata_errors="ignore")
+ except av.AVError:
+ # TODO raise a warning?
+ pass
+ else:
+ if container.streams.video:
+ video_frames = _read_from_stream(
+ container,
+ start_pts,
+ end_pts,
+ pts_unit,
+ container.streams.video[0],
+ {"video": 0},
+ )
+ video_fps = container.streams.video[0].average_rate
+ # guard against potentially corrupted files
+ if video_fps is not None:
+ info["video_fps"] = float(video_fps)
+
+ if container.streams.audio:
+ audio_frames = _read_from_stream(
+ container,
+ start_pts,
+ end_pts,
+ pts_unit,
+ container.streams.audio[0],
+ {"audio": 0},
+ )
+ info["audio_fps"] = container.streams.audio[0].rate
+
+ container.close()
+
+ vframes = [frame.to_rgb().to_ndarray() for frame in video_frames]
+ aframes = [frame.to_ndarray() for frame in audio_frames]
+
+ if vframes:
+ vframes = torch.as_tensor(np.stack(vframes))
+ else:
+ vframes = torch.empty((0, 1, 1, 3), dtype=torch.uint8)
+
+ if aframes:
+ aframes = np.concatenate(aframes, 1)
+ aframes = torch.as_tensor(aframes)
+ aframes = _align_audio_frames(aframes, audio_frames, start_pts, end_pts)
+ else:
+ aframes = torch.empty((1, 0), dtype=torch.float32)
+
+ return vframes, aframes, info
+
+
+def _can_read_timestamps_from_packets(container):
+ extradata = container.streams[0].codec_context.extradata
+ if extradata is None:
+ return False
+ if b"Lavc" in extradata:
+ return True
+ return False
+
+
+def read_video_timestamps(filename, pts_unit="pts"):
+ """
+ List the video frames timestamps.
+
+ Note that the function decodes the whole video frame-by-frame.
+
+ Parameters
+ ----------
+ filename : str
+ path to the video file
+ pts_unit : str, optional
+ unit in which timestamp values will be returned either 'pts' or 'sec'. Defaults to 'pts'.
+
+ Returns
+ -------
+ pts : List[int] if pts_unit = 'pts'
+ List[Fraction] if pts_unit = 'sec'
+ presentation timestamps for each one of the frames in the video.
+ video_fps : int
+ the frame rate for the video
+
+ """
+ from torchvision import get_video_backend
+
+ if get_video_backend() != "pyav":
+ return _video_opt._read_video_timestamps(filename, pts_unit)
+
+ _check_av_available()
+
+ video_frames = []
+ video_fps = None
+
+ try:
+ container = av.open(filename, metadata_errors="ignore")
+ except av.AVError:
+ # TODO add a warning
+ pass
+ else:
+ if container.streams.video:
+ video_stream = container.streams.video[0]
+ video_time_base = video_stream.time_base
+ if _can_read_timestamps_from_packets(container):
+ # fast path
+ video_frames = [
+ x for x in container.demux(video=0) if x.pts is not None
+ ]
+ else:
+ video_frames = _read_from_stream(
+ container, 0, float("inf"), pts_unit, video_stream, {"video": 0}
+ )
+ video_fps = float(video_stream.average_rate)
+ container.close()
+
+ pts = [x.pts for x in video_frames]
+
+ if pts_unit == "sec":
+ pts = [x * video_time_base for x in pts]
+
+ return pts, video_fps
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/__init__.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/__init__.py
new file mode 100644
index 0000000000..283e544e98
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/__init__.py
@@ -0,0 +1,14 @@
+from .alexnet import *
+from .resnet import *
+from .vgg import *
+from .squeezenet import *
+from .inception import *
+from .densenet import *
+from .googlenet import *
+from .mobilenet import *
+from .mnasnet import *
+from .shufflenetv2 import *
+from . import segmentation
+from . import detection
+from . import video
+from . import quantization
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils.py
new file mode 100644
index 0000000000..f4e1cd8450
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils.py
@@ -0,0 +1,83 @@
+from collections import OrderedDict
+from typing import Dict, Optional
+
+from torch import nn
+
+
+class IntermediateLayerGetter(nn.ModuleDict):
+ """
+ Module wrapper that returns intermediate layers from a model
+
+ It has a strong assumption that the modules have been registered
+ into the model in the same order as they are used.
+ This means that one should **not** reuse the same nn.Module
+ twice in the forward if you want this to work.
+
+ Additionally, it is only able to query submodules that are directly
+ assigned to the model. So if `model` is passed, `model.feature1` can
+ be returned, but not `model.feature1.layer2`.
+
+ Args:
+ model (nn.Module): model on which we will extract the features
+ return_layers (Dict[name, new_name]): a dict containing the names
+ of the modules for which the activations will be returned as
+ the key of the dict, and the value of the dict is the name
+ of the returned activation (which the user can specify).
+
+ Examples::
+
+ >>> m = torchvision.models.resnet18(pretrained=True)
+        >>> # extract layer1 and layer3, giving them the names `feat1` and `feat2`
+ >>> new_m = torchvision.models._utils.IntermediateLayerGetter(m,
+ >>> {'layer1': 'feat1', 'layer3': 'feat2'})
+ >>> out = new_m(torch.rand(1, 3, 224, 224))
+ >>> print([(k, v.shape) for k, v in out.items()])
+ >>> [('feat1', torch.Size([1, 64, 56, 56])),
+ >>> ('feat2', torch.Size([1, 256, 14, 14]))]
+ """
+
+ _version = 2
+ __annotations__ = {
+ "return_layers": Dict[str, str],
+ }
+
+ def __init__(self, model: nn.Module, return_layers: Dict[str, str]) -> None:
+ if not set(return_layers).issubset([name for name, _ in model.named_children()]):
+ raise ValueError("return_layers are not present in model")
+ orig_return_layers = return_layers
+ return_layers = {str(k): str(v) for k, v in return_layers.items()}
+ layers = OrderedDict()
+ for name, module in model.named_children():
+ layers[name] = module
+ if name in return_layers:
+ del return_layers[name]
+ if not return_layers:
+ break
+
+ super().__init__(layers)
+ self.return_layers = orig_return_layers
+
+ def forward(self, x):
+ out = OrderedDict()
+ for name, module in self.items():
+ x = module(x)
+ if name in self.return_layers:
+ out_name = self.return_layers[name]
+ out[out_name] = x
+ return out
+
+
+def _make_divisible(v: float, divisor: int, min_value: Optional[int] = None) -> int:
+ """
+ This function is taken from the original tf repo.
+    It ensures that all layers have a channel number that is divisible by ``divisor`` (8 in the MobileNet models).
+ It can be seen here:
+ https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
+ """
+ if min_value is None:
+ min_value = divisor
+ new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
+ # Make sure that round down does not go down by more than 10%.
+ if new_v < 0.9 * v:
+ new_v += divisor
+ return new_v
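+
+
+# Worked examples (assumed values): _make_divisible(37.5, 8) == 40, and
+# _make_divisible(27, 8) == 32 because rounding down to 24 would reduce the
+# channel count by more than 10%.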
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils_origin.py
new file mode 100644
index 0000000000..291041d7b5
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils_origin.py
@@ -0,0 +1,67 @@
+from collections import OrderedDict
+
+import torch
+from torch import nn
+from torch.jit.annotations import Dict
+
+
+class IntermediateLayerGetter(nn.ModuleDict):
+ """
+ Module wrapper that returns intermediate layers from a model
+
+ It has a strong assumption that the modules have been registered
+ into the model in the same order as they are used.
+ This means that one should **not** reuse the same nn.Module
+ twice in the forward if you want this to work.
+
+ Additionally, it is only able to query submodules that are directly
+ assigned to the model. So if `model` is passed, `model.feature1` can
+ be returned, but not `model.feature1.layer2`.
+
+ Arguments:
+ model (nn.Module): model on which we will extract the features
+ return_layers (Dict[name, new_name]): a dict containing the names
+ of the modules for which the activations will be returned as
+ the key of the dict, and the value of the dict is the name
+ of the returned activation (which the user can specify).
+
+ Examples::
+
+ >>> m = torchvision.models.resnet18(pretrained=True)
+        >>> # extract layer1 and layer3, giving them the names `feat1` and `feat2`
+ >>> new_m = torchvision.models._utils.IntermediateLayerGetter(m,
+ >>> {'layer1': 'feat1', 'layer3': 'feat2'})
+ >>> out = new_m(torch.rand(1, 3, 224, 224))
+ >>> print([(k, v.shape) for k, v in out.items()])
+ >>> [('feat1', torch.Size([1, 64, 56, 56])),
+ >>> ('feat2', torch.Size([1, 256, 14, 14]))]
+ """
+ _version = 2
+ __annotations__ = {
+ "return_layers": Dict[str, str],
+ }
+
+ def __init__(self, model, return_layers):
+ if not set(return_layers).issubset([name for name, _ in model.named_children()]):
+ raise ValueError("return_layers are not present in model")
+ orig_return_layers = return_layers
+ return_layers = {str(k): str(v) for k, v in return_layers.items()}
+ layers = OrderedDict()
+ for name, module in model.named_children():
+ layers[name] = module
+ if name in return_layers:
+ del return_layers[name]
+ if not return_layers:
+ break
+
+ super(IntermediateLayerGetter, self).__init__(layers)
+ self.return_layers = orig_return_layers
+
+ def forward(self, x):
+ out = OrderedDict()
+ for name, module in self.items():
+ x = module(x)
+ if name in self.return_layers:
+ out_name = self.return_layers[name]
+ out[out_name] = x
+ return out
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/alexnet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/alexnet.py
new file mode 100644
index 0000000000..a0126312d1
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/alexnet.py
@@ -0,0 +1,65 @@
+import torch
+import torch.nn as nn
+from .utils import load_state_dict_from_url
+
+
+__all__ = ['AlexNet', 'alexnet']
+
+
+model_urls = {
+ 'alexnet': 'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth',
+}
+
+
+class AlexNet(nn.Module):
+
+ def __init__(self, num_classes=1000):
+ super(AlexNet, self).__init__()
+ self.features = nn.Sequential(
+ nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
+ nn.ReLU(inplace=True),
+ nn.MaxPool2d(kernel_size=3, stride=2),
+ nn.Conv2d(64, 192, kernel_size=5, padding=2),
+ nn.ReLU(inplace=True),
+ nn.MaxPool2d(kernel_size=3, stride=2),
+ nn.Conv2d(192, 384, kernel_size=3, padding=1),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(384, 256, kernel_size=3, padding=1),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(256, 256, kernel_size=3, padding=1),
+ nn.ReLU(inplace=True),
+ nn.MaxPool2d(kernel_size=3, stride=2),
+ )
+ self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
+ self.classifier = nn.Sequential(
+ nn.Dropout(),
+ nn.Linear(256 * 6 * 6, 4096),
+ nn.ReLU(inplace=True),
+ nn.Dropout(),
+ nn.Linear(4096, 4096),
+ nn.ReLU(inplace=True),
+ nn.Linear(4096, num_classes),
+ )
+
+ def forward(self, x):
+ x = self.features(x)
+ x = self.avgpool(x)
+ x = torch.flatten(x, 1)
+ x = self.classifier(x)
+ return x
+
+
+def alexnet(pretrained=False, progress=True, **kwargs):
+ r"""AlexNet model architecture from the
+ `"One weird trick..." `_ paper.
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ model = AlexNet(**kwargs)
+ if pretrained:
+ state_dict = load_state_dict_from_url(model_urls['alexnet'],
+ progress=progress)
+ model.load_state_dict(state_dict)
+ return model
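A quick smoke test, not part of the patch, assuming this module is importable; num_classes is forwarded to AlexNet through **kwargs, and the check only confirms that the classifier emits one logit per class.

import torch

model = alexnet(pretrained=False, num_classes=10)
model.eval()
with torch.no_grad():
    logits = model(torch.rand(2, 3, 224, 224))
print(logits.shape)  # torch.Size([2, 10])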
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/densenet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/densenet.py
new file mode 100644
index 0000000000..822dde0925
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/densenet.py
@@ -0,0 +1,279 @@
+import re
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.utils.checkpoint as cp
+from collections import OrderedDict
+from .utils import load_state_dict_from_url
+from torch import Tensor
+from torch.jit.annotations import List
+
+
+__all__ = ['DenseNet', 'densenet121', 'densenet169', 'densenet201', 'densenet161']
+
+model_urls = {
+ 'densenet121': 'https://download.pytorch.org/models/densenet121-a639ec97.pth',
+ 'densenet169': 'https://download.pytorch.org/models/densenet169-b2777c0a.pth',
+ 'densenet201': 'https://download.pytorch.org/models/densenet201-c1103571.pth',
+ 'densenet161': 'https://download.pytorch.org/models/densenet161-8d451a50.pth',
+}
+
+
+class _DenseLayer(nn.Module):
+ def __init__(self, num_input_features, growth_rate, bn_size, drop_rate, memory_efficient=False):
+ super(_DenseLayer, self).__init__()
+ self.add_module('norm1', nn.BatchNorm2d(num_input_features)),
+ self.add_module('relu1', nn.ReLU(inplace=True)),
+ self.add_module('conv1', nn.Conv2d(num_input_features, bn_size *
+ growth_rate, kernel_size=1, stride=1,
+ bias=False)),
+ self.add_module('norm2', nn.BatchNorm2d(bn_size * growth_rate)),
+ self.add_module('relu2', nn.ReLU(inplace=True)),
+ self.add_module('conv2', nn.Conv2d(bn_size * growth_rate, growth_rate,
+ kernel_size=3, stride=1, padding=1,
+ bias=False)),
+ self.drop_rate = float(drop_rate)
+ self.memory_efficient = memory_efficient
+
+ def bn_function(self, inputs):
+ # type: (List[Tensor]) -> Tensor
+ concated_features = torch.cat(inputs, 1)
+ bottleneck_output = self.conv1(self.relu1(self.norm1(concated_features))) # noqa: T484
+ return bottleneck_output
+
+ # todo: rewrite when torchscript supports any
+ def any_requires_grad(self, input):
+ # type: (List[Tensor]) -> bool
+ for tensor in input:
+ if tensor.requires_grad:
+ return True
+ return False
+
+ @torch.jit.unused # noqa: T484
+ def call_checkpoint_bottleneck(self, input):
+ # type: (List[Tensor]) -> Tensor
+ def closure(*inputs):
+ return self.bn_function(*inputs)
+
+ return cp.checkpoint(closure, input)
+
+ @torch.jit._overload_method # noqa: F811
+ def forward(self, input):
+ # type: (List[Tensor]) -> (Tensor)
+ pass
+
+ @torch.jit._overload_method # noqa: F811
+ def forward(self, input):
+ # type: (Tensor) -> (Tensor)
+ pass
+
+ # torchscript does not yet support *args, so we overload method
+ # allowing it to take either a List[Tensor] or single Tensor
+ def forward(self, input): # noqa: F811
+ if isinstance(input, Tensor):
+ prev_features = [input]
+ else:
+ prev_features = input
+
+ if self.memory_efficient and self.any_requires_grad(prev_features):
+ if torch.jit.is_scripting():
+ raise Exception("Memory Efficient not supported in JIT")
+
+ bottleneck_output = self.call_checkpoint_bottleneck(prev_features)
+ else:
+ bottleneck_output = self.bn_function(prev_features)
+
+ new_features = self.conv2(self.relu2(self.norm2(bottleneck_output)))
+ if self.drop_rate > 0:
+ new_features = F.dropout(new_features, p=self.drop_rate,
+ training=self.training)
+ return new_features
+
+
+class _DenseBlock(nn.ModuleDict):
+ _version = 2
+
+ def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate, memory_efficient=False):
+ super(_DenseBlock, self).__init__()
+ for i in range(num_layers):
+ layer = _DenseLayer(
+ num_input_features + i * growth_rate,
+ growth_rate=growth_rate,
+ bn_size=bn_size,
+ drop_rate=drop_rate,
+ memory_efficient=memory_efficient,
+ )
+ self.add_module('denselayer%d' % (i + 1), layer)
+
+ def forward(self, init_features):
+ features = [init_features]
+ for name, layer in self.items():
+ new_features = layer(features)
+ features.append(new_features)
+ return torch.cat(features, 1)
+
+
+class _Transition(nn.Sequential):
+ def __init__(self, num_input_features, num_output_features):
+ super(_Transition, self).__init__()
+ self.add_module('norm', nn.BatchNorm2d(num_input_features))
+ self.add_module('relu', nn.ReLU(inplace=True))
+ self.add_module('conv', nn.Conv2d(num_input_features, num_output_features,
+ kernel_size=1, stride=1, bias=False))
+ self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2))
+
+
+class DenseNet(nn.Module):
+ r"""Densenet-BC model class, based on
+ `"Densely Connected Convolutional Networks" `_
+
+ Args:
+ growth_rate (int) - how many filters to add each layer (`k` in paper)
+ block_config (list of 4 ints) - how many layers in each pooling block
+ num_init_features (int) - the number of filters to learn in the first convolution layer
+ bn_size (int) - multiplicative factor for number of bottleneck layers
+ (i.e. bn_size * k features in the bottleneck layer)
+ drop_rate (float) - dropout rate after each dense layer
+ num_classes (int) - number of classification classes
+ memory_efficient (bool) - If True, uses checkpointing. Much more memory efficient,
+ but slower. Default: *False*. See `"paper" <https://arxiv.org/pdf/1707.06990.pdf>`_
+ """
+
+ def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16),
+ num_init_features=64, bn_size=4, drop_rate=0, num_classes=1000, memory_efficient=False):
+
+ super(DenseNet, self).__init__()
+
+ # First convolution
+ self.features = nn.Sequential(OrderedDict([
+ ('conv0', nn.Conv2d(3, num_init_features, kernel_size=7, stride=2,
+ padding=3, bias=False)),
+ ('norm0', nn.BatchNorm2d(num_init_features)),
+ ('relu0', nn.ReLU(inplace=True)),
+ ('pool0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
+ ]))
+
+ # Each denseblock
+ num_features = num_init_features
+ for i, num_layers in enumerate(block_config):
+ block = _DenseBlock(
+ num_layers=num_layers,
+ num_input_features=num_features,
+ bn_size=bn_size,
+ growth_rate=growth_rate,
+ drop_rate=drop_rate,
+ memory_efficient=memory_efficient
+ )
+ self.features.add_module('denseblock%d' % (i + 1), block)
+ num_features = num_features + num_layers * growth_rate
+ if i != len(block_config) - 1:
+ trans = _Transition(num_input_features=num_features,
+ num_output_features=num_features // 2)
+ self.features.add_module('transition%d' % (i + 1), trans)
+ num_features = num_features // 2
+
+ # Final batch norm
+ self.features.add_module('norm5', nn.BatchNorm2d(num_features))
+
+ # Linear layer
+ self.classifier = nn.Linear(num_features, num_classes)
+
+ # Official init from torch repo.
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ nn.init.kaiming_normal_(m.weight)
+ elif isinstance(m, nn.BatchNorm2d):
+ nn.init.constant_(m.weight, 1)
+ nn.init.constant_(m.bias, 0)
+ elif isinstance(m, nn.Linear):
+ nn.init.constant_(m.bias, 0)
+
+ def forward(self, x):
+ features = self.features(x)
+ out = F.relu(features, inplace=True)
+ out = F.adaptive_avg_pool2d(out, (1, 1))
+ out = torch.flatten(out, 1)
+ out = self.classifier(out)
+ return out
+
+
+def _load_state_dict(model, model_url, progress):
+ # '.'s are no longer allowed in module names, but previous _DenseLayer
+ # has keys 'norm.1', 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'.
+ # They are also in the checkpoints in model_urls. This pattern is used
+ # to find such keys.
+ pattern = re.compile(
+ r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')
+
+ state_dict = load_state_dict_from_url(model_url, progress=progress)
+ for key in list(state_dict.keys()):
+ res = pattern.match(key)
+ if res:
+ new_key = res.group(1) + res.group(2)
+ state_dict[new_key] = state_dict[key]
+ del state_dict[key]
+ model.load_state_dict(state_dict)
+
+
+def _densenet(arch, growth_rate, block_config, num_init_features, pretrained, progress,
+ **kwargs):
+ model = DenseNet(growth_rate, block_config, num_init_features, **kwargs)
+ if pretrained:
+ _load_state_dict(model, model_urls[arch], progress)
+ return model
+
+
+def densenet121(pretrained=False, progress=True, **kwargs):
+ r"""Densenet-121 model from
+ `"Densely Connected Convolutional Networks" `_
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ memory_efficient (bool) - If True, uses checkpointing. Much more memory efficient,
+ but slower. Default: *False*. See `"paper" <https://arxiv.org/pdf/1707.06990.pdf>`_
+ """
+ return _densenet('densenet121', 32, (6, 12, 24, 16), 64, pretrained, progress,
+ **kwargs)
+
+
+def densenet161(pretrained=False, progress=True, **kwargs):
+ r"""Densenet-161 model from
+ `"Densely Connected Convolutional Networks" `_
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ memory_efficient (bool) - If True, uses checkpointing. Much more memory efficient,
+ but slower. Default: *False*. See `"paper" <https://arxiv.org/pdf/1707.06990.pdf>`_
+ """
+ return _densenet('densenet161', 48, (6, 12, 36, 24), 96, pretrained, progress,
+ **kwargs)
+
+
+def densenet169(pretrained=False, progress=True, **kwargs):
+ r"""Densenet-169 model from
+ `"Densely Connected Convolutional Networks" `_
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ memory_efficient (bool) - If True, uses checkpointing. Much more memory efficient,
+ but slower. Default: *False*. See `"paper" <https://arxiv.org/pdf/1707.06990.pdf>`_
+ """
+ return _densenet('densenet169', 32, (6, 12, 32, 32), 64, pretrained, progress,
+ **kwargs)
+
+
+def densenet201(pretrained=False, progress=True, **kwargs):
+ r"""Densenet-201 model from
+ `"Densely Connected Convolutional Networks" `_
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ memory_efficient (bool) - If True, uses checkpointing. Much more memory efficient,
+ but slower. Default: *False*. See `"paper" <https://arxiv.org/pdf/1707.06990.pdf>`_
+ """
+ return _densenet('densenet201', 32, (6, 12, 48, 32), 64, pretrained, progress,
+ **kwargs)
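The key-remapping regex in _load_state_dict above is easy to misread, so here is a short sketch (not part of the patch) of the transformation it applies to a legacy checkpoint key.

import re

pattern = re.compile(
    r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')
old_key = 'features.denseblock1.denselayer1.norm.1.weight'  # legacy name containing a dot
match = pattern.match(old_key)
print(match.group(1) + match.group(2))  # features.denseblock1.denselayer1.norm1.weight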
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/__init__.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/__init__.py
new file mode 100644
index 0000000000..cdfb6cf23c
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/__init__.py
@@ -0,0 +1,5 @@
+from .faster_rcnn import *
+from .mask_rcnn import *
+from .keypoint_rcnn import *
+from .ssd import *
+from .ssdlite import *
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils.py
new file mode 100644
index 0000000000..ce70d93be0
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils.py
@@ -0,0 +1,406 @@
+import math
+from collections import OrderedDict
+from typing import List, Tuple
+
+import torch
+from torch import Tensor, nn
+from torchvision.ops.misc import FrozenBatchNorm2d
+
+
+class BalancedPositiveNegativeSampler:
+ """
+ This class samples batches, ensuring that they contain a fixed proportion of positives
+ """
+
+ def __init__(self, batch_size_per_image: int, positive_fraction: float) -> None:
+ """
+ Args:
+ batch_size_per_image (int): number of elements to be selected per image
+ positive_fraction (float): percentage of positive elements per batch
+ """
+ self.batch_size_per_image = batch_size_per_image
+ self.positive_fraction = positive_fraction
+
+ def __call__(self, matched_idxs: List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]:
+ """
+ Args:
+ matched idxs: list of tensors containing -1, 0 or positive values.
+ Each tensor corresponds to a specific image.
+ -1 values are ignored, 0 are considered as negatives and > 0 as
+ positives.
+
+ Returns:
+ pos_idx (list[tensor])
+ neg_idx (list[tensor])
+
+ Returns two lists of binary masks for each image.
+ The first list contains the positive elements that were selected,
+ and the second list contains the negative examples that were selected.
+ """
+ pos_idx = []
+ neg_idx = []
+ for matched_idxs_per_image in matched_idxs:
+ positive = torch.where(matched_idxs_per_image >= 1)[0]
+ negative = torch.where(matched_idxs_per_image == 0)[0]
+
+ num_pos = int(self.batch_size_per_image * self.positive_fraction)
+ # protect against not enough positive examples
+ num_pos = min(positive.numel(), num_pos)
+ num_neg = self.batch_size_per_image - num_pos
+ # protect against not enough negative examples
+ num_neg = min(negative.numel(), num_neg)
+
+ # randomly select positive and negative examples
+ perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos]
+ perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg]
+
+ pos_idx_per_image = positive[perm1]
+ neg_idx_per_image = negative[perm2]
+
+ # create binary mask from indices
+ pos_idx_per_image_mask = torch.zeros_like(matched_idxs_per_image, dtype=torch.uint8)
+ neg_idx_per_image_mask = torch.zeros_like(matched_idxs_per_image, dtype=torch.uint8)
+
+ pos_idx_per_image_mask[pos_idx_per_image] = 1
+ neg_idx_per_image_mask[neg_idx_per_image] = 1
+
+ pos_idx.append(pos_idx_per_image_mask)
+ neg_idx.append(neg_idx_per_image_mask)
+
+ return pos_idx, neg_idx
+
+
+# @torch.jit._script_if_tracing
+def encode_boxes(reference_boxes: Tensor, proposals: Tensor, weights: Tensor) -> Tensor:
+ """
+ Encode a set of proposals with respect to some
+ reference boxes
+
+ Args:
+ reference_boxes (Tensor): reference boxes
+ proposals (Tensor): boxes to be encoded
+ weights (Tensor[4]): the weights for ``(x, y, w, h)``
+ """
+
+ # perform some unpacking to make it JIT-fusion friendly
+ wx = weights[0]
+ wy = weights[1]
+ ww = weights[2]
+ wh = weights[3]
+
+ proposals_x1 = proposals[:, 0].unsqueeze(1)
+ proposals_y1 = proposals[:, 1].unsqueeze(1)
+ proposals_x2 = proposals[:, 2].unsqueeze(1)
+ proposals_y2 = proposals[:, 3].unsqueeze(1)
+
+ reference_boxes_x1 = reference_boxes[:, 0].unsqueeze(1)
+ reference_boxes_y1 = reference_boxes[:, 1].unsqueeze(1)
+ reference_boxes_x2 = reference_boxes[:, 2].unsqueeze(1)
+ reference_boxes_y2 = reference_boxes[:, 3].unsqueeze(1)
+
+ # implementation starts here
+ ex_widths = proposals_x2 - proposals_x1
+ ex_heights = proposals_y2 - proposals_y1
+ ex_ctr_x = proposals_x1 + 0.5 * ex_widths
+ ex_ctr_y = proposals_y1 + 0.5 * ex_heights
+ # ex_widths += 1e-32
+ # ex_heights += 1e-32
+
+ gt_widths = reference_boxes_x2 - reference_boxes_x1
+ gt_widths_mask = gt_widths == 0
+ gt_widths_temp = gt_widths_mask * ex_widths
+ gt_widths = gt_widths + gt_widths_temp
+
+ gt_heights = reference_boxes_y2 - reference_boxes_y1
+ gt_heights_mask = gt_heights == 0
+ gt_heights_temp = gt_heights_mask * ex_heights
+ gt_heights = gt_heights + gt_heights_temp
+
+ gt_ctr_x = reference_boxes_x1 + 0.5 * gt_widths
+ gt_ctr_y = reference_boxes_y1 + 0.5 * gt_heights
+
+ targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths
+ targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights
+
+ targets_dw = ww * torch.log(gt_widths / ex_widths)
+ targets_dh = wh * torch.log(gt_heights / ex_heights)
+
+ targets = torch.cat((targets_dx, targets_dy, targets_dw, targets_dh), dim=1)
+ return targets
+
+
+class BoxCoder:
+ """
+ This class encodes and decodes a set of bounding boxes into
+ the representation used for training the regressors.
+ """
+
+ def __init__(
+ self, weights: Tuple[float, float, float, float], bbox_xform_clip: float = math.log(1000.0 / 16)
+ ) -> None:
+ """
+ Args:
+ weights (4-element tuple)
+ bbox_xform_clip (float)
+ """
+ self.weights = weights
+ self.bbox_xform_clip = bbox_xform_clip
+
+ def encode(self, reference_boxes: List[Tensor], proposals: List[Tensor]) -> List[Tensor]:
+ boxes_per_image = [len(b) for b in reference_boxes]
+ reference_boxes = torch.cat(reference_boxes, dim=0)
+ proposals = torch.cat(proposals, dim=0)
+ targets = self.encode_single(reference_boxes, proposals)
+ return targets.split(boxes_per_image, 0)
+
+ def encode_single(self, reference_boxes: Tensor, proposals: Tensor) -> Tensor:
+ """
+ Encode a set of proposals with respect to some
+ reference boxes
+
+ Args:
+ reference_boxes (Tensor): reference boxes
+ proposals (Tensor): boxes to be encoded
+ """
+ dtype = reference_boxes.dtype
+ device = reference_boxes.device
+ weights = torch.as_tensor(self.weights, dtype=dtype, device=device)
+ targets = encode_boxes(reference_boxes, proposals, weights)
+
+ return targets
+
+ def decode(self, rel_codes: Tensor, boxes: List[Tensor]) -> Tensor:
+ assert isinstance(boxes, (list, tuple))
+ assert isinstance(rel_codes, torch.Tensor)
+ boxes_per_image = [b.size(0) for b in boxes]
+ concat_boxes = torch.cat(boxes, dim=0)
+ box_sum = 0
+ for val in boxes_per_image:
+ box_sum += val
+ if box_sum > 0:
+ rel_codes = rel_codes.reshape(box_sum, -1)
+ pred_boxes = self.decode_single(rel_codes, concat_boxes)
+ if box_sum > 0:
+ pred_boxes = pred_boxes.reshape(box_sum, -1, 4)
+ return pred_boxes
+
+ def decode_single(self, rel_codes: Tensor, boxes: Tensor) -> Tensor:
+ """
+ From a set of original boxes and encoded relative box offsets,
+ get the decoded boxes.
+
+ Args:
+ rel_codes (Tensor): encoded boxes
+ boxes (Tensor): reference boxes.
+ """
+
+ boxes = boxes.to(rel_codes.dtype)
+
+ widths = boxes[:, 2] - boxes[:, 0]
+ heights = boxes[:, 3] - boxes[:, 1]
+ ctr_x = boxes[:, 0] + 0.5 * widths
+ ctr_y = boxes[:, 1] + 0.5 * heights
+
+ wx, wy, ww, wh = self.weights
+ dx = rel_codes[:, 0::4] / wx
+ dy = rel_codes[:, 1::4] / wy
+ dw = rel_codes[:, 2::4] / ww
+ dh = rel_codes[:, 3::4] / wh
+
+ # Prevent sending too large values into torch.exp()
+ dw = torch.clamp(dw, max=self.bbox_xform_clip)
+ dh = torch.clamp(dh, max=self.bbox_xform_clip)
+
+ pred_ctr_x = dx * widths[:, None] + ctr_x[:, None]
+ pred_ctr_y = dy * heights[:, None] + ctr_y[:, None]
+ pred_w = torch.exp(dw) * widths[:, None]
+ pred_h = torch.exp(dh) * heights[:, None]
+
+ # Distance from center to box's corner.
+ c_to_c_h = torch.tensor(0.5, dtype=pred_ctr_y.dtype, device=pred_h.device) * pred_h
+ c_to_c_w = torch.tensor(0.5, dtype=pred_ctr_x.dtype, device=pred_w.device) * pred_w
+
+ pred_boxes1 = pred_ctr_x - c_to_c_w
+ pred_boxes2 = pred_ctr_y - c_to_c_h
+ pred_boxes3 = pred_ctr_x + c_to_c_w
+ pred_boxes4 = pred_ctr_y + c_to_c_h
+ pred_boxes = torch.stack((pred_boxes1, pred_boxes2, pred_boxes3, pred_boxes4), dim=2).flatten(1)
+ return pred_boxes
+
+
+class Matcher:
+ """
+ This class assigns to each predicted "element" (e.g., a box) a ground-truth
+ element. Each predicted element will have exactly zero or one matches; each
+ ground-truth element may be assigned to zero or more predicted elements.
+
+ Matching is based on the MxN match_quality_matrix, which characterizes how well
+ each (ground-truth, predicted) pair matches. For example, if the elements are
+ boxes, the matrix may contain box IoU overlap values.
+
+ The matcher returns a tensor of size N containing the index of the ground-truth
+ element m that matches to prediction n. If there is no match, a negative value
+ is returned.
+ """
+
+ BELOW_LOW_THRESHOLD = -1
+ BETWEEN_THRESHOLDS = -2
+
+ __annotations__ = {
+ "BELOW_LOW_THRESHOLD": int,
+ "BETWEEN_THRESHOLDS": int,
+ }
+
+ def __init__(self, high_threshold: float, low_threshold: float, allow_low_quality_matches: bool = False) -> None:
+ """
+ Args:
+ high_threshold (float): quality values greater than or equal to
+ this value are candidate matches.
+ low_threshold (float): a lower quality threshold used to stratify
+ matches into three levels:
+ 1) matches >= high_threshold
+ 2) BETWEEN_THRESHOLDS matches in [low_threshold, high_threshold)
+ 3) BELOW_LOW_THRESHOLD matches in [0, low_threshold)
+ allow_low_quality_matches (bool): if True, produce additional matches
+ for predictions that have only low-quality match candidates. See
+ set_low_quality_matches_ for more details.
+ """
+ self.BELOW_LOW_THRESHOLD = -1
+ self.BETWEEN_THRESHOLDS = -2
+ assert low_threshold <= high_threshold
+ self.high_threshold = high_threshold
+ self.low_threshold = low_threshold
+ self.allow_low_quality_matches = allow_low_quality_matches
+
+ def __call__(self, match_quality_matrix: Tensor) -> Tensor:
+ """
+ Args:
+ match_quality_matrix (Tensor[float]): an MxN tensor, containing the
+ pairwise quality between M ground-truth elements and N predicted elements.
+
+ Returns:
+ matches (Tensor[int64]): an N tensor where N[i] is a matched gt in
+ [0, M - 1] or a negative value indicating that prediction i could not
+ be matched.
+ """
+ if match_quality_matrix.numel() == 0:
+ # empty targets or proposals not supported during training
+ if match_quality_matrix.shape[0] == 0:
+ raise ValueError("No ground-truth boxes available for one of the images during training")
+ else:
+ raise ValueError("No proposal boxes available for one of the images during training")
+
+ # match_quality_matrix is M (gt) x N (predicted)
+ # Max over gt elements (dim 0) to find best gt candidate for each prediction
+ matched_vals, matches = match_quality_matrix.max(dim=0)
+ if self.allow_low_quality_matches:
+ all_matches = matches.clone()
+ else:
+ all_matches = None # type: ignore[assignment]
+
+ # Assign candidate matches with low quality to negative (unassigned) values
+ below_low_threshold = matched_vals < self.low_threshold
+ between_thresholds = (matched_vals >= self.low_threshold) & (matched_vals < self.high_threshold)
+ matches[below_low_threshold] = self.BELOW_LOW_THRESHOLD
+ matches[between_thresholds] = self.BETWEEN_THRESHOLDS
+
+ if self.allow_low_quality_matches:
+ assert all_matches is not None
+ self.set_low_quality_matches_(matches, all_matches, match_quality_matrix)
+
+ return matches
+
+ def set_low_quality_matches_(self, matches: Tensor, all_matches: Tensor, match_quality_matrix: Tensor) -> None:
+ """
+ Produce additional matches for predictions that have only low-quality matches.
+ Specifically, for each ground-truth find the set of predictions that have
+ maximum overlap with it (including ties); for each prediction in that set, if
+ it is unmatched, then match it to the ground-truth with which it has the highest
+ quality value.
+ """
+ # For each gt, find the prediction with which it has highest quality
+ highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1)
+ # Find highest quality match available, even if it is low, including ties
+ gt_pred_pairs_of_highest_quality = torch.where(match_quality_matrix == highest_quality_foreach_gt[:, None])
+ # Example gt_pred_pairs_of_highest_quality:
+ # tensor([[ 0, 39796],
+ # [ 1, 32055],
+ # [ 1, 32070],
+ # [ 2, 39190],
+ # [ 2, 40255],
+ # [ 3, 40390],
+ # [ 3, 41455],
+ # [ 4, 45470],
+ # [ 5, 45325],
+ # [ 5, 46390]])
+ # Each row is a (gt index, prediction index)
+ # Note how gt items 1, 2, 3, and 5 each have two ties
+
+ pred_inds_to_update = gt_pred_pairs_of_highest_quality[1]
+ matches[pred_inds_to_update] = all_matches[pred_inds_to_update]
+
+
+class SSDMatcher(Matcher):
+ def __init__(self, threshold: float) -> None:
+ super().__init__(threshold, threshold, allow_low_quality_matches=False)
+
+ def __call__(self, match_quality_matrix: Tensor) -> Tensor:
+ matches = super().__call__(match_quality_matrix)
+
+ # For each gt, find the prediction with which it has the highest quality
+ _, highest_quality_pred_foreach_gt = match_quality_matrix.max(dim=1)
+ matches[highest_quality_pred_foreach_gt] = torch.arange(
+ highest_quality_pred_foreach_gt.size(0), dtype=torch.int64, device=highest_quality_pred_foreach_gt.device
+ )
+
+ return matches
+
+
+def overwrite_eps(model: nn.Module, eps: float) -> None:
+ """
+ This method overwrites the default eps values of all the
+ FrozenBatchNorm2d layers of the model with the provided value.
+ This is necessary to address the BC-breaking change introduced
+ by the bug-fix at pytorch/vision#2933. The overwrite is applied
+ only when the pretrained weights are loaded to maintain compatibility
+ with previous versions.
+
+ Args:
+ model (nn.Module): The model on which we perform the overwrite.
+ eps (float): The new value of eps.
+ """
+ for module in model.modules():
+ if isinstance(module, FrozenBatchNorm2d):
+ module.eps = eps
+
+
+def retrieve_out_channels(model: nn.Module, size: Tuple[int, int]) -> List[int]:
+ """
+ This method retrieves the number of output channels of a specific model.
+
+ Args:
+ model (nn.Module): The model for which we estimate the out_channels.
+ It should return a single Tensor or an OrderedDict[Tensor].
+ size (Tuple[int, int]): The size (wxh) of the input.
+
+ Returns:
+ out_channels (List[int]): A list of the output channels of the model.
+ """
+ in_training = model.training
+ model.eval()
+
+ with torch.no_grad():
+ # Use dummy data to retrieve the feature map sizes to avoid hard-coding their values
+ device = next(model.parameters()).device
+ tmp_img = torch.zeros((1, 3, size[1], size[0]), device=device)
+ features = model(tmp_img)
+ if isinstance(features, torch.Tensor):
+ features = OrderedDict([("0", features)])
+ out_channels = [x.size(1) for x in features.values()]
+
+ if in_training:
+ model.train()
+
+ return out_channels
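A hedged round-trip sketch for BoxCoder, not part of the patch: encoding reference boxes against proposals and then decoding those codes against the same proposals should recover the reference boxes up to floating-point error. The weights (10, 10, 5, 5) are only an assumed, commonly used setting.

import torch

coder = BoxCoder(weights=(10.0, 10.0, 5.0, 5.0))
reference = torch.tensor([[10.0, 10.0, 50.0, 60.0]])  # (x1, y1, x2, y2)
proposals = torch.tensor([[12.0, 8.0, 48.0, 55.0]])
codes = coder.encode_single(reference, proposals)
decoded = coder.decode(codes, [proposals])
print(decoded.squeeze())  # approximately tensor([10., 10., 50., 60.])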
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils_origin.py
new file mode 100644
index 0000000000..c48576328d
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils_origin.py
@@ -0,0 +1,348 @@
+import math
+
+import torch
+from torch.jit.annotations import List, Tuple
+from torch import Tensor
+import torchvision
+
+
+# TODO: https://github.com/pytorch/pytorch/issues/26727
+def zeros_like(tensor, dtype):
+ # type: (Tensor, int) -> Tensor
+ return torch.zeros_like(tensor, dtype=dtype, layout=tensor.layout,
+ device=tensor.device, pin_memory=tensor.is_pinned())
+
+
+@torch.jit.script
+class BalancedPositiveNegativeSampler(object):
+ """
+ This class samples batches, ensuring that they contain a fixed proportion of positives
+ """
+
+ def __init__(self, batch_size_per_image, positive_fraction):
+ # type: (int, float)
+ """
+ Arguments:
+ batch_size_per_image (int): number of elements to be selected per image
+ positive_fraction (float): percentage of positive elements per batch
+ """
+ self.batch_size_per_image = batch_size_per_image
+ self.positive_fraction = positive_fraction
+
+ def __call__(self, matched_idxs):
+ # type: (List[Tensor])
+ """
+ Arguments:
+ matched idxs: list of tensors containing -1, 0 or positive values.
+ Each tensor corresponds to a specific image.
+ -1 values are ignored, 0 are considered as negatives and > 0 as
+ positives.
+
+ Returns:
+ pos_idx (list[tensor])
+ neg_idx (list[tensor])
+
+ Returns two lists of binary masks for each image.
+ The first list contains the positive elements that were selected,
+ and the second list contains the negative examples that were selected.
+ """
+ pos_idx = []
+ neg_idx = []
+ for matched_idxs_per_image in matched_idxs:
+ positive = torch.nonzero(matched_idxs_per_image >= 1).squeeze(1)
+ negative = torch.nonzero(matched_idxs_per_image == 0).squeeze(1)
+
+ num_pos = int(self.batch_size_per_image * self.positive_fraction)
+ # protect against not enough positive examples
+ num_pos = min(positive.numel(), num_pos)
+ num_neg = self.batch_size_per_image - num_pos
+ # protect against not enough negative examples
+ num_neg = min(negative.numel(), num_neg)
+
+ # randomly select positive and negative examples
+ perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos]
+ perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg]
+
+ pos_idx_per_image = positive[perm1]
+ neg_idx_per_image = negative[perm2]
+
+ # create binary mask from indices
+ pos_idx_per_image_mask = zeros_like(
+ matched_idxs_per_image, dtype=torch.uint8
+ )
+ neg_idx_per_image_mask = zeros_like(
+ matched_idxs_per_image, dtype=torch.uint8
+ )
+
+ pos_idx_per_image_mask[pos_idx_per_image] = torch.tensor(1, dtype=torch.uint8)
+ neg_idx_per_image_mask[neg_idx_per_image] = torch.tensor(1, dtype=torch.uint8)
+
+ pos_idx.append(pos_idx_per_image_mask)
+ neg_idx.append(neg_idx_per_image_mask)
+
+ return pos_idx, neg_idx
+
+
+@torch.jit.script
+def encode_boxes(reference_boxes, proposals, weights):
+ # type: (torch.Tensor, torch.Tensor, torch.Tensor) -> torch.Tensor
+ """
+ Encode a set of proposals with respect to some
+ reference boxes
+
+ Arguments:
+ reference_boxes (Tensor): reference boxes
+ proposals (Tensor): boxes to be encoded
+ """
+
+ # perform some unpacking to make it JIT-fusion friendly
+ wx = weights[0]
+ wy = weights[1]
+ ww = weights[2]
+ wh = weights[3]
+
+ proposals_x1 = proposals[:, 0].unsqueeze(1)
+ proposals_y1 = proposals[:, 1].unsqueeze(1)
+ proposals_x2 = proposals[:, 2].unsqueeze(1)
+ proposals_y2 = proposals[:, 3].unsqueeze(1)
+
+ reference_boxes_x1 = reference_boxes[:, 0].unsqueeze(1)
+ reference_boxes_y1 = reference_boxes[:, 1].unsqueeze(1)
+ reference_boxes_x2 = reference_boxes[:, 2].unsqueeze(1)
+ reference_boxes_y2 = reference_boxes[:, 3].unsqueeze(1)
+
+ # implementation starts here
+ ex_widths = proposals_x2 - proposals_x1
+ ex_heights = proposals_y2 - proposals_y1
+ ex_ctr_x = proposals_x1 + 0.5 * ex_widths
+ ex_ctr_y = proposals_y1 + 0.5 * ex_heights
+
+ gt_widths = reference_boxes_x2 - reference_boxes_x1
+ gt_heights = reference_boxes_y2 - reference_boxes_y1
+ gt_ctr_x = reference_boxes_x1 + 0.5 * gt_widths
+ gt_ctr_y = reference_boxes_y1 + 0.5 * gt_heights
+
+ targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths
+ targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights
+ targets_dw = ww * torch.log(gt_widths / ex_widths)
+ targets_dh = wh * torch.log(gt_heights / ex_heights)
+
+ targets = torch.cat((targets_dx, targets_dy, targets_dw, targets_dh), dim=1)
+ return targets
+
+
+@torch.jit.script
+class BoxCoder(object):
+ """
+ This class encodes and decodes a set of bounding boxes into
+ the representation used for training the regressors.
+ """
+
+ def __init__(self, weights, bbox_xform_clip=math.log(1000. / 16)):
+ # type: (Tuple[float, float, float, float], float)
+ """
+ Arguments:
+ weights (4-element tuple)
+ bbox_xform_clip (float)
+ """
+ self.weights = weights
+ self.bbox_xform_clip = bbox_xform_clip
+
+ def encode(self, reference_boxes, proposals):
+ # type: (List[Tensor], List[Tensor])
+ boxes_per_image = [len(b) for b in reference_boxes]
+ reference_boxes = torch.cat(reference_boxes, dim=0)
+ proposals = torch.cat(proposals, dim=0)
+ targets = self.encode_single(reference_boxes, proposals)
+ return targets.split(boxes_per_image, 0)
+
+ def encode_single(self, reference_boxes, proposals):
+ """
+ Encode a set of proposals with respect to some
+ reference boxes
+
+ Arguments:
+ reference_boxes (Tensor): reference boxes
+ proposals (Tensor): boxes to be encoded
+ """
+ dtype = reference_boxes.dtype
+ device = reference_boxes.device
+ weights = torch.as_tensor(self.weights, dtype=dtype, device=device)
+ targets = encode_boxes(reference_boxes, proposals, weights)
+
+ return targets
+
+ def decode(self, rel_codes, boxes):
+ # type: (Tensor, List[Tensor])
+ assert isinstance(boxes, (list, tuple))
+ assert isinstance(rel_codes, torch.Tensor)
+ boxes_per_image = [b.size(0) for b in boxes]
+ concat_boxes = torch.cat(boxes, dim=0)
+ box_sum = 0
+ for val in boxes_per_image:
+ box_sum += val
+ pred_boxes = self.decode_single(
+ rel_codes.reshape(box_sum, -1), concat_boxes
+ )
+ return pred_boxes.reshape(box_sum, -1, 4)
+
+ def decode_single(self, rel_codes, boxes):
+ """
+ From a set of original boxes and encoded relative box offsets,
+ get the decoded boxes.
+
+ Arguments:
+ rel_codes (Tensor): encoded boxes
+ boxes (Tensor): reference boxes.
+ """
+
+ boxes = boxes.to(rel_codes.dtype)
+
+ widths = boxes[:, 2] - boxes[:, 0]
+ heights = boxes[:, 3] - boxes[:, 1]
+ ctr_x = boxes[:, 0] + 0.5 * widths
+ ctr_y = boxes[:, 1] + 0.5 * heights
+
+ wx, wy, ww, wh = self.weights
+ dx = rel_codes[:, 0::4] / wx
+ dy = rel_codes[:, 1::4] / wy
+ dw = rel_codes[:, 2::4] / ww
+ dh = rel_codes[:, 3::4] / wh
+
+ # Prevent sending too large values into torch.exp()
+ dw = torch.clamp(dw, max=self.bbox_xform_clip)
+ dh = torch.clamp(dh, max=self.bbox_xform_clip)
+
+ pred_ctr_x = dx * widths[:, None] + ctr_x[:, None]
+ pred_ctr_y = dy * heights[:, None] + ctr_y[:, None]
+ pred_w = torch.exp(dw) * widths[:, None]
+ pred_h = torch.exp(dh) * heights[:, None]
+
+ pred_boxes1 = pred_ctr_x - torch.tensor(0.5, dtype=pred_ctr_x.dtype, device=pred_w.device) * pred_w
+ pred_boxes2 = pred_ctr_y - torch.tensor(0.5, dtype=pred_ctr_y.dtype, device=pred_h.device) * pred_h
+ pred_boxes3 = pred_ctr_x + torch.tensor(0.5, dtype=pred_ctr_x.dtype, device=pred_w.device) * pred_w
+ pred_boxes4 = pred_ctr_y + torch.tensor(0.5, dtype=pred_ctr_y.dtype, device=pred_h.device) * pred_h
+ pred_boxes = torch.stack((pred_boxes1, pred_boxes2, pred_boxes3, pred_boxes4), dim=2).flatten(1)
+ return pred_boxes
+
+
+@torch.jit.script
+class Matcher(object):
+ """
+ This class assigns to each predicted "element" (e.g., a box) a ground-truth
+ element. Each predicted element will have exactly zero or one matches; each
+ ground-truth element may be assigned to zero or more predicted elements.
+
+ Matching is based on the MxN match_quality_matrix, which characterizes how well
+ each (ground-truth, predicted) pair matches. For example, if the elements are
+ boxes, the matrix may contain box IoU overlap values.
+
+ The matcher returns a tensor of size N containing the index of the ground-truth
+ element m that matches to prediction n. If there is no match, a negative value
+ is returned.
+ """
+
+ BELOW_LOW_THRESHOLD = -1
+ BETWEEN_THRESHOLDS = -2
+
+ __annotations__ = {
+ 'BELOW_LOW_THRESHOLD': int,
+ 'BETWEEN_THRESHOLDS': int,
+ }
+
+ def __init__(self, high_threshold, low_threshold, allow_low_quality_matches=False):
+ # type: (float, float, bool)
+ """
+ Args:
+ high_threshold (float): quality values greater than or equal to
+ this value are candidate matches.
+ low_threshold (float): a lower quality threshold used to stratify
+ matches into three levels:
+ 1) matches >= high_threshold
+ 2) BETWEEN_THRESHOLDS matches in [low_threshold, high_threshold)
+ 3) BELOW_LOW_THRESHOLD matches in [0, low_threshold)
+ allow_low_quality_matches (bool): if True, produce additional matches
+ for predictions that have only low-quality match candidates. See
+ set_low_quality_matches_ for more details.
+ """
+ self.BELOW_LOW_THRESHOLD = -1
+ self.BETWEEN_THRESHOLDS = -2
+ assert low_threshold <= high_threshold
+ self.high_threshold = high_threshold
+ self.low_threshold = low_threshold
+ self.allow_low_quality_matches = allow_low_quality_matches
+
+ def __call__(self, match_quality_matrix):
+ """
+ Args:
+ match_quality_matrix (Tensor[float]): an MxN tensor, containing the
+ pairwise quality between M ground-truth elements and N predicted elements.
+
+ Returns:
+ matches (Tensor[int64]): an N tensor where N[i] is a matched gt in
+ [0, M - 1] or a negative value indicating that prediction i could not
+ be matched.
+ """
+ if match_quality_matrix.numel() == 0:
+ # empty targets or proposals not supported during training
+ if match_quality_matrix.shape[0] == 0:
+ raise ValueError(
+ "No ground-truth boxes available for one of the images "
+ "during training")
+ else:
+ raise ValueError(
+ "No proposal boxes available for one of the images "
+ "during training")
+
+ # match_quality_matrix is M (gt) x N (predicted)
+ # Max over gt elements (dim 0) to find best gt candidate for each prediction
+ matched_vals, matches = match_quality_matrix.max(dim=0)
+ if self.allow_low_quality_matches:
+ all_matches = matches.clone()
+ else:
+ all_matches = None
+
+ # Assign candidate matches with low quality to negative (unassigned) values
+ below_low_threshold = matched_vals < self.low_threshold
+ between_thresholds = (matched_vals >= self.low_threshold) & (
+ matched_vals < self.high_threshold
+ )
+ matches[below_low_threshold] = torch.tensor(self.BELOW_LOW_THRESHOLD)
+ matches[between_thresholds] = torch.tensor(self.BETWEEN_THRESHOLDS)
+
+ if self.allow_low_quality_matches:
+ assert all_matches is not None
+ self.set_low_quality_matches_(matches, all_matches, match_quality_matrix)
+
+ return matches
+
+ def set_low_quality_matches_(self, matches, all_matches, match_quality_matrix):
+ """
+ Produce additional matches for predictions that have only low-quality matches.
+ Specifically, for each ground-truth find the set of predictions that have
+ maximum overlap with it (including ties); for each prediction in that set, if
+ it is unmatched, then match it to the ground-truth with which it has the highest
+ quality value.
+ """
+ # For each gt, find the prediction with which it has highest quality
+ highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1)
+ # Find highest quality match available, even if it is low, including ties
+ gt_pred_pairs_of_highest_quality = torch.nonzero(
+ match_quality_matrix == highest_quality_foreach_gt[:, None]
+ )
+ # Example gt_pred_pairs_of_highest_quality:
+ # tensor([[ 0, 39796],
+ # [ 1, 32055],
+ # [ 1, 32070],
+ # [ 2, 39190],
+ # [ 2, 40255],
+ # [ 3, 40390],
+ # [ 3, 41455],
+ # [ 4, 45470],
+ # [ 5, 45325],
+ # [ 5, 46390]])
+ # Each row is a (gt index, prediction index)
+ # Note how gt items 1, 2, 3, and 5 each have two ties
+
+ pred_inds_to_update = gt_pred_pairs_of_highest_quality[:, 1]
+ matches[pred_inds_to_update] = all_matches[pred_inds_to_update]
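To make the threshold logic concrete, a tiny hedged example (not part of the patch); it uses the eager Matcher defined in the companion _utils.py rather than the scripted class above, but the matching rules are identical. Rows are ground-truth boxes, columns are predictions.

import torch

quality = torch.tensor([[0.90, 0.30, 0.05],
                        [0.20, 0.60, 0.10]])
matcher = Matcher(high_threshold=0.7, low_threshold=0.4, allow_low_quality_matches=False)
print(matcher(quality))
# tensor([ 0, -2, -1]) -> matched to gt 0, BETWEEN_THRESHOLDS, BELOW_LOW_THRESHOLD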
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils.py
new file mode 100644
index 0000000000..1d6298eabe
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils.py
@@ -0,0 +1,279 @@
+import math
+from typing import List, Optional
+
+import torch
+from torch import nn, Tensor
+
+from .image_list import ImageList
+import numpy as np
+
+class AnchorGenerator(nn.Module):
+ """
+ Module that generates anchors for a set of feature maps and
+ image sizes.
+
+ The module supports computing anchors at multiple sizes and aspect ratios
+ per feature map. This module assumes aspect ratio = height / width for
+ each anchor.
+
+ sizes and aspect_ratios should have the same number of elements, and that number
+ should correspond to the number of feature maps.
+
+ sizes[i] and aspect_ratios[i] can have an arbitrary number of elements,
+ and AnchorGenerator will output a set of sizes[i] * aspect_ratios[i] anchors
+ per spatial location for feature map i.
+
+ Args:
+ sizes (Tuple[Tuple[int]]):
+ aspect_ratios (Tuple[Tuple[float]]):
+ """
+
+ __annotations__ = {
+ "cell_anchors": List[torch.Tensor],
+ }
+
+ def __init__(
+ self,
+ sizes=((128, 256, 512),),
+ aspect_ratios=((0.5, 1.0, 2.0),),
+ ):
+ super().__init__()
+
+ if not isinstance(sizes[0], (list, tuple)):
+ # TODO change this
+ sizes = tuple((s,) for s in sizes)
+ if not isinstance(aspect_ratios[0], (list, tuple)):
+ aspect_ratios = (aspect_ratios,) * len(sizes)
+
+ assert len(sizes) == len(aspect_ratios)
+
+ self.sizes = sizes
+ self.aspect_ratios = aspect_ratios
+ self.cell_anchors = [
+ self.generate_anchors(size, aspect_ratio) for size, aspect_ratio in zip(sizes, aspect_ratios)
+ ]
+
+ # TODO: https://github.com/pytorch/pytorch/issues/26792
+ # For every (aspect_ratios, scales) combination, output a zero-centered anchor with those values.
+ # (scales, aspect_ratios) are usually an element of zip(self.scales, self.aspect_ratios)
+ # This method assumes aspect ratio = height / width for an anchor.
+ def generate_anchors(
+ self,
+ scales: List[int],
+ aspect_ratios: List[float],
+ dtype: torch.dtype = torch.float32,
+ device: torch.device = torch.device("cpu"),
+ ):
+ scales = torch.as_tensor(scales, dtype=dtype, device=device)
+ aspect_ratios = torch.as_tensor(aspect_ratios, dtype=dtype, device=device)
+ h_ratios = torch.sqrt(aspect_ratios)
+ w_ratios = 1 / h_ratios
+
+ ws = (w_ratios[:, None] * scales[None, :]).view(-1)
+ hs = (h_ratios[:, None] * scales[None, :]).view(-1)
+
+ base_anchors = torch.stack([-ws, -hs, ws, hs], dim=1) / 2
+ return base_anchors.round()
+
+ def set_cell_anchors(self, dtype: torch.dtype, device: torch.device):
+ self.cell_anchors = [cell_anchor.to(dtype=dtype, device=device) for cell_anchor in self.cell_anchors]
+
+ def num_anchors_per_location(self):
+ return [len(s) * len(a) for s, a in zip(self.sizes, self.aspect_ratios)]
+
+ # For every combination of (a, (g, s), i) in (self.cell_anchors, zip(grid_sizes, strides), 0:2),
+ # output g[i] anchors that are s[i] distance apart in direction i, with the same dimensions as a.
+ def grid_anchors(self, grid_sizes: List[List[int]], strides: List[List[Tensor]]) -> List[Tensor]:
+ anchors = []
+ cell_anchors = self.cell_anchors
+ assert cell_anchors is not None
+
+ if not (len(grid_sizes) == len(strides) == len(cell_anchors)):
+ raise ValueError(
+ "Anchors should be Tuple[Tuple[int]] because each feature "
+ "map could potentially have different sizes and aspect ratios. "
+ "There needs to be a match between the number of "
+ "feature maps passed and the number of sizes / aspect ratios specified."
+ )
+
+ for size, stride, base_anchors in zip(grid_sizes, strides, cell_anchors):
+ grid_height, grid_width = size
+ stride_height, stride_width = stride
+ device = base_anchors.device
+
+ # For output anchor, compute [x_center, y_center, x_center, y_center]
+ shifts_x = torch.arange(0, grid_width, dtype=torch.int32, device=device) * stride_width
+ shifts_y = torch.arange(0, grid_height, dtype=torch.int32, device=device) * stride_height
+ # shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x, indexing="ij")
+ shift_y, shift_x = np.meshgrid(shifts_y, shifts_x, indexing="ij")
+ shift_y = torch.from_numpy(shift_y)
+ shift_x = torch.from_numpy(shift_x)
+
+ shift_x = shift_x.reshape(-1)
+ shift_y = shift_y.reshape(-1)
+ shifts = torch.stack((shift_x, shift_y, shift_x, shift_y), dim=1)
+
+ # For every (base anchor, output anchor) pair,
+ # offset each zero-centered base anchor by the center of the output anchor.
+ anchors.append((shifts.view(-1, 1, 4) + base_anchors.view(1, -1, 4)).reshape(-1, 4))
+
+ return anchors
+
+ def forward(self, image_list: ImageList, feature_maps: List[Tensor]) -> List[Tensor]:
+ grid_sizes = [feature_map.shape[-2:] for feature_map in feature_maps]
+ image_size = image_list.tensors.shape[-2:]
+ dtype, device = feature_maps[0].dtype, feature_maps[0].device
+ strides = [
+ [
+ torch.tensor(image_size[0] // g[0], dtype=torch.int64, device=device),
+ torch.tensor(image_size[1] // g[1], dtype=torch.int64, device=device),
+ ]
+ for g in grid_sizes
+ ]
+ self.set_cell_anchors(dtype, device)
+ anchors_over_all_feature_maps = self.grid_anchors(grid_sizes, strides)
+ anchors: List[List[torch.Tensor]] = []
+ for _ in range(len(image_list.image_sizes)):
+ anchors_in_image = [anchors_per_feature_map for anchors_per_feature_map in anchors_over_all_feature_maps]
+ anchors.append(anchors_in_image)
+ anchors = [torch.cat(anchors_per_image) for anchors_per_image in anchors]
+ return anchors
+
+
+class DefaultBoxGenerator(nn.Module):
+ """
+ This module generates the default boxes of SSD for a set of feature maps and image sizes.
+
+ Args:
+ aspect_ratios (List[List[int]]): A list with all the aspect ratios used in each feature map.
+ min_ratio (float): The minimum scale :math:`\text{s}_{\text{min}}` of the default boxes used in the estimation
+ of the scales of each feature map. It is used only if the ``scales`` parameter is not provided.
+ max_ratio (float): The maximum scale :math:`\text{s}_{\text{max}}` of the default boxes used in the estimation
+ of the scales of each feature map. It is used only if the ``scales`` parameter is not provided.
+ scales (List[float], optional): The scales of the default boxes. If not provided it will be estimated using
+ the ``min_ratio`` and ``max_ratio`` parameters.
+ steps (List[int], optional): A hyper-parameter that affects the tiling of default boxes. If not provided
+ it will be estimated from the data.
+ clip (bool): Whether the standardized values of default boxes should be clipped between 0 and 1. The clipping
+ is applied while the boxes are encoded in format ``(cx, cy, w, h)``.
+ """
+
+ def __init__(
+ self,
+ aspect_ratios: List[List[int]],
+ min_ratio: float = 0.15,
+ max_ratio: float = 0.9,
+ scales: Optional[List[float]] = None,
+ steps: Optional[List[int]] = None,
+ clip: bool = True,
+ ):
+ super().__init__()
+ if steps is not None:
+ assert len(aspect_ratios) == len(steps)
+ self.aspect_ratios = aspect_ratios
+ self.steps = steps
+ self.clip = clip
+ num_outputs = len(aspect_ratios)
+
+ # Estimation of default boxes scales
+ if scales is None:
+ if num_outputs > 1:
+ range_ratio = max_ratio - min_ratio
+ self.scales = [min_ratio + range_ratio * k / (num_outputs - 1.0) for k in range(num_outputs)]
+ self.scales.append(1.0)
+ else:
+ self.scales = [min_ratio, max_ratio]
+ else:
+ self.scales = scales
+
+ self._wh_pairs = self._generate_wh_pairs(num_outputs)
+
+ def _generate_wh_pairs(
+ self, num_outputs: int, dtype: torch.dtype = torch.float32, device: torch.device = torch.device("cpu")
+ ) -> List[Tensor]:
+ _wh_pairs: List[Tensor] = []
+ for k in range(num_outputs):
+ # Adding the 2 default width-height pairs for aspect ratio 1 and scale s'k
+ s_k = self.scales[k]
+ s_prime_k = math.sqrt(self.scales[k] * self.scales[k + 1])
+ wh_pairs = [[s_k, s_k], [s_prime_k, s_prime_k]]
+
+ # Adding 2 pairs for each aspect ratio of the feature map k
+ for ar in self.aspect_ratios[k]:
+ sq_ar = math.sqrt(ar)
+ w = self.scales[k] * sq_ar
+ h = self.scales[k] / sq_ar
+ wh_pairs.extend([[w, h], [h, w]])
+
+ _wh_pairs.append(torch.as_tensor(wh_pairs, dtype=dtype, device=device))
+ return _wh_pairs
+
+ def num_anchors_per_location(self):
+ # Estimate num of anchors based on aspect ratios: 2 default boxes + 2 * ratios of feature map.
+ return [2 + 2 * len(r) for r in self.aspect_ratios]
+
+ # Default Boxes calculation based on page 6 of SSD paper
+ def _grid_default_boxes(
+ self, grid_sizes: List[List[int]], image_size: List[int], dtype: torch.dtype = torch.float32
+ ) -> Tensor:
+ default_boxes = []
+ for k, f_k in enumerate(grid_sizes):
+ # Now add the default boxes for each width-height pair
+ if self.steps is not None:
+ x_f_k = image_size[0] / self.steps[k]
+ y_f_k = image_size[1] / self.steps[k]
+ else:
+ y_f_k, x_f_k = f_k
+
+ shifts_x = ((torch.arange(0, f_k[1]) + 0.5) / x_f_k).to(dtype=dtype)
+ shifts_y = ((torch.arange(0, f_k[0]) + 0.5) / y_f_k).to(dtype=dtype)
+ # shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x, indexing="ij") #origin
+ shift_y, shift_x = np.meshgrid(shifts_y, shifts_x, indexing="ij")
+ shift_y = torch.from_numpy(shift_y)
+ shift_x = torch.from_numpy(shift_x)
+
+ shift_x = shift_x.reshape(-1)
+ shift_y = shift_y.reshape(-1)
+
+ shifts = torch.stack((shift_x, shift_y) * len(self._wh_pairs[k]), dim=-1).reshape(-1, 2)
+ # Clipping the default boxes while the boxes are encoded in format (cx, cy, w, h)
+ _wh_pair = self._wh_pairs[k].clamp(min=0, max=1) if self.clip else self._wh_pairs[k]
+ wh_pairs = _wh_pair.repeat((f_k[0] * f_k[1]), 1)
+
+ # default_box = torch.cat((shifts, wh_pairs), dim=1)
+ default_box = torch.cat((shifts, wh_pairs.half()), dim=1)
+
+ default_boxes.append(default_box)
+
+ return torch.cat(default_boxes, dim=0)
+
+ def __repr__(self) -> str:
+ s = self.__class__.__name__ + "("
+ s += "aspect_ratios={aspect_ratios}"
+ s += ", clip={clip}"
+ s += ", scales={scales}"
+ s += ", steps={steps}"
+ s += ")"
+ return s.format(**self.__dict__)
+
+ def forward(self, image_list: ImageList, feature_maps: List[Tensor]) -> List[Tensor]:
+ grid_sizes = [feature_map.shape[-2:] for feature_map in feature_maps]
+ image_size = image_list.tensors.shape[-2:]
+ dtype, device = feature_maps[0].dtype, feature_maps[0].device
+ default_boxes = self._grid_default_boxes(grid_sizes, image_size, dtype=dtype)
+ default_boxes = default_boxes.to(device)
+
+ dboxes = []
+ for _ in image_list.image_sizes:
+ dboxes_in_image = default_boxes
+ dboxes_in_image = torch.cat(
+ [
+ dboxes_in_image[:, :2] - 0.5 * dboxes_in_image[:, 2:],
+ dboxes_in_image[:, :2] + 0.5 * dboxes_in_image[:, 2:],
+ ],
+ -1,
+ )
+ dboxes_in_image[:, 0::2] *= image_size[1]
+ dboxes_in_image[:, 1::2] *= image_size[0]
+ dboxes.append(dboxes_in_image)
+ return dboxes
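A small sketch of how DefaultBoxGenerator is parameterized, not part of the patch; the aspect ratios below are a typical SSD-style configuration chosen only for illustration.

aspect_ratios = [[2, 3], [2, 3], [2, 3], [2, 3], [2], [2]]
dbox_gen = DefaultBoxGenerator(aspect_ratios, min_ratio=0.2, max_ratio=0.95)
print(dbox_gen.num_anchors_per_location())     # [6, 6, 6, 6, 4, 4] -> 2 + 2 * len(ratios) per map
print([round(s, 3) for s in dbox_gen.scales])  # [0.2, 0.35, 0.5, 0.65, 0.8, 0.95, 1.0]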
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils_origin.py
new file mode 100644
index 0000000000..bac7cb6c74
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils_origin.py
@@ -0,0 +1,270 @@
+import math
+from typing import List, Optional
+
+import torch
+from torch import nn, Tensor
+
+from .image_list import ImageList
+
+
+class AnchorGenerator(nn.Module):
+ """
+ Module that generates anchors for a set of feature maps and
+ image sizes.
+
+ The module supports computing anchors at multiple sizes and aspect ratios
+ per feature map. This module assumes aspect ratio = height / width for
+ each anchor.
+
+ sizes and aspect_ratios should have the same number of elements, and that number
+ should correspond to the number of feature maps.
+
+ sizes[i] and aspect_ratios[i] can have an arbitrary number of elements,
+ and AnchorGenerator will output a set of sizes[i] * aspect_ratios[i] anchors
+ per spatial location for feature map i.
+
+ Args:
+ sizes (Tuple[Tuple[int]]):
+ aspect_ratios (Tuple[Tuple[float]]):
+ """
+
+ __annotations__ = {
+ "cell_anchors": List[torch.Tensor],
+ }
+
+ def __init__(
+ self,
+ sizes=((128, 256, 512),),
+ aspect_ratios=((0.5, 1.0, 2.0),),
+ ):
+ super().__init__()
+
+ if not isinstance(sizes[0], (list, tuple)):
+ # TODO change this
+ sizes = tuple((s,) for s in sizes)
+ if not isinstance(aspect_ratios[0], (list, tuple)):
+ aspect_ratios = (aspect_ratios,) * len(sizes)
+
+ assert len(sizes) == len(aspect_ratios)
+
+ self.sizes = sizes
+ self.aspect_ratios = aspect_ratios
+ self.cell_anchors = [
+ self.generate_anchors(size, aspect_ratio) for size, aspect_ratio in zip(sizes, aspect_ratios)
+ ]
+
+ # TODO: https://github.com/pytorch/pytorch/issues/26792
+ # For every (aspect_ratios, scales) combination, output a zero-centered anchor with those values.
+ # (scales, aspect_ratios) are usually an element of zip(self.scales, self.aspect_ratios)
+ # This method assumes aspect ratio = height / width for an anchor.
+ def generate_anchors(
+ self,
+ scales: List[int],
+ aspect_ratios: List[float],
+ dtype: torch.dtype = torch.float32,
+ device: torch.device = torch.device("cpu"),
+ ):
+ scales = torch.as_tensor(scales, dtype=dtype, device=device)
+ aspect_ratios = torch.as_tensor(aspect_ratios, dtype=dtype, device=device)
+ h_ratios = torch.sqrt(aspect_ratios)
+ w_ratios = 1 / h_ratios
+
+ ws = (w_ratios[:, None] * scales[None, :]).view(-1)
+ hs = (h_ratios[:, None] * scales[None, :]).view(-1)
+
+ base_anchors = torch.stack([-ws, -hs, ws, hs], dim=1) / 2
+ return base_anchors.round()
+
+ def set_cell_anchors(self, dtype: torch.dtype, device: torch.device):
+ self.cell_anchors = [cell_anchor.to(dtype=dtype, device=device) for cell_anchor in self.cell_anchors]
+
+ def num_anchors_per_location(self):
+ return [len(s) * len(a) for s, a in zip(self.sizes, self.aspect_ratios)]
+
+ # For every combination of (a, (g, s), i) in (self.cell_anchors, zip(grid_sizes, strides), 0:2),
+ # output g[i] anchors that are s[i] distance apart in direction i, with the same dimensions as a.
+ def grid_anchors(self, grid_sizes: List[List[int]], strides: List[List[Tensor]]) -> List[Tensor]:
+ anchors = []
+ cell_anchors = self.cell_anchors
+ assert cell_anchors is not None
+
+ if not (len(grid_sizes) == len(strides) == len(cell_anchors)):
+ raise ValueError(
+ "Anchors should be Tuple[Tuple[int]] because each feature "
+ "map could potentially have different sizes and aspect ratios. "
+ "There needs to be a match between the number of "
+ "feature maps passed and the number of sizes / aspect ratios specified."
+ )
+
+ for size, stride, base_anchors in zip(grid_sizes, strides, cell_anchors):
+ grid_height, grid_width = size
+ stride_height, stride_width = stride
+ device = base_anchors.device
+
+ # For output anchor, compute [x_center, y_center, x_center, y_center]
+ shifts_x = torch.arange(0, grid_width, dtype=torch.int32, device=device) * stride_width
+ shifts_y = torch.arange(0, grid_height, dtype=torch.int32, device=device) * stride_height
+ shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x, indexing="ij")
+ shift_x = shift_x.reshape(-1)
+ shift_y = shift_y.reshape(-1)
+ shifts = torch.stack((shift_x, shift_y, shift_x, shift_y), dim=1)
+
+ # For every (base anchor, output anchor) pair,
+ # offset each zero-centered base anchor by the center of the output anchor.
+ anchors.append((shifts.view(-1, 1, 4) + base_anchors.view(1, -1, 4)).reshape(-1, 4))
+
+ return anchors
+
+ def forward(self, image_list: ImageList, feature_maps: List[Tensor]) -> List[Tensor]:
+ grid_sizes = [feature_map.shape[-2:] for feature_map in feature_maps]
+ image_size = image_list.tensors.shape[-2:]
+ dtype, device = feature_maps[0].dtype, feature_maps[0].device
+ strides = [
+ [
+ torch.tensor(image_size[0] // g[0], dtype=torch.int64, device=device),
+ torch.tensor(image_size[1] // g[1], dtype=torch.int64, device=device),
+ ]
+ for g in grid_sizes
+ ]
+ self.set_cell_anchors(dtype, device)
+ anchors_over_all_feature_maps = self.grid_anchors(grid_sizes, strides)
+ anchors: List[List[torch.Tensor]] = []
+ for _ in range(len(image_list.image_sizes)):
+ anchors_in_image = [anchors_per_feature_map for anchors_per_feature_map in anchors_over_all_feature_maps]
+ anchors.append(anchors_in_image)
+ anchors = [torch.cat(anchors_per_image) for anchors_per_image in anchors]
+ return anchors
+
+
+class DefaultBoxGenerator(nn.Module):
+ """
+ This module generates the default boxes of SSD for a set of feature maps and image sizes.
+
+ Args:
+ aspect_ratios (List[List[int]]): A list with all the aspect ratios used in each feature map.
+ min_ratio (float): The minimum scale :math:`\text{s}_{\text{min}}` of the default boxes used in the estimation
+ of the scales of each feature map. It is used only if the ``scales`` parameter is not provided.
+ max_ratio (float): The maximum scale :math:`\text{s}_{\text{max}}` of the default boxes used in the estimation
+ of the scales of each feature map. It is used only if the ``scales`` parameter is not provided.
+ scales (List[float], optional): The scales of the default boxes. If not provided it will be estimated using
+ the ``min_ratio`` and ``max_ratio`` parameters.
+ steps (List[int], optional): A hyper-parameter that affects the tiling of default boxes. If not provided
+ it will be estimated from the data.
+ clip (bool): Whether the standardized values of default boxes should be clipped between 0 and 1. The clipping
+ is applied while the boxes are encoded in format ``(cx, cy, w, h)``.
+ """
+
+ def __init__(
+ self,
+ aspect_ratios: List[List[int]],
+ min_ratio: float = 0.15,
+ max_ratio: float = 0.9,
+ scales: Optional[List[float]] = None,
+ steps: Optional[List[int]] = None,
+ clip: bool = True,
+ ):
+ super().__init__()
+ if steps is not None:
+ assert len(aspect_ratios) == len(steps)
+ self.aspect_ratios = aspect_ratios
+ self.steps = steps
+ self.clip = clip
+ num_outputs = len(aspect_ratios)
+
+ # Estimation of default boxes scales
+ if scales is None:
+ if num_outputs > 1:
+ range_ratio = max_ratio - min_ratio
+ self.scales = [min_ratio + range_ratio * k / (num_outputs - 1.0) for k in range(num_outputs)]
+ self.scales.append(1.0)
+ else:
+ self.scales = [min_ratio, max_ratio]
+ else:
+ self.scales = scales
+
+ self._wh_pairs = self._generate_wh_pairs(num_outputs)
+
+ def _generate_wh_pairs(
+ self, num_outputs: int, dtype: torch.dtype = torch.float32, device: torch.device = torch.device("cpu")
+ ) -> List[Tensor]:
+ _wh_pairs: List[Tensor] = []
+ for k in range(num_outputs):
+ # Adding the 2 default width-height pairs for aspect ratio 1, using scales s_k and s'_k = sqrt(s_k * s_{k+1})
+ s_k = self.scales[k]
+ s_prime_k = math.sqrt(self.scales[k] * self.scales[k + 1])
+ wh_pairs = [[s_k, s_k], [s_prime_k, s_prime_k]]
+
+ # Adding 2 pairs for each aspect ratio of the feature map k
+ for ar in self.aspect_ratios[k]:
+ sq_ar = math.sqrt(ar)
+ w = self.scales[k] * sq_ar
+ h = self.scales[k] / sq_ar
+ wh_pairs.extend([[w, h], [h, w]])
+
+ _wh_pairs.append(torch.as_tensor(wh_pairs, dtype=dtype, device=device))
+ return _wh_pairs
+
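+ # Illustrative note (added for clarity; the scales below are hypothetical, not the defaults):
+ # with scales [0.2, 0.35, ...] and aspect_ratios[0] == [2, 3], _wh_pairs[0] holds
+ # 2 + 2 * 2 = 6 normalized (w, h) pairs: [0.2, 0.2] and [~0.265, ~0.265] for aspect ratio 1
+ # (scales s_0 and s'_0 = sqrt(s_0 * s_1)), plus [~0.283, ~0.141] / [~0.141, ~0.283] for
+ # ratio 2 and [~0.346, ~0.115] / [~0.115, ~0.346] for ratio 3.
+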
+ def num_anchors_per_location(self):
+ # Estimate the number of anchors based on the aspect ratios: 2 default boxes + 2 * the number of ratios per feature map.
+ return [2 + 2 * len(r) for r in self.aspect_ratios]
+
+ # Default box calculation based on page 6 of the SSD paper
+ def _grid_default_boxes(
+ self, grid_sizes: List[List[int]], image_size: List[int], dtype: torch.dtype = torch.float32
+ ) -> Tensor:
+ default_boxes = []
+ for k, f_k in enumerate(grid_sizes):
+ # Now add the default boxes for each width-height pair
+ if self.steps is not None:
+ x_f_k = image_size[0] / self.steps[k]
+ y_f_k = image_size[1] / self.steps[k]
+ else:
+ y_f_k, x_f_k = f_k
+
+ shifts_x = ((torch.arange(0, f_k[1]) + 0.5) / x_f_k).to(dtype=dtype)
+ shifts_y = ((torch.arange(0, f_k[0]) + 0.5) / y_f_k).to(dtype=dtype)
+ shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x, indexing="ij")
+ shift_x = shift_x.reshape(-1)
+ shift_y = shift_y.reshape(-1)
+
+ shifts = torch.stack((shift_x, shift_y) * len(self._wh_pairs[k]), dim=-1).reshape(-1, 2)
+ # Clipping the default boxes while the boxes are encoded in format (cx, cy, w, h)
+ _wh_pair = self._wh_pairs[k].clamp(min=0, max=1) if self.clip else self._wh_pairs[k]
+ wh_pairs = _wh_pair.repeat((f_k[0] * f_k[1]), 1)
+
+ default_box = torch.cat((shifts, wh_pairs), dim=1)
+
+ default_boxes.append(default_box)
+
+ return torch.cat(default_boxes, dim=0)
+
+ def __repr__(self) -> str:
+ s = self.__class__.__name__ + "("
+ s += "aspect_ratios={aspect_ratios}"
+ s += ", clip={clip}"
+ s += ", scales={scales}"
+ s += ", steps={steps}"
+ s += ")"
+ return s.format(**self.__dict__)
+
+ def forward(self, image_list: ImageList, feature_maps: List[Tensor]) -> List[Tensor]:
+ grid_sizes = [feature_map.shape[-2:] for feature_map in feature_maps]
+ image_size = image_list.tensors.shape[-2:]
+ dtype, device = feature_maps[0].dtype, feature_maps[0].device
+ default_boxes = self._grid_default_boxes(grid_sizes, image_size, dtype=dtype)
+ default_boxes = default_boxes.to(device)
+
+ dboxes = []
+ for _ in image_list.image_sizes:
+ dboxes_in_image = default_boxes
+ dboxes_in_image = torch.cat(
+ [
+ dboxes_in_image[:, :2] - 0.5 * dboxes_in_image[:, 2:],
+ dboxes_in_image[:, :2] + 0.5 * dboxes_in_image[:, 2:],
+ ],
+ -1,
+ )
+ dboxes_in_image[:, 0::2] *= image_size[1]
+ dboxes_in_image[:, 1::2] *= image_size[0]
+ dboxes.append(dboxes_in_image)
+ return dboxes
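+
+
+# Minimal usage sketch (added for illustration only; the 320x320 input and the feature-map
+# sizes below are hypothetical, loosely following the SSDLite configuration, and rely on the
+# torch / ImageList imports at the top of this file):
+if __name__ == "__main__":
+ dbox_gen = DefaultBoxGenerator([[2, 3] for _ in range(6)])
+ images = ImageList(torch.rand(2, 3, 320, 320), [(320, 320), (320, 320)])
+ features = [torch.rand(2, 16, s, s) for s in (20, 10, 5, 3, 2, 1)]
+ boxes = dbox_gen(images, features)
+ # one [num_default_boxes, 4] tensor per image, in (x1, y1, x2, y2) pixel coordinates
+ print([b.shape for b in boxes])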
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils.py
new file mode 100644
index 0000000000..54fdc4c05c
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils.py
@@ -0,0 +1,212 @@
+import warnings
+from typing import Callable, Dict, Optional, List, Union
+
+from torch import nn, Tensor
+from torchvision.ops import misc as misc_nn_ops
+from torchvision.ops.feature_pyramid_network import FeaturePyramidNetwork, LastLevelMaxPool, ExtraFPNBlock
+
+from .. import mobilenet
+from .. import resnet
+from .._utils import IntermediateLayerGetter
+
+
+class BackboneWithFPN(nn.Module):
+ """
+ Adds an FPN on top of a model.
+ Internally, it uses torchvision.models._utils.IntermediateLayerGetter to
+ extract a submodel that returns the feature maps specified in return_layers.
+ The same limitations of IntermediateLayerGetter apply here.
+ Args:
+ backbone (nn.Module)
+ return_layers (Dict[name, new_name]): a dict containing the names
+ of the modules for which the activations will be returned as
+ the key of the dict, and the value of the dict is the name
+ of the returned activation (which the user can specify).
+ in_channels_list (List[int]): number of channels for each feature map
+ that is returned, in the order they are present in the OrderedDict
+ out_channels (int): number of channels in the FPN.
+ Attributes:
+ out_channels (int): the number of channels in the FPN
+ """
+
+ def __init__(
+ self,
+ backbone: nn.Module,
+ return_layers: Dict[str, str],
+ in_channels_list: List[int],
+ out_channels: int,
+ extra_blocks: Optional[ExtraFPNBlock] = None,
+ ) -> None:
+ super().__init__()
+
+ if extra_blocks is None:
+ extra_blocks = LastLevelMaxPool()
+
+ self.body = IntermediateLayerGetter(backbone, return_layers=return_layers)
+ self.fpn = FeaturePyramidNetwork(
+ in_channels_list=in_channels_list,
+ out_channels=out_channels,
+ extra_blocks=extra_blocks,
+ )
+ self.out_channels = out_channels
+
+ def forward(self, x: Tensor) -> Dict[str, Tensor]:
+ x = self.body(x)
+ x = self.fpn(x)
+ return x
+
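+# Usage sketch for BackboneWithFPN (illustrative only; the ResNet-18 channel counts below are
+# assumptions about that architecture, not values defined in this file):
+# body = resnet.resnet18(pretrained=False)
+# backbone = BackboneWithFPN(body,
+# return_layers={"layer1": "0", "layer2": "1", "layer3": "2", "layer4": "3"},
+# in_channels_list=[64, 128, 256, 512], out_channels=256)
+# feats = backbone(torch.rand(1, 3, 224, 224)) # OrderedDict of FPN maps '0'..'3' plus 'pool', each with 256 channels
+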
+
+def resnet_fpn_backbone(
+ backbone_name: str,
+ pretrained: bool,
+ norm_layer: Callable[..., nn.Module] = misc_nn_ops.FrozenBatchNorm2d,
+ trainable_layers: int = 3,
+ returned_layers: Optional[List[int]] = None,
+ extra_blocks: Optional[ExtraFPNBlock] = None,
+) -> BackboneWithFPN:
+ """
+ Constructs a specified ResNet backbone with FPN on top. Freezes the specified number of layers in the backbone.
+
+ Examples::
+
+ >>> from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
+ >>> backbone = resnet_fpn_backbone('resnet50', pretrained=True, trainable_layers=3)
+ >>> # get some dummy image
+ >>> x = torch.rand(1,3,64,64)
+ >>> # compute the output
+ >>> output = backbone(x)
+ >>> print([(k, v.shape) for k, v in output.items()])
+ >>> # returns
+ >>> [('0', torch.Size([1, 256, 16, 16])),
+ >>> ('1', torch.Size([1, 256, 8, 8])),
+ >>> ('2', torch.Size([1, 256, 4, 4])),
+ >>> ('3', torch.Size([1, 256, 2, 2])),
+ >>> ('pool', torch.Size([1, 256, 1, 1]))]
+
+ Args:
+ backbone_name (string): resnet architecture. Possible values are 'resnet18', 'resnet34', 'resnet50',
+ 'resnet101', 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d', 'wide_resnet50_2', 'wide_resnet101_2'
+ pretrained (bool): If True, returns a model with a backbone pre-trained on ImageNet
+ norm_layer (callable): it is recommended to use the default value. For details visit:
+ (https://github.com/facebookresearch/maskrcnn-benchmark/issues/267)
+ trainable_layers (int): number of trainable (not frozen) resnet layers starting from final block.
+ Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable.
+ returned_layers (list of int): The layers of the network to return. Each entry must be in ``[1, 4]``.
+ By default all layers are returned.
+ extra_blocks (ExtraFPNBlock or None): if provided, extra operations will
+ be performed. It is expected to take the fpn features, the original
+ features and the names of the original features as input, and returns
+ a new list of feature maps and their corresponding names. By
+ default a ``LastLevelMaxPool`` is used.
+ """
+ backbone = resnet.__dict__[backbone_name](pretrained=pretrained, norm_layer=norm_layer)
+ return _resnet_fpn_extractor(backbone, trainable_layers, returned_layers, extra_blocks)
+
+
+def _resnet_fpn_extractor(
+ backbone: resnet.ResNet,
+ trainable_layers: int,
+ returned_layers: Optional[List[int]] = None,
+ extra_blocks: Optional[ExtraFPNBlock] = None,
+) -> BackboneWithFPN:
+
+ # select the layers that won't be frozen
+ assert 0 <= trainable_layers <= 5
+ layers_to_train = ["layer4", "layer3", "layer2", "layer1", "conv1"][:trainable_layers]
+ if trainable_layers == 5:
+ layers_to_train.append("bn1")
+ for name, parameter in backbone.named_parameters():
+ if all([not name.startswith(layer) for layer in layers_to_train]):
+ parameter.requires_grad_(False)
+
+ if extra_blocks is None:
+ extra_blocks = LastLevelMaxPool()
+
+ if returned_layers is None:
+ returned_layers = [1, 2, 3, 4]
+ assert min(returned_layers) > 0 and max(returned_layers) < 5
+ return_layers = {f"layer{k}": str(v) for v, k in enumerate(returned_layers)}
+
+ in_channels_stage2 = backbone.inplanes // 8
+ in_channels_list = [in_channels_stage2 * 2 ** (i - 1) for i in returned_layers]
+ out_channels = 256
+ return BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels, extra_blocks=extra_blocks)
+
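+# Note on the channel arithmetic above (descriptive comment added for clarity): after
+# construction, backbone.inplanes equals the channel count of the last stage, e.g. 2048 for
+# ResNet-50, so in_channels_stage2 = 2048 // 8 = 256 and, with the default returned_layers
+# [1, 2, 3, 4], in_channels_list becomes [256, 512, 1024, 2048]; for ResNet-18 the same
+# formula yields [64, 128, 256, 512].
+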
+
+def _validate_trainable_layers(
+ pretrained: bool,
+ trainable_backbone_layers: Optional[int],
+ max_value: int,
+ default_value: int,
+) -> int:
+ # don't freeze any layers if pretrained model or backbone is not used
+ if not pretrained:
+ if trainable_backbone_layers is not None:
+ warnings.warn(
+ "Changing trainable_backbone_layers has not effect if "
+ "neither pretrained nor pretrained_backbone have been set to True, "
+ f"falling back to trainable_backbone_layers={max_value} so that all layers are trainable"
+ )
+ trainable_backbone_layers = max_value
+
+ # by default freeze first blocks
+ if trainable_backbone_layers is None:
+ trainable_backbone_layers = default_value
+ assert 0 <= trainable_backbone_layers <= max_value
+ return trainable_backbone_layers
+
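+# Behavior sketch (illustrative): _validate_trainable_layers(True, None, 5, 3) returns the
+# default of 3; _validate_trainable_layers(False, None, 5, 3) returns 5 so that all layers
+# stay trainable when no pre-trained weights are used; _validate_trainable_layers(False, 2, 5, 3)
+# additionally emits the warning above before falling back to 5.
+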
+
+def mobilenet_backbone(
+ backbone_name: str,
+ pretrained: bool,
+ fpn: bool,
+ norm_layer: Callable[..., nn.Module] = misc_nn_ops.FrozenBatchNorm2d,
+ trainable_layers: int = 2,
+ returned_layers: Optional[List[int]] = None,
+ extra_blocks: Optional[ExtraFPNBlock] = None,
+) -> nn.Module:
+ backbone = mobilenet.__dict__[backbone_name](pretrained=pretrained, norm_layer=norm_layer)
+ return _mobilenet_extractor(backbone, fpn, trainable_layers, returned_layers, extra_blocks)
+
+
+def _mobilenet_extractor(
+ backbone: Union[mobilenet.MobileNetV2, mobilenet.MobileNetV3],
+ fpn: bool,
+ trainable_layers,
+ returned_layers: Optional[List[int]] = None,
+ extra_blocks: Optional[ExtraFPNBlock] = None,
+) -> nn.Module:
+ backbone = backbone.features
+ # Gather the indices of blocks which are strided. These are the locations of C1, ..., Cn-1 blocks.
+ # The first and last blocks are always included because they are the C0 (conv1) and Cn.
+ stage_indices = [0] + [i for i, b in enumerate(backbone) if getattr(b, "_is_cn", False)] + [len(backbone) - 1]
+ num_stages = len(stage_indices)
+
+ # find the index of the first block that will not be frozen
+ assert 0 <= trainable_layers <= num_stages
+ freeze_before = len(backbone) if trainable_layers == 0 else stage_indices[num_stages - trainable_layers]
+
+ for b in backbone[:freeze_before]:
+ for parameter in b.parameters():
+ parameter.requires_grad_(False)
+
+ out_channels = 256
+ if fpn:
+ if extra_blocks is None:
+ extra_blocks = LastLevelMaxPool()
+
+ if returned_layers is None:
+ returned_layers = [num_stages - 2, num_stages - 1]
+ assert min(returned_layers) >= 0 and max(returned_layers) < num_stages
+ return_layers = {f"{stage_indices[k]}": str(v) for v, k in enumerate(returned_layers)}
+
+ in_channels_list = [backbone[stage_indices[i]].out_channels for i in returned_layers]
+ return BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels, extra_blocks=extra_blocks)
+ else:
+ m = nn.Sequential(
+ backbone,
+ # depthwise linear combination of channels to reduce their size
+ nn.Conv2d(backbone[-1].out_channels, out_channels, 1),
+ )
+ m.out_channels = out_channels # type: ignore[assignment]
+ return m
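+
+
+# Usage sketch (illustrative only; assumes the vendored mobilenet module mirrors upstream
+# torchvision, i.e. 'mobilenet_v3_large' accepts the norm_layer keyword and its feature
+# blocks expose out_channels):
+# backbone = mobilenet_backbone("mobilenet_v3_large", pretrained=False, fpn=True)
+# feats = backbone(torch.rand(1, 3, 320, 320)) # OrderedDict of FPN maps, each with backbone.out_channels == 256 channels
+# With fpn=False the helper instead returns an nn.Sequential ending in a 1x1 Conv2d that
+# projects the last feature map to 256 channels.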
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils_origin.py
new file mode 100644
index 0000000000..f5335c451d
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils_origin.py
@@ -0,0 +1,63 @@
+from collections import OrderedDict
+from torch import nn
+from torchvision.ops.feature_pyramid_network import FeaturePyramidNetwork, LastLevelMaxPool
+
+from torchvision.ops import misc as misc_nn_ops
+from .._utils import IntermediateLayerGetter
+from .. import resnet
+
+
+class BackboneWithFPN(nn.Module):
+ """
+ Adds an FPN on top of a model.
+ Internally, it uses torchvision.models._utils.IntermediateLayerGetter to
+ extract a submodel that returns the feature maps specified in return_layers.
+ The same limitations of IntermediateLayerGetter apply here.
+ Arguments:
+ backbone (nn.Module)
+ return_layers (Dict[name, new_name]): a dict containing the names
+ of the modules for which the activations will be returned as
+ the key of the dict, and the value of the dict is the name
+ of the returned activation (which the user can specify).
+ in_channels_list (List[int]): number of channels for each feature map
+ that is returned, in the order they are present in the OrderedDict
+ out_channels (int): number of channels in the FPN.
+ Attributes:
+ out_channels (int): the number of channels in the FPN
+ """
+ def __init__(self, backbone, return_layers, in_channels_list, out_channels):
+ super(BackboneWithFPN, self).__init__()
+ self.body = IntermediateLayerGetter(backbone, return_layers=return_layers)
+ self.fpn = FeaturePyramidNetwork(
+ in_channels_list=in_channels_list,
+ out_channels=out_channels,
+ extra_blocks=LastLevelMaxPool(),
+ )
+ self.out_channels = out_channels
+
+ def forward(self, x):
+ x = self.body(x)
+ x = self.fpn(x)
+ return x
+
+
+def resnet_fpn_backbone(backbone_name, pretrained):
+ backbone = resnet.__dict__[backbone_name](
+ pretrained=pretrained,
+ norm_layer=misc_nn_ops.FrozenBatchNorm2d)
+ # freeze layers
+ for name, parameter in backbone.named_parameters():
+ if 'layer2' not in name and 'layer3' not in name and 'layer4' not in name:
+ parameter.requires_grad_(False)
+
+ return_layers = {'layer1': '0', 'layer2': '1', 'layer3': '2', 'layer4': '3'}
+
+ in_channels_stage2 = backbone.inplanes // 8
+ in_channels_list = [
+ in_channels_stage2,
+ in_channels_stage2 * 2,
+ in_channels_stage2 * 4,
+ in_channels_stage2 * 8,
+ ]
+ out_channels = 256
+ return BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/faster_rcnn.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/faster_rcnn.py
new file mode 100644
index 0000000000..92366352b9
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/faster_rcnn.py
@@ -0,0 +1,355 @@
+from collections import OrderedDict
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+
+from torchvision.ops import misc as misc_nn_ops
+from torchvision.ops import MultiScaleRoIAlign
+
+from ..utils import load_state_dict_from_url
+
+from .generalized_rcnn import GeneralizedRCNN
+from .rpn import AnchorGenerator, RPNHead, RegionProposalNetwork
+from .roi_heads import RoIHeads
+from .transform import GeneralizedRCNNTransform
+from .backbone_utils import resnet_fpn_backbone
+
+
+__all__ = [
+ "FasterRCNN", "fasterrcnn_resnet50_fpn",
+]
+
+
+class FasterRCNN(GeneralizedRCNN):
+ """
+ Implements Faster R-CNN.
+
+ The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each
+ image, and should be in 0-1 range. Different images can have different sizes.
+
+ The behavior of the model changes depending on whether it is in training or evaluation mode.
+
+ During training, the model expects both the input tensors and targets (a list of dictionaries),
+ containing:
+ - boxes (FloatTensor[N, 4]): the ground-truth boxes in [x1, y1, x2, y2] format, with values of x
+ between 0 and W and values of y between 0 and H
+ - labels (Int64Tensor[N]): the class label for each ground-truth box
+
+ The model returns a Dict[Tensor] during training, containing the classification and regression
+ losses for both the RPN and the R-CNN.
+
+ During inference, the model requires only the input tensors, and returns the post-processed
+ predictions as a List[Dict[Tensor]], one for each input image. The fields of the Dict are as
+ follows:
+ - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values of x
+ between 0 and W and values of y between 0 and H
+ - labels (Int64Tensor[N]): the predicted labels for each image
+ - scores (Tensor[N]): the scores of each prediction
+
+ Arguments:
+ backbone (nn.Module): the network used to compute the features for the model.
+ It should contain an out_channels attribute, which indicates the number of output
+ channels that each feature map has (and it should be the same for all feature maps).
+ The backbone should return a single Tensor or an OrderedDict[Tensor].
+ num_classes (int): number of output classes of the model (including the background).
+ If box_predictor is specified, num_classes should be None.
+ min_size (int): minimum size of the image to be rescaled before feeding it to the backbone
+ max_size (int): maximum size of the image to be rescaled before feeding it to the backbone
+ image_mean (Tuple[float, float, float]): mean values used for input normalization.
+ They are generally the mean values of the dataset on which the backbone has been trained.
+ image_std (Tuple[float, float, float]): std values used for input normalization.
+ They are generally the std values of the dataset on which the backbone has been trained.
+ rpn_anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature
+ maps.
+ rpn_head (nn.Module): module that computes the objectness and regression deltas from the RPN
+ rpn_pre_nms_top_n_train (int): number of proposals to keep before applying NMS during training
+ rpn_pre_nms_top_n_test (int): number of proposals to keep before applying NMS during testing
+ rpn_post_nms_top_n_train (int): number of proposals to keep after applying NMS during training
+ rpn_post_nms_top_n_test (int): number of proposals to keep after applying NMS during testing
+ rpn_nms_thresh (float): NMS threshold used for postprocessing the RPN proposals
+ rpn_fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be
+ considered as positive during training of the RPN.
+ rpn_bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be
+ considered as negative during training of the RPN.
+ rpn_batch_size_per_image (int): number of anchors that are sampled during training of the RPN
+ for computing the loss
+ rpn_positive_fraction (float): proportion of positive anchors in a mini-batch during training
+ of the RPN
+ box_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in
+ the locations indicated by the bounding boxes
+ box_head (nn.Module): module that takes the cropped feature maps as input
+ box_predictor (nn.Module): module that takes the output of box_head and returns the
+ classification logits and box regression deltas.
+ box_score_thresh (float): during inference, only return proposals with a classification score
+ greater than box_score_thresh
+ box_nms_thresh (float): NMS threshold for the prediction head. Used during inference
+ box_detections_per_img (int): maximum number of detections per image, for all classes.
+ box_fg_iou_thresh (float): minimum IoU between the proposals and the GT box so that they can be
+ considered as positive during training of the classification head
+ box_bg_iou_thresh (float): maximum IoU between the proposals and the GT box so that they can be
+ considered as negative during training of the classification head
+ box_batch_size_per_image (int): number of proposals that are sampled during training of the
+ classification head
+ box_positive_fraction (float): proportion of positive proposals in a mini-batch during training
+ of the classification head
+ bbox_reg_weights (Tuple[float, float, float, float]): weights for the encoding/decoding of the
+ bounding boxes
+
+ Example::
+
+ >>> import torch
+ >>> import torchvision
+ >>> from torchvision.models.detection import FasterRCNN
+ >>> from torchvision.models.detection.rpn import AnchorGenerator
+ >>> # load a pre-trained model for classification and return
+ >>> # only the features
+ >>> backbone = torchvision.models.mobilenet_v2(pretrained=True).features
+ >>> # FasterRCNN needs to know the number of
+ >>> # output channels in a backbone. For mobilenet_v2, it's 1280
+ >>> # so we need to add it here
+ >>> backbone.out_channels = 1280
+ >>>
+ >>> # let's make the RPN generate 5 x 3 anchors per spatial
+ >>> # location, with 5 different sizes and 3 different aspect
+ >>> # ratios. We have a Tuple[Tuple[int]] because each feature
+ >>> # map could potentially have different sizes and
+ >>> # aspect ratios
+ >>> anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
+ >>> aspect_ratios=((0.5, 1.0, 2.0),))
+ >>>
+ >>> # let's define what are the feature maps that we will
+ >>> # use to perform the region of interest cropping, as well as
+ >>> # the size of the crop after rescaling.
+ >>> # if your backbone returns a Tensor, featmap_names is expected to
+ >>> # be ['0']. More generally, the backbone should return an
+ >>> # OrderedDict[Tensor], and in featmap_names you can choose which
+ >>> # feature maps to use.
+ >>> roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
+ >>> output_size=7,
+ >>> sampling_ratio=2)
+ >>>
+ >>> # put the pieces together inside a FasterRCNN model
+ >>> model = FasterRCNN(backbone,
+ >>> num_classes=2,
+ >>> rpn_anchor_generator=anchor_generator,
+ >>> box_roi_pool=roi_pooler)
+ >>> model.eval()
+ >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
+ >>> predictions = model(x)
+ """
+
+ def __init__(self, backbone, num_classes=None,
+ # transform parameters
+ min_size=800, max_size=1333,
+ image_mean=None, image_std=None,
+ # RPN parameters
+ rpn_anchor_generator=None, rpn_head=None,
+ rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000,
+ rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000,
+ rpn_nms_thresh=0.7,
+ rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,
+ rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
+ # Box parameters
+ box_roi_pool=None, box_head=None, box_predictor=None,
+ box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100,
+ box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5,
+ box_batch_size_per_image=512, box_positive_fraction=0.25,
+ bbox_reg_weights=None):
+
+ if not hasattr(backbone, "out_channels"):
+ raise ValueError(
+ "backbone should contain an attribute out_channels "
+ "specifying the number of output channels (assumed to be the "
+ "same for all the levels)")
+
+ assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
+ assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))
+
+ if num_classes is not None:
+ if box_predictor is not None:
+ raise ValueError("num_classes should be None when box_predictor is specified")
+ else:
+ if box_predictor is None:
+ raise ValueError("num_classes should not be None when box_predictor "
+ "is not specified")
+
+ out_channels = backbone.out_channels
+
+ if rpn_anchor_generator is None:
+ anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
+ aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
+ rpn_anchor_generator = AnchorGenerator(
+ anchor_sizes, aspect_ratios
+ )
+ if rpn_head is None:
+ rpn_head = RPNHead(
+ out_channels, rpn_anchor_generator.num_anchors_per_location()[0]
+ )
+
+ rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)
+ rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)
+
+ rpn = RegionProposalNetwork(
+ rpn_anchor_generator, rpn_head,
+ rpn_fg_iou_thresh, rpn_bg_iou_thresh,
+ rpn_batch_size_per_image, rpn_positive_fraction,
+ rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh)
+
+ if box_roi_pool is None:
+ box_roi_pool = MultiScaleRoIAlign(
+ featmap_names=['0', '1', '2', '3'],
+ output_size=7,
+ sampling_ratio=2)
+
+ if box_head is None:
+ resolution = box_roi_pool.output_size[0]
+ representation_size = 1024
+ box_head = TwoMLPHead(
+ out_channels * resolution ** 2,
+ representation_size)
+
+ if box_predictor is None:
+ representation_size = 1024
+ box_predictor = FastRCNNPredictor(
+ representation_size,
+ num_classes)
+
+ roi_heads = RoIHeads(
+ # Box
+ box_roi_pool, box_head, box_predictor,
+ box_fg_iou_thresh, box_bg_iou_thresh,
+ box_batch_size_per_image, box_positive_fraction,
+ bbox_reg_weights,
+ box_score_thresh, box_nms_thresh, box_detections_per_img)
+
+ if image_mean is None:
+ image_mean = [0.485, 0.456, 0.406]
+ if image_std is None:
+ image_std = [0.229, 0.224, 0.225]
+ transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)
+
+ super(FasterRCNN, self).__init__(backbone, rpn, roi_heads, transform)
+
+
+class TwoMLPHead(nn.Module):
+ """
+ Standard heads for FPN-based models
+
+ Arguments:
+ in_channels (int): number of input channels
+ representation_size (int): size of the intermediate representation
+ """
+
+ def __init__(self, in_channels, representation_size):
+ super(TwoMLPHead, self).__init__()
+
+ self.fc6 = nn.Linear(in_channels, representation_size)
+ self.fc7 = nn.Linear(representation_size, representation_size)
+
+ def forward(self, x):
+ x = x.flatten(start_dim=1)
+
+ x = F.relu(self.fc6(x))
+ x = F.relu(self.fc7(x))
+
+ return x
+
+
+class FastRCNNPredictor(nn.Module):
+ """
+ Standard classification + bounding box regression layers
+ for Fast R-CNN.
+
+ Arguments:
+ in_channels (int): number of input channels
+ num_classes (int): number of output classes (including background)
+ """
+
+ def __init__(self, in_channels, num_classes):
+ super(FastRCNNPredictor, self).__init__()
+ self.cls_score = nn.Linear(in_channels, num_classes)
+ self.bbox_pred = nn.Linear(in_channels, num_classes * 4)
+
+ def forward(self, x):
+ if x.dim() == 4:
+ assert list(x.shape[2:]) == [1, 1]
+ x = x.flatten(start_dim=1)
+ scores = self.cls_score(x)
+ bbox_deltas = self.bbox_pred(x)
+
+ return scores, bbox_deltas
+
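+# Shape sketch for the two box heads above (illustrative only; the proposal count of 512 and
+# the COCO class count of 91 are example values, not fixed by this file):
+# feats = torch.rand(512, 256, 7, 7)          # pooled RoI features from MultiScaleRoIAlign
+# box_head = TwoMLPHead(256 * 7 * 7, 1024)    # flattens to (512, 12544), outputs (512, 1024)
+# box_predictor = FastRCNNPredictor(1024, 91)
+# scores, bbox_deltas = box_predictor(box_head(feats))  # (512, 91) and (512, 91 * 4)
+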
+
+model_urls = {
+ 'fasterrcnn_resnet50_fpn_coco':
+ 'https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth',
+}
+
+
+def fasterrcnn_resnet50_fpn(pretrained=False, progress=True,
+ num_classes=91, pretrained_backbone=True, **kwargs):
+ """
+ Constructs a Faster R-CNN model with a ResNet-50-FPN backbone.
+
+ The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
+ image, and should be in ``0-1`` range. Different images can have different sizes.
+
+ The behavior of the model changes depending on whether it is in training or evaluation mode.
+
+ During training, the model expects both the input tensors and targets (a list of dictionaries),
+ containing:
+ - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
+ between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
+ - labels (``Int64Tensor[N]``): the class label for each ground-truth box
+
+ The model returns a ``Dict[Tensor]`` during training, containing the classification and regression
+ losses for both the RPN and the R-CNN.
+
+ During inference, the model requires only the input tensors, and returns the post-processed
+ predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as
+ follows:
+ - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
+ between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
+ - labels (``Int64Tensor[N]``): the predicted labels for each image
+ - scores (``Tensor[N]``): the scores of each prediction
+
+ Faster R-CNN is exportable to ONNX for a fixed batch size with input images of fixed size.
+
+ Example::
+
+ >>> model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
+ >>> # For training
+ >>> images, boxes = torch.rand(4, 3, 600, 1200), torch.rand(4, 11, 4)
+ >>> labels = torch.randint(1, 91, (4, 11))
+ >>> images = list(image for image in images)
+ >>> targets = []
+ >>> for i in range(len(images)):
+ >>> d = {}
+ >>> d['boxes'] = boxes[i]
+ >>> d['labels'] = labels[i]
+ >>> targets.append(d)
+ >>> output = model(images, targets)
+ >>> # For inference
+ >>> model.eval()
+ >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
+ >>> predictions = model(x)
+ >>>
+ >>> # optionally, if you want to export the model to ONNX:
+ >>> torch.onnx.export(model, x, "faster_rcnn.onnx", opset_version = 11)
+
+ Arguments:
+ pretrained (bool): If True, returns a model pre-trained on COCO train2017
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ if pretrained:
+ # no need to download the backbone if pretrained is set
+ pretrained_backbone = False
+ backbone = resnet_fpn_backbone('resnet50', pretrained_backbone)
+ model = FasterRCNN(backbone, num_classes, **kwargs)
+ if pretrained:
+ state_dict = load_state_dict_from_url(model_urls['fasterrcnn_resnet50_fpn_coco'],
+ progress=progress)
+ model.load_state_dict(state_dict)
+ return model
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/generalized_rcnn.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/generalized_rcnn.py
new file mode 100644
index 0000000000..50a25fb4f9
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/generalized_rcnn.py
@@ -0,0 +1,84 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+"""
+Implements the Generalized R-CNN framework
+"""
+
+from collections import OrderedDict
+import torch
+from torch import nn
+import warnings
+from torch.jit.annotations import Tuple, List, Dict, Optional
+from torch import Tensor
+
+
+class GeneralizedRCNN(nn.Module):
+ """
+ Main class for Generalized R-CNN.
+
+ Arguments:
+ backbone (nn.Module):
+ rpn (nn.Module):
+ heads (nn.Module): takes the features + the proposals from the RPN and computes
+ detections / masks from it.
+ transform (nn.Module): performs the data transformation from the inputs to feed into
+ the model
+ """
+
+ def __init__(self, backbone, rpn, roi_heads, transform):
+ super(GeneralizedRCNN, self).__init__()
+ self.transform = transform
+ self.backbone = backbone
+ self.rpn = rpn
+ self.roi_heads = roi_heads
+ # used only on torchscript mode
+ self._has_warned = False
+
+ @torch.jit.unused
+ def eager_outputs(self, losses, detections):
+ # type: (Dict[str, Tensor], List[Dict[str, Tensor]]) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]
+ if self.training:
+ return losses
+
+ return detections
+
+ def forward(self, images, targets=None):
+ # type: (List[Tensor], Optional[List[Dict[str, Tensor]]]) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]
+ """
+ Arguments:
+ images (list[Tensor]): images to be processed
+ targets (list[Dict[Tensor]]): ground-truth boxes present in the image (optional)
+
+ Returns:
+ result (list[BoxList] or dict[Tensor]): the output from the model.
+ During training, it returns a dict[Tensor] which contains the losses.
+ During testing, it returns a list[BoxList] that contains additional fields
+ like `scores`, `labels` and `mask` (for Mask R-CNN models).
+
+ """
+ if self.training and targets is None:
+ raise ValueError("In training mode, targets should be passed")
+ original_image_sizes = torch.jit.annotate(List[Tuple[int, int]], [])
+ for img in images:
+ val = img.shape[-2:]
+ assert len(val) == 2
+ original_image_sizes.append((val[0], val[1]))
+
+ images, targets = self.transform(images, targets)
+ features = self.backbone(images.tensors)
+ if isinstance(features, torch.Tensor):
+ features = OrderedDict([('0', features)])
+ proposals, proposal_losses = self.rpn(images, features, targets)
+ detections, detector_losses = self.roi_heads(features, proposals, images.image_sizes, targets)
+ detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes)
+
+ losses = {}
+ losses.update(detector_losses)
+ losses.update(proposal_losses)
+
+ if torch.jit.is_scripting():
+ if not self._has_warned:
+ warnings.warn("RCNN always returns a (Losses, Detections) tuple in scripting")
+ self._has_warned = True
+ return (losses, detections)
+ else:
+ return self.eager_outputs(losses, detections)
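+
+
+# Data-flow summary of GeneralizedRCNN.forward (descriptive comment added for clarity, not
+# upstream code): images/targets -> self.transform (resize + normalize, returns an ImageList)
+# -> self.backbone (feature maps) -> self.rpn (proposals + RPN losses) -> self.roi_heads
+# (detections + detector losses) -> self.transform.postprocess (boxes mapped back to the
+# original image sizes). Training returns the loss dict, inference returns the detections.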
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/image_list.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/image_list.py
new file mode 100644
index 0000000000..583866557e
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/image_list.py
@@ -0,0 +1,25 @@
+from typing import List, Tuple
+
+import torch
+from torch import Tensor
+
+
+class ImageList:
+ """
+ Structure that holds a list of images (of possibly
+ varying sizes) as a single tensor.
+ This works by padding the images to the same size,
+ and storing in a field the original sizes of each image
+
+ Args:
+ tensors (tensor): Tensor containing images.
+ image_sizes (list[tuple[int, int]]): List of Tuples each containing size of images.
+ """
+
+ def __init__(self, tensors: Tensor, image_sizes: List[Tuple[int, int]]) -> None:
+ self.tensors = tensors
+ self.image_sizes = image_sizes
+
+ def to(self, device: torch.device) -> "ImageList":
+ cast_tensor = self.tensors.to(device)
+ return ImageList(cast_tensor, self.image_sizes)
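+
+
+# Minimal usage sketch (added for illustration; the sizes below are arbitrary):
+if __name__ == "__main__":
+ batch = torch.zeros(2, 3, 320, 320) # two images padded to a common 320x320 canvas
+ image_list = ImageList(batch, [(300, 320), (320, 280)]) # original (unpadded) sizes
+ print(image_list.tensors.shape, image_list.image_sizes)
+ image_list = image_list.to(torch.device("cpu")) # returns a new ImageList on the target device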
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/keypoint_rcnn.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/keypoint_rcnn.py
new file mode 100644
index 0000000000..aeee558ca2
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/keypoint_rcnn.py
@@ -0,0 +1,330 @@
+import torch
+from torch import nn
+
+from torchvision.ops import misc as misc_nn_ops
+
+from torchvision.ops import MultiScaleRoIAlign
+
+from ..utils import load_state_dict_from_url
+
+from .faster_rcnn import FasterRCNN
+from .backbone_utils import resnet_fpn_backbone
+
+
+__all__ = [
+ "KeypointRCNN", "keypointrcnn_resnet50_fpn"
+]
+
+
+class KeypointRCNN(FasterRCNN):
+ """
+ Implements Keypoint R-CNN.
+
+ The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each
+ image, and should be in 0-1 range. Different images can have different sizes.
+
+ The behavior of the model changes depending on whether it is in training or evaluation mode.
+
+ During training, the model expects both the input tensors and targets (a list of dictionaries),
+ containing:
+ - boxes (FloatTensor[N, 4]): the ground-truth boxes in [x1, y1, x2, y2] format, with values of x
+ between 0 and W and values of y between 0 and H
+ - labels (Int64Tensor[N]): the class label for each ground-truth box
+ - keypoints (FloatTensor[N, K, 3]): the K keypoints location for each of the N instances, in the
+ format [x, y, visibility], where visibility=0 means that the keypoint is not visible.
+
+ The model returns a Dict[Tensor] during training, containing the classification and regression
+ losses for both the RPN and the R-CNN, and the keypoint loss.
+
+ During inference, the model requires only the input tensors, and returns the post-processed
+ predictions as a List[Dict[Tensor]], one for each input image. The fields of the Dict are as
+ follows:
+ - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values of x
+ between 0 and W and values of y between 0 and H
+ - labels (Int64Tensor[N]): the predicted labels for each image
+ - scores (Tensor[N]): the scores of each prediction
+ - keypoints (FloatTensor[N, K, 3]): the locations of the predicted keypoints, in [x, y, v] format.
+
+ Arguments:
+ backbone (nn.Module): the network used to compute the features for the model.
+ It should contain an out_channels attribute, which indicates the number of output
+ channels that each feature map has (and it should be the same for all feature maps).
+ The backbone should return a single Tensor or an OrderedDict[Tensor].
+ num_classes (int): number of output classes of the model (including the background).
+ If box_predictor is specified, num_classes should be None.
+ min_size (int): minimum size of the image to be rescaled before feeding it to the backbone
+ max_size (int): maximum size of the image to be rescaled before feeding it to the backbone
+ image_mean (Tuple[float, float, float]): mean values used for input normalization.
+ They are generally the mean values of the dataset on which the backbone has been trained.
+ image_std (Tuple[float, float, float]): std values used for input normalization.
+ They are generally the std values of the dataset on which the backbone has been trained.
+ rpn_anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature
+ maps.
+ rpn_head (nn.Module): module that computes the objectness and regression deltas from the RPN
+ rpn_pre_nms_top_n_train (int): number of proposals to keep before applying NMS during training
+ rpn_pre_nms_top_n_test (int): number of proposals to keep before applying NMS during testing
+ rpn_post_nms_top_n_train (int): number of proposals to keep after applying NMS during training
+ rpn_post_nms_top_n_test (int): number of proposals to keep after applying NMS during testing
+ rpn_nms_thresh (float): NMS threshold used for postprocessing the RPN proposals
+ rpn_fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be
+ considered as positive during training of the RPN.
+ rpn_bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be
+ considered as negative during training of the RPN.
+ rpn_batch_size_per_image (int): number of anchors that are sampled during training of the RPN
+ for computing the loss
+ rpn_positive_fraction (float): proportion of positive anchors in a mini-batch during training
+ of the RPN
+ box_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in
+ the locations indicated by the bounding boxes
+ box_head (nn.Module): module that takes the cropped feature maps as input
+ box_predictor (nn.Module): module that takes the output of box_head and returns the
+ classification logits and box regression deltas.
+ box_score_thresh (float): during inference, only return proposals with a classification score
+ greater than box_score_thresh
+ box_nms_thresh (float): NMS threshold for the prediction head. Used during inference
+ box_detections_per_img (int): maximum number of detections per image, for all classes.
+ box_fg_iou_thresh (float): minimum IoU between the proposals and the GT box so that they can be
+ considered as positive during training of the classification head
+ box_bg_iou_thresh (float): maximum IoU between the proposals and the GT box so that they can be
+ considered as negative during training of the classification head
+ box_batch_size_per_image (int): number of proposals that are sampled during training of the
+ classification head
+ box_positive_fraction (float): proportion of positive proposals in a mini-batch during training
+ of the classification head
+ bbox_reg_weights (Tuple[float, float, float, float]): weights for the encoding/decoding of the
+ bounding boxes
+ keypoint_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in
+ the locations indicated by the bounding boxes, which will be used for the keypoint head.
+ keypoint_head (nn.Module): module that takes the cropped feature maps as input
+ keypoint_predictor (nn.Module): module that takes the output of the keypoint_head and returns the
+ heatmap logits
+
+ Example::
+
+ >>> import torch
+ >>> import torchvision
+ >>> from torchvision.models.detection import KeypointRCNN
+ >>> from torchvision.models.detection.rpn import AnchorGenerator
+ >>>
+ >>> # load a pre-trained model for classification and return
+ >>> # only the features
+ >>> backbone = torchvision.models.mobilenet_v2(pretrained=True).features
+ >>> # KeypointRCNN needs to know the number of
+ >>> # output channels in a backbone. For mobilenet_v2, it's 1280
+ >>> # so we need to add it here
+ >>> backbone.out_channels = 1280
+ >>>
+ >>> # let's make the RPN generate 5 x 3 anchors per spatial
+ >>> # location, with 5 different sizes and 3 different aspect
+ >>> # ratios. We have a Tuple[Tuple[int]] because each feature
+ >>> # map could potentially have different sizes and
+ >>> # aspect ratios
+ >>> anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
+ >>> aspect_ratios=((0.5, 1.0, 2.0),))
+ >>>
+ >>> # let's define what are the feature maps that we will
+ >>> # use to perform the region of interest cropping, as well as
+ >>> # the size of the crop after rescaling.
+ >>> # if your backbone returns a Tensor, featmap_names is expected to
+ >>> # be ['0']. More generally, the backbone should return an
+ >>> # OrderedDict[Tensor], and in featmap_names you can choose which
+ >>> # feature maps to use.
+ >>> roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
+ >>> output_size=7,
+ >>> sampling_ratio=2)
+ >>>
+ >>> keypoint_roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
+ >>> output_size=14,
+ >>> sampling_ratio=2)
+ >>> # put the pieces together inside a KeypointRCNN model
+ >>> model = KeypointRCNN(backbone,
+ >>> num_classes=2,
+ >>> rpn_anchor_generator=anchor_generator,
+ >>> box_roi_pool=roi_pooler,
+ >>> keypoint_roi_pool=keypoint_roi_pooler)
+ >>> model.eval()
+ >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
+ >>> predictions = model(x)
+ """
+ def __init__(self, backbone, num_classes=None,
+ # transform parameters
+ min_size=None, max_size=1333,
+ image_mean=None, image_std=None,
+ # RPN parameters
+ rpn_anchor_generator=None, rpn_head=None,
+ rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000,
+ rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000,
+ rpn_nms_thresh=0.7,
+ rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,
+ rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
+ # Box parameters
+ box_roi_pool=None, box_head=None, box_predictor=None,
+ box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100,
+ box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5,
+ box_batch_size_per_image=512, box_positive_fraction=0.25,
+ bbox_reg_weights=None,
+ # keypoint parameters
+ keypoint_roi_pool=None, keypoint_head=None, keypoint_predictor=None,
+ num_keypoints=17):
+
+ assert isinstance(keypoint_roi_pool, (MultiScaleRoIAlign, type(None)))
+ if min_size is None:
+ min_size = (640, 672, 704, 736, 768, 800)
+
+ if num_classes is not None:
+ if keypoint_predictor is not None:
+ raise ValueError("num_classes should be None when keypoint_predictor is specified")
+
+ out_channels = backbone.out_channels
+
+ if keypoint_roi_pool is None:
+ keypoint_roi_pool = MultiScaleRoIAlign(
+ featmap_names=['0', '1', '2', '3'],
+ output_size=14,
+ sampling_ratio=2)
+
+ if keypoint_head is None:
+ keypoint_layers = tuple(512 for _ in range(8))
+ keypoint_head = KeypointRCNNHeads(out_channels, keypoint_layers)
+
+ if keypoint_predictor is None:
+ keypoint_dim_reduced = 512 # == keypoint_layers[-1]
+ keypoint_predictor = KeypointRCNNPredictor(keypoint_dim_reduced, num_keypoints)
+
+ super(KeypointRCNN, self).__init__(
+ backbone, num_classes,
+ # transform parameters
+ min_size, max_size,
+ image_mean, image_std,
+ # RPN-specific parameters
+ rpn_anchor_generator, rpn_head,
+ rpn_pre_nms_top_n_train, rpn_pre_nms_top_n_test,
+ rpn_post_nms_top_n_train, rpn_post_nms_top_n_test,
+ rpn_nms_thresh,
+ rpn_fg_iou_thresh, rpn_bg_iou_thresh,
+ rpn_batch_size_per_image, rpn_positive_fraction,
+ # Box parameters
+ box_roi_pool, box_head, box_predictor,
+ box_score_thresh, box_nms_thresh, box_detections_per_img,
+ box_fg_iou_thresh, box_bg_iou_thresh,
+ box_batch_size_per_image, box_positive_fraction,
+ bbox_reg_weights)
+
+ self.roi_heads.keypoint_roi_pool = keypoint_roi_pool
+ self.roi_heads.keypoint_head = keypoint_head
+ self.roi_heads.keypoint_predictor = keypoint_predictor
+
+
+class KeypointRCNNHeads(nn.Sequential):
+ def __init__(self, in_channels, layers):
+ d = []
+ next_feature = in_channels
+ for l in layers:
+ d.append(misc_nn_ops.Conv2d(next_feature, l, 3, stride=1, padding=1))
+ d.append(nn.ReLU(inplace=True))
+ next_feature = l
+ super(KeypointRCNNHeads, self).__init__(*d)
+ for m in self.children():
+ if isinstance(m, misc_nn_ops.Conv2d):
+ nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
+ nn.init.constant_(m.bias, 0)
+
+
+class KeypointRCNNPredictor(nn.Module):
+ def __init__(self, in_channels, num_keypoints):
+ super(KeypointRCNNPredictor, self).__init__()
+ input_features = in_channels
+ deconv_kernel = 4
+ self.kps_score_lowres = misc_nn_ops.ConvTranspose2d(
+ input_features,
+ num_keypoints,
+ deconv_kernel,
+ stride=2,
+ padding=deconv_kernel // 2 - 1,
+ )
+ nn.init.kaiming_normal_(
+ self.kps_score_lowres.weight, mode="fan_out", nonlinearity="relu"
+ )
+ nn.init.constant_(self.kps_score_lowres.bias, 0)
+ self.up_scale = 2
+ self.out_channels = num_keypoints
+
+ def forward(self, x):
+ x = self.kps_score_lowres(x)
+ x = misc_nn_ops.interpolate(
+ x, scale_factor=float(self.up_scale), mode="bilinear", align_corners=False
+ )
+ return x
+
+
+model_urls = {
+ # legacy model for BC reasons, see https://github.com/pytorch/vision/issues/1606
+ 'keypointrcnn_resnet50_fpn_coco_legacy':
+ 'https://download.pytorch.org/models/keypointrcnn_resnet50_fpn_coco-9f466800.pth',
+ 'keypointrcnn_resnet50_fpn_coco':
+ 'https://download.pytorch.org/models/keypointrcnn_resnet50_fpn_coco-fc266e95.pth',
+}
+
+
+def keypointrcnn_resnet50_fpn(pretrained=False, progress=True,
+ num_classes=2, num_keypoints=17,
+ pretrained_backbone=True, **kwargs):
+ """
+ Constructs a Keypoint R-CNN model with a ResNet-50-FPN backbone.
+
+ The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
+ image, and should be in ``0-1`` range. Different images can have different sizes.
+
+ The behavior of the model changes depending on whether it is in training or evaluation mode.
+
+ During training, the model expects both the input tensors and targets (a list of dictionaries),
+ containing:
+ - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
+ between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
+ - labels (``Int64Tensor[N]``): the class label for each ground-truth box
+ - keypoints (``FloatTensor[N, K, 3]``): the ``K`` keypoints location for each of the ``N`` instances, in the
+ format ``[x, y, visibility]``, where ``visibility=0`` means that the keypoint is not visible.
+
+ The model returns a ``Dict[Tensor]`` during training, containing the classification and regression
+ losses for both the RPN and the R-CNN, and the keypoint loss.
+
+ During inference, the model requires only the input tensors, and returns the post-processed
+ predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as
+ follows:
+ - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
+ between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
+ - labels (``Int64Tensor[N]``): the predicted labels for each image
+ - scores (``Tensor[N]``): the scores of each prediction
+ - keypoints (``FloatTensor[N, K, 3]``): the locations of the predicted keypoints, in ``[x, y, v]`` format.
+
+ Keypoint R-CNN is exportable to ONNX for a fixed batch size with input images of fixed size.
+
+ Example::
+
+ >>> model = torchvision.models.detection.keypointrcnn_resnet50_fpn(pretrained=True)
+ >>> model.eval()
+ >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
+ >>> predictions = model(x)
+ >>>
+ >>> # optionally, if you want to export the model to ONNX:
+ >>> torch.onnx.export(model, x, "keypoint_rcnn.onnx", opset_version = 11)
+
+ Arguments:
+ pretrained (bool): If True, returns a model pre-trained on COCO train2017
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ if pretrained:
+ # no need to download the backbone if pretrained is set
+ pretrained_backbone = False
+ backbone = resnet_fpn_backbone('resnet50', pretrained_backbone)
+ model = KeypointRCNN(backbone, num_classes, num_keypoints=num_keypoints, **kwargs)
+ if pretrained:
+ key = 'keypointrcnn_resnet50_fpn_coco'
+ if pretrained == 'legacy':
+ key += '_legacy'
+ state_dict = load_state_dict_from_url(model_urls[key],
+ progress=progress)
+ model.load_state_dict(state_dict)
+ return model
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/mask_rcnn.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/mask_rcnn.py
new file mode 100644
index 0000000000..a8a980fa3c
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/mask_rcnn.py
@@ -0,0 +1,323 @@
+from collections import OrderedDict
+
+import torch
+from torch import nn
+import torch.nn.functional as F
+
+from torchvision.ops import misc as misc_nn_ops
+from torchvision.ops import MultiScaleRoIAlign
+
+from ..utils import load_state_dict_from_url
+
+from .faster_rcnn import FasterRCNN
+from .backbone_utils import resnet_fpn_backbone
+
+__all__ = [
+ "MaskRCNN", "maskrcnn_resnet50_fpn",
+]
+
+
+class MaskRCNN(FasterRCNN):
+ """
+ Implements Mask R-CNN.
+
+ The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each
+ image, and should be in 0-1 range. Different images can have different sizes.
+
+ The behavior of the model changes depending on whether it is in training or evaluation mode.
+
+ During training, the model expects both the input tensors and targets (a list of dictionaries),
+ containing:
+ - boxes (FloatTensor[N, 4]): the ground-truth boxes in [x1, y1, x2, y2] format, with values of x
+ between 0 and W and values of y between 0 and H
+ - labels (Int64Tensor[N]): the class label for each ground-truth box
+ - masks (UInt8Tensor[N, H, W]): the segmentation binary masks for each instance
+
+ The model returns a Dict[Tensor] during training, containing the classification and regression
+ losses for both the RPN and the R-CNN, and the mask loss.
+
+ During inference, the model requires only the input tensors, and returns the post-processed
+ predictions as a List[Dict[Tensor]], one for each input image. The fields of the Dict are as
+ follows:
+ - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values of x
+ between 0 and W and values of y between 0 and H
+ - labels (Int64Tensor[N]): the predicted labels for each image
+ - scores (Tensor[N]): the scores of each prediction
+ - masks (UInt8Tensor[N, 1, H, W]): the predicted masks for each instance, in 0-1 range. In order to
+ obtain the final segmentation masks, the soft masks can be thresholded, generally
+ with a value of 0.5 (mask >= 0.5)
+
+ Arguments:
+ backbone (nn.Module): the network used to compute the features for the model.
+ It should contain an out_channels attribute, which indicates the number of output
+ channels that each feature map has (and it should be the same for all feature maps).
+ The backbone should return a single Tensor or an OrderedDict[Tensor].
+ num_classes (int): number of output classes of the model (including the background).
+ If box_predictor is specified, num_classes should be None.
+ min_size (int): minimum size of the image to be rescaled before feeding it to the backbone
+ max_size (int): maximum size of the image to be rescaled before feeding it to the backbone
+ image_mean (Tuple[float, float, float]): mean values used for input normalization.
+ They are generally the mean values of the dataset on which the backbone has been trained.
+ image_std (Tuple[float, float, float]): std values used for input normalization.
+ They are generally the std values of the dataset on which the backbone has been trained.
+ rpn_anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature
+ maps.
+ rpn_head (nn.Module): module that computes the objectness and regression deltas from the RPN
+ rpn_pre_nms_top_n_train (int): number of proposals to keep before applying NMS during training
+ rpn_pre_nms_top_n_test (int): number of proposals to keep before applying NMS during testing
+ rpn_post_nms_top_n_train (int): number of proposals to keep after applying NMS during training
+ rpn_post_nms_top_n_test (int): number of proposals to keep after applying NMS during testing
+ rpn_nms_thresh (float): NMS threshold used for postprocessing the RPN proposals
+ rpn_fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be
+ considered as positive during training of the RPN.
+ rpn_bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be
+ considered as negative during training of the RPN.
+ rpn_batch_size_per_image (int): number of anchors that are sampled during training of the RPN
+ for computing the loss
+ rpn_positive_fraction (float): proportion of positive anchors in a mini-batch during training
+ of the RPN
+ box_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in
+ the locations indicated by the bounding boxes
+ box_head (nn.Module): module that takes the cropped feature maps as input
+ box_predictor (nn.Module): module that takes the output of box_head and returns the
+ classification logits and box regression deltas.
+ box_score_thresh (float): during inference, only return proposals with a classification score
+ greater than box_score_thresh
+ box_nms_thresh (float): NMS threshold for the prediction head. Used during inference
+ box_detections_per_img (int): maximum number of detections per image, for all classes.
+ box_fg_iou_thresh (float): minimum IoU between the proposals and the GT box so that they can be
+ considered as positive during training of the classification head
+ box_bg_iou_thresh (float): maximum IoU between the proposals and the GT box so that they can be
+ considered as negative during training of the classification head
+ box_batch_size_per_image (int): number of proposals that are sampled during training of the
+ classification head
+ box_positive_fraction (float): proportion of positive proposals in a mini-batch during training
+ of the classification head
+ bbox_reg_weights (Tuple[float, float, float, float]): weights for the encoding/decoding of the
+ bounding boxes
+ mask_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in
+ the locations indicated by the bounding boxes, which will be used for the mask head.
+ mask_head (nn.Module): module that takes the cropped feature maps as input
+ mask_predictor (nn.Module): module that takes the output of the mask_head and returns the
+ segmentation mask logits
+
+ Example::
+
+ >>> import torch
+ >>> import torchvision
+ >>> from torchvision.models.detection import MaskRCNN
+ >>> from torchvision.models.detection.rpn import AnchorGenerator
+ >>>
+ >>> # load a pre-trained model for classification and return
+ >>> # only the features
+ >>> backbone = torchvision.models.mobilenet_v2(pretrained=True).features
+ >>> # MaskRCNN needs to know the number of
+ >>> # output channels in a backbone. For mobilenet_v2, it's 1280
+ >>> # so we need to add it here
+ >>> backbone.out_channels = 1280
+ >>>
+ >>> # let's make the RPN generate 5 x 3 anchors per spatial
+ >>> # location, with 5 different sizes and 3 different aspect
+ >>> # ratios. We have a Tuple[Tuple[int]] because each feature
+ >>> # map could potentially have different sizes and
+ >>> # aspect ratios
+ >>> anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
+ >>> aspect_ratios=((0.5, 1.0, 2.0),))
+ >>>
+ >>> # let's define what are the feature maps that we will
+ >>> # use to perform the region of interest cropping, as well as
+ >>> # the size of the crop after rescaling.
+ >>> # if your backbone returns a Tensor, featmap_names is expected to
+ >>> # be ['0']. More generally, the backbone should return an
+ >>> # OrderedDict[Tensor], and in featmap_names you can choose which
+ >>> # feature maps to use.
+ >>> roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
+ >>> output_size=7,
+ >>> sampling_ratio=2)
+ >>>
+ >>> mask_roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
+ >>> output_size=14,
+ >>> sampling_ratio=2)
+ >>> # put the pieces together inside a MaskRCNN model
+ >>> model = MaskRCNN(backbone,
+ >>> num_classes=2,
+ >>> rpn_anchor_generator=anchor_generator,
+ >>> box_roi_pool=roi_pooler,
+ >>> mask_roi_pool=mask_roi_pooler)
+ >>> model.eval()
+ >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
+ >>> predictions = model(x)
+ """
+ def __init__(self, backbone, num_classes=None,
+ # transform parameters
+ min_size=800, max_size=1333,
+ image_mean=None, image_std=None,
+ # RPN parameters
+ rpn_anchor_generator=None, rpn_head=None,
+ rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000,
+ rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000,
+ rpn_nms_thresh=0.7,
+ rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,
+ rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
+ # Box parameters
+ box_roi_pool=None, box_head=None, box_predictor=None,
+ box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100,
+ box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5,
+ box_batch_size_per_image=512, box_positive_fraction=0.25,
+ bbox_reg_weights=None,
+ # Mask parameters
+ mask_roi_pool=None, mask_head=None, mask_predictor=None):
+
+ assert isinstance(mask_roi_pool, (MultiScaleRoIAlign, type(None)))
+
+ if num_classes is not None:
+ if mask_predictor is not None:
+ raise ValueError("num_classes should be None when mask_predictor is specified")
+
+ out_channels = backbone.out_channels
+
+ if mask_roi_pool is None:
+ mask_roi_pool = MultiScaleRoIAlign(
+ featmap_names=['0', '1', '2', '3'],
+ output_size=14,
+ sampling_ratio=2)
+
+ if mask_head is None:
+ mask_layers = (256, 256, 256, 256)
+ mask_dilation = 1
+ mask_head = MaskRCNNHeads(out_channels, mask_layers, mask_dilation)
+
+ if mask_predictor is None:
+ mask_predictor_in_channels = 256 # == mask_layers[-1]
+ mask_dim_reduced = 256
+ mask_predictor = MaskRCNNPredictor(mask_predictor_in_channels,
+ mask_dim_reduced, num_classes)
+
+ super(MaskRCNN, self).__init__(
+ backbone, num_classes,
+ # transform parameters
+ min_size, max_size,
+ image_mean, image_std,
+ # RPN-specific parameters
+ rpn_anchor_generator, rpn_head,
+ rpn_pre_nms_top_n_train, rpn_pre_nms_top_n_test,
+ rpn_post_nms_top_n_train, rpn_post_nms_top_n_test,
+ rpn_nms_thresh,
+ rpn_fg_iou_thresh, rpn_bg_iou_thresh,
+ rpn_batch_size_per_image, rpn_positive_fraction,
+ # Box parameters
+ box_roi_pool, box_head, box_predictor,
+ box_score_thresh, box_nms_thresh, box_detections_per_img,
+ box_fg_iou_thresh, box_bg_iou_thresh,
+ box_batch_size_per_image, box_positive_fraction,
+ bbox_reg_weights)
+
+ self.roi_heads.mask_roi_pool = mask_roi_pool
+ self.roi_heads.mask_head = mask_head
+ self.roi_heads.mask_predictor = mask_predictor
+
+
+class MaskRCNNHeads(nn.Sequential):
+ def __init__(self, in_channels, layers, dilation):
+ """
+ Arguments:
+ in_channels (int): number of input channels
+ layers (list): feature dimensions of each FCN layer
+ dilation (int): dilation rate of kernel
+ """
+ d = OrderedDict()
+ next_feature = in_channels
+ for layer_idx, layer_features in enumerate(layers, 1):
+ d["mask_fcn{}".format(layer_idx)] = misc_nn_ops.Conv2d(
+ next_feature, layer_features, kernel_size=3,
+ stride=1, padding=dilation, dilation=dilation)
+ d["relu{}".format(layer_idx)] = nn.ReLU(inplace=True)
+ next_feature = layer_features
+
+ super(MaskRCNNHeads, self).__init__(d)
+ for name, param in self.named_parameters():
+ if "weight" in name:
+ nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")
+ # elif "bias" in name:
+ # nn.init.constant_(param, 0)
+
+
+class MaskRCNNPredictor(nn.Sequential):
+ def __init__(self, in_channels, dim_reduced, num_classes):
+ super(MaskRCNNPredictor, self).__init__(OrderedDict([
+ ("conv5_mask", misc_nn_ops.ConvTranspose2d(in_channels, dim_reduced, 2, 2, 0)),
+ ("relu", nn.ReLU(inplace=True)),
+ ("mask_fcn_logits", misc_nn_ops.Conv2d(dim_reduced, num_classes, 1, 1, 0)),
+ ]))
+
+ for name, param in self.named_parameters():
+ if "weight" in name:
+ nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")
+ # elif "bias" in name:
+ # nn.init.constant_(param, 0)
+
+
+model_urls = {
+ 'maskrcnn_resnet50_fpn_coco':
+ 'https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth',
+}
+
+
+def maskrcnn_resnet50_fpn(pretrained=False, progress=True,
+ num_classes=91, pretrained_backbone=True, **kwargs):
+ """
+ Constructs a Mask R-CNN model with a ResNet-50-FPN backbone.
+
+ The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
+ image, and should be in ``0-1`` range. Different images can have different sizes.
+
+    The behavior of the model changes depending on whether it is in training or evaluation mode.
+
+    During training, the model expects both the input tensors and targets (a list of dictionaries),
+    containing:
+ - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
+ between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
+ - labels (``Int64Tensor[N]``): the class label for each ground-truth box
+ - masks (``UInt8Tensor[N, H, W]``): the segmentation binary masks for each instance
+
+ The model returns a ``Dict[Tensor]`` during training, containing the classification and regression
+ losses for both the RPN and the R-CNN, and the mask loss.
+
+ During inference, the model requires only the input tensors, and returns the post-processed
+ predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as
+ follows:
+ - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with values of ``x``
+ between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H``
+ - labels (``Int64Tensor[N]``): the predicted labels for each image
+        - scores (``Tensor[N]``): the scores of each prediction
+ - masks (``UInt8Tensor[N, 1, H, W]``): the predicted masks for each instance, in ``0-1`` range. In order to
+ obtain the final segmentation masks, the soft masks can be thresholded, generally
+ with a value of 0.5 (``mask >= 0.5``)
+
+    Mask R-CNN is exportable to ONNX for a fixed batch size with input images of fixed size.
+
+ Example::
+
+ >>> model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
+ >>> model.eval()
+ >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
+ >>> predictions = model(x)
+ >>>
+ >>> # optionally, if you want to export the model to ONNX:
+ >>> torch.onnx.export(model, x, "mask_rcnn.onnx", opset_version = 11)
+
+ Arguments:
+ pretrained (bool): If True, returns a model pre-trained on COCO train2017
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ if pretrained:
+ # no need to download the backbone if pretrained is set
+ pretrained_backbone = False
+ backbone = resnet_fpn_backbone('resnet50', pretrained_backbone)
+ model = MaskRCNN(backbone, num_classes, **kwargs)
+ if pretrained:
+ state_dict = load_state_dict_from_url(model_urls['maskrcnn_resnet50_fpn_coco'],
+ progress=progress)
+ model.load_state_dict(state_dict)
+ return model
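+
+
+# A minimal fine-tuning sketch (illustrative only; `num_classes` and the use of
+# FastRCNNPredictor from .faster_rcnn are assumptions, not part of this file):
+#
+#     model = maskrcnn_resnet50_fpn(pretrained=True)
+#     # swap the box predictor for a dataset with `num_classes` classes (incl. background)
+#     in_features = model.roi_heads.box_predictor.cls_score.in_features
+#     model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
+#     # swap the mask predictor accordingly
+#     in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
+#     model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask, 256, num_classes)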
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/roi_heads.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/roi_heads.py
new file mode 100644
index 0000000000..fd1334fbc2
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/roi_heads.py
@@ -0,0 +1,870 @@
+import torch
+import torchvision
+
+import torch.nn.functional as F
+from torch import nn, Tensor
+
+from torchvision.ops import boxes as box_ops
+from torchvision.ops import misc as misc_nn_ops
+
+from torchvision.ops import roi_align
+
+from . import _utils as det_utils
+
+from torch.jit.annotations import Optional, List, Dict, Tuple
+
+
+def fastrcnn_loss(class_logits, box_regression, labels, regression_targets):
+ # type: (Tensor, Tensor, List[Tensor], List[Tensor])
+ """
+ Computes the loss for Faster R-CNN.
+
+ Arguments:
+ class_logits (Tensor)
+ box_regression (Tensor)
+        labels (List[Tensor])
+        regression_targets (List[Tensor])
+
+ Returns:
+ classification_loss (Tensor)
+ box_loss (Tensor)
+ """
+
+ labels = torch.cat(labels, dim=0)
+ regression_targets = torch.cat(regression_targets, dim=0)
+
+ classification_loss = F.cross_entropy(class_logits, labels)
+
+ # get indices that correspond to the regression targets for
+ # the corresponding ground truth labels, to be used with
+ # advanced indexing
+ sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1)
+ labels_pos = labels[sampled_pos_inds_subset]
+ N, num_classes = class_logits.shape
+ box_regression = box_regression.reshape(N, -1, 4)
+
+ box_loss = F.smooth_l1_loss(
+ box_regression[sampled_pos_inds_subset, labels_pos],
+ regression_targets[sampled_pos_inds_subset],
+ reduction="sum",
+ )
+ box_loss = box_loss / labels.numel()
+
+ return classification_loss, box_loss
+
+
+def maskrcnn_inference(x, labels):
+ # type: (Tensor, List[Tensor])
+ """
+ From the results of the CNN, post process the masks
+ by taking the mask corresponding to the class with max
+ probability (which are of fixed size and directly output
+ by the CNN) and return the masks in the mask field of the BoxList.
+
+ Arguments:
+ x (Tensor): the mask logits
+        labels (List[Tensor]): the predicted labels, one tensor per image,
+            used to select the mask of the most likely class for each detection
+
+    Returns:
+        mask_prob_list (List[Tensor]): one tensor per image containing the
+            per-instance mask probabilities
+ """
+ mask_prob = x.sigmoid()
+
+ # select masks coresponding to the predicted classes
+ num_masks = x.shape[0]
+ boxes_per_image = [len(l) for l in labels]
+ labels = torch.cat(labels)
+ index = torch.arange(num_masks, device=labels.device)
+ mask_prob = mask_prob[index, labels][:, None]
+
+ if len(boxes_per_image) == 1:
+ # TODO : remove when dynamic split supported in ONNX
+ # and remove assignment to mask_prob_list, just assign to mask_prob
+ mask_prob_list = [mask_prob]
+ else:
+ mask_prob_list = mask_prob.split(boxes_per_image, dim=0)
+
+ return mask_prob_list
+
+
+def project_masks_on_boxes(gt_masks, boxes, matched_idxs, M):
+ # type: (Tensor, Tensor, Tensor, int)
+ """
+ Given segmentation masks and the bounding boxes corresponding
+ to the location of the masks in the image, this function
+ crops and resizes the masks in the position defined by the
+ boxes. This prepares the masks for them to be fed to the
+ loss computation as the targets.
+ """
+ matched_idxs = matched_idxs.to(boxes)
+ rois = torch.cat([matched_idxs[:, None], boxes], dim=1)
+ gt_masks = gt_masks[:, None].to(rois)
+ return roi_align(gt_masks, rois, (M, M), 1.)[:, 0]
+
+
+def maskrcnn_loss(mask_logits, proposals, gt_masks, gt_labels, mask_matched_idxs):
+ # type: (Tensor, List[Tensor], List[Tensor], List[Tensor], List[Tensor])
+ """
+    Arguments:
+        mask_logits (Tensor)
+        proposals (List[Tensor])
+        gt_masks (List[Tensor])
+        gt_labels (List[Tensor])
+        mask_matched_idxs (List[Tensor])
+
+    Returns:
+ mask_loss (Tensor): scalar tensor containing the loss
+ """
+
+ discretization_size = mask_logits.shape[-1]
+ labels = [l[idxs] for l, idxs in zip(gt_labels, mask_matched_idxs)]
+ mask_targets = [
+ project_masks_on_boxes(m, p, i, discretization_size)
+ for m, p, i in zip(gt_masks, proposals, mask_matched_idxs)
+ ]
+
+ labels = torch.cat(labels, dim=0)
+ mask_targets = torch.cat(mask_targets, dim=0)
+
+ # torch.mean (in binary_cross_entropy_with_logits) doesn't
+ # accept empty tensors, so handle it separately
+ if mask_targets.numel() == 0:
+ return mask_logits.sum() * 0
+
+ mask_loss = F.binary_cross_entropy_with_logits(
+ mask_logits[torch.arange(labels.shape[0], device=labels.device), labels], mask_targets
+ )
+ return mask_loss
+
+
+def keypoints_to_heatmap(keypoints, rois, heatmap_size):
+ # type: (Tensor, Tensor, int)
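+    # Maps each keypoint to a flattened index (y * heatmap_size + x) in a
+    # heatmap_size x heatmap_size grid aligned with its ROI; `valid` marks
+    # keypoints that are both visible and inside the grid.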
+ offset_x = rois[:, 0]
+ offset_y = rois[:, 1]
+ scale_x = heatmap_size / (rois[:, 2] - rois[:, 0])
+ scale_y = heatmap_size / (rois[:, 3] - rois[:, 1])
+
+ offset_x = offset_x[:, None]
+ offset_y = offset_y[:, None]
+ scale_x = scale_x[:, None]
+ scale_y = scale_y[:, None]
+
+ x = keypoints[..., 0]
+ y = keypoints[..., 1]
+
+ x_boundary_inds = x == rois[:, 2][:, None]
+ y_boundary_inds = y == rois[:, 3][:, None]
+
+ x = (x - offset_x) * scale_x
+ x = x.floor().long()
+ y = (y - offset_y) * scale_y
+ y = y.floor().long()
+
+ x[x_boundary_inds] = torch.tensor(heatmap_size - 1)
+ y[y_boundary_inds] = torch.tensor(heatmap_size - 1)
+
+ valid_loc = (x >= 0) & (y >= 0) & (x < heatmap_size) & (y < heatmap_size)
+ vis = keypoints[..., 2] > 0
+ valid = (valid_loc & vis).long()
+
+ lin_ind = y * heatmap_size + x
+ heatmaps = lin_ind * valid
+
+ return heatmaps, valid
+
+
+def _onnx_heatmaps_to_keypoints(maps, maps_i, roi_map_width, roi_map_height,
+ widths_i, heights_i, offset_x_i, offset_y_i):
+ num_keypoints = torch.scalar_tensor(maps.size(1), dtype=torch.int64)
+
+ width_correction = widths_i / roi_map_width
+ height_correction = heights_i / roi_map_height
+
+ roi_map = torch.nn.functional.interpolate(
+ maps_i[None], size=(int(roi_map_height), int(roi_map_width)), mode='bicubic', align_corners=False)[0]
+
+ w = torch.scalar_tensor(roi_map.size(2), dtype=torch.int64)
+ pos = roi_map.reshape(num_keypoints, -1).argmax(dim=1)
+
+ x_int = (pos % w)
+ y_int = ((pos - x_int) / w)
+
+ x = (torch.tensor(0.5, dtype=torch.float32) + x_int.to(dtype=torch.float32)) * \
+ width_correction.to(dtype=torch.float32)
+ y = (torch.tensor(0.5, dtype=torch.float32) + y_int.to(dtype=torch.float32)) * \
+ height_correction.to(dtype=torch.float32)
+
+ xy_preds_i_0 = x + offset_x_i.to(dtype=torch.float32)
+ xy_preds_i_1 = y + offset_y_i.to(dtype=torch.float32)
+ xy_preds_i_2 = torch.ones((xy_preds_i_1.shape), dtype=torch.float32)
+ xy_preds_i = torch.stack([xy_preds_i_0.to(dtype=torch.float32),
+ xy_preds_i_1.to(dtype=torch.float32),
+ xy_preds_i_2.to(dtype=torch.float32)], 0)
+
+ # TODO: simplify when indexing without rank will be supported by ONNX
+ end_scores_i = roi_map.index_select(1, y_int.to(dtype=torch.int64)) \
+ .index_select(2, x_int.to(dtype=torch.int64))[:num_keypoints, 0, 0]
+ return xy_preds_i, end_scores_i
+
+
+@torch.jit.script
+def _onnx_heatmaps_to_keypoints_loop(maps, rois, widths_ceil, heights_ceil,
+ widths, heights, offset_x, offset_y, num_keypoints):
+ xy_preds = torch.zeros((0, 3, int(num_keypoints)), dtype=torch.float32, device=maps.device)
+ end_scores = torch.zeros((0, int(num_keypoints)), dtype=torch.float32, device=maps.device)
+
+ for i in range(int(rois.size(0))):
+ xy_preds_i, end_scores_i = _onnx_heatmaps_to_keypoints(maps, maps[i],
+ widths_ceil[i], heights_ceil[i],
+ widths[i], heights[i],
+ offset_x[i], offset_y[i])
+ xy_preds = torch.cat((xy_preds.to(dtype=torch.float32),
+ xy_preds_i.unsqueeze(0).to(dtype=torch.float32)), 0)
+ end_scores = torch.cat((end_scores.to(dtype=torch.float32),
+ end_scores_i.to(dtype=torch.float32).unsqueeze(0)), 0)
+ return xy_preds, end_scores
+
+
+def heatmaps_to_keypoints(maps, rois):
+ """Extract predicted keypoint locations from heatmaps. Output has shape
+ (#rois, 4, #keypoints) with the 4 rows corresponding to (x, y, logit, prob)
+ for each keypoint.
+ """
+ # This function converts a discrete image coordinate in a HEATMAP_SIZE x
+ # HEATMAP_SIZE image to a continuous keypoint coordinate. We maintain
+ # consistency with keypoints_to_heatmap_labels by using the conversion from
+ # Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a
+ # continuous coordinate.
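+    # Illustrative example: a ROI of width 112.4 gives roi_map_width = 113; the heatmaps
+    # are upsampled to that size, and an argmax at column d maps back to
+    # x = (d + 0.5) * (112.4 / 113) + offset_x in image coordinates.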
+ offset_x = rois[:, 0]
+ offset_y = rois[:, 1]
+
+ widths = rois[:, 2] - rois[:, 0]
+ heights = rois[:, 3] - rois[:, 1]
+ widths = widths.clamp(min=1)
+ heights = heights.clamp(min=1)
+ widths_ceil = widths.ceil()
+ heights_ceil = heights.ceil()
+
+ num_keypoints = maps.shape[1]
+
+ if torchvision._is_tracing():
+ xy_preds, end_scores = _onnx_heatmaps_to_keypoints_loop(maps, rois,
+ widths_ceil, heights_ceil, widths, heights,
+ offset_x, offset_y,
+ torch.scalar_tensor(num_keypoints, dtype=torch.int64))
+ return xy_preds.permute(0, 2, 1), end_scores
+
+ xy_preds = torch.zeros((len(rois), 3, num_keypoints), dtype=torch.float32, device=maps.device)
+ end_scores = torch.zeros((len(rois), num_keypoints), dtype=torch.float32, device=maps.device)
+ for i in range(len(rois)):
+ roi_map_width = int(widths_ceil[i].item())
+ roi_map_height = int(heights_ceil[i].item())
+ width_correction = widths[i] / roi_map_width
+ height_correction = heights[i] / roi_map_height
+ roi_map = torch.nn.functional.interpolate(
+ maps[i][None], size=(roi_map_height, roi_map_width), mode='bicubic', align_corners=False)[0]
+ # roi_map_probs = scores_to_probs(roi_map.copy())
+ w = roi_map.shape[2]
+ pos = roi_map.reshape(num_keypoints, -1).argmax(dim=1)
+
+ x_int = pos % w
+ y_int = (pos - x_int) // w
+ # assert (roi_map_probs[k, y_int, x_int] ==
+ # roi_map_probs[k, :, :].max())
+ x = (x_int.float() + 0.5) * width_correction
+ y = (y_int.float() + 0.5) * height_correction
+ xy_preds[i, 0, :] = x + offset_x[i]
+ xy_preds[i, 1, :] = y + offset_y[i]
+ xy_preds[i, 2, :] = 1
+ end_scores[i, :] = roi_map[torch.arange(num_keypoints), y_int, x_int]
+
+ return xy_preds.permute(0, 2, 1), end_scores
+
+
+def keypointrcnn_loss(keypoint_logits, proposals, gt_keypoints, keypoint_matched_idxs):
+ # type: (Tensor, List[Tensor], List[Tensor], List[Tensor])
+ N, K, H, W = keypoint_logits.shape
+ assert H == W
+ discretization_size = H
+ heatmaps = []
+ valid = []
+ for proposals_per_image, gt_kp_in_image, midx in zip(proposals, gt_keypoints, keypoint_matched_idxs):
+ kp = gt_kp_in_image[midx]
+ heatmaps_per_image, valid_per_image = keypoints_to_heatmap(
+ kp, proposals_per_image, discretization_size
+ )
+ heatmaps.append(heatmaps_per_image.view(-1))
+ valid.append(valid_per_image.view(-1))
+
+ keypoint_targets = torch.cat(heatmaps, dim=0)
+ valid = torch.cat(valid, dim=0).to(dtype=torch.uint8)
+ valid = torch.nonzero(valid).squeeze(1)
+
+    # torch.mean (in binary_cross_entropy_with_logits) doesn't
+    # accept empty tensors, so handle it separately
+ if keypoint_targets.numel() == 0 or len(valid) == 0:
+ return keypoint_logits.sum() * 0
+
+ keypoint_logits = keypoint_logits.view(N * K, H * W)
+
+ keypoint_loss = F.cross_entropy(keypoint_logits[valid], keypoint_targets[valid])
+ return keypoint_loss
+
+
+def keypointrcnn_inference(x, boxes):
+ # type: (Tensor, List[Tensor])
+ kp_probs = []
+ kp_scores = []
+
+ boxes_per_image = [box.size(0) for box in boxes]
+
+ if len(boxes_per_image) == 1:
+ # TODO : remove when dynamic split supported in ONNX
+ kp_prob, scores = heatmaps_to_keypoints(x, boxes[0])
+ return [kp_prob], [scores]
+
+ x2 = x.split(boxes_per_image, dim=0)
+
+ for xx, bb in zip(x2, boxes):
+ kp_prob, scores = heatmaps_to_keypoints(xx, bb)
+ kp_probs.append(kp_prob)
+ kp_scores.append(scores)
+
+ return kp_probs, kp_scores
+
+
+def _onnx_expand_boxes(boxes, scale):
+ # type: (Tensor, float)
+ w_half = (boxes[:, 2] - boxes[:, 0]) * .5
+ h_half = (boxes[:, 3] - boxes[:, 1]) * .5
+ x_c = (boxes[:, 2] + boxes[:, 0]) * .5
+ y_c = (boxes[:, 3] + boxes[:, 1]) * .5
+
+ w_half = w_half.to(dtype=torch.float32) * scale
+ h_half = h_half.to(dtype=torch.float32) * scale
+
+ boxes_exp0 = x_c - w_half
+ boxes_exp1 = y_c - h_half
+ boxes_exp2 = x_c + w_half
+ boxes_exp3 = y_c + h_half
+ boxes_exp = torch.stack((boxes_exp0, boxes_exp1, boxes_exp2, boxes_exp3), 1)
+ return boxes_exp
+
+
+# the next two functions should be merged inside Masker
+# but are kept here for the moment while we need them
+# temporarily for paste_mask_in_image
+def expand_boxes(boxes, scale):
+ # type: (Tensor, float)
+ if torchvision._is_tracing():
+ return _onnx_expand_boxes(boxes, scale)
+ w_half = (boxes[:, 2] - boxes[:, 0]) * .5
+ h_half = (boxes[:, 3] - boxes[:, 1]) * .5
+ x_c = (boxes[:, 2] + boxes[:, 0]) * .5
+ y_c = (boxes[:, 3] + boxes[:, 1]) * .5
+
+ w_half *= scale
+ h_half *= scale
+
+ boxes_exp = torch.zeros_like(boxes)
+ boxes_exp[:, 0] = x_c - w_half
+ boxes_exp[:, 2] = x_c + w_half
+ boxes_exp[:, 1] = y_c - h_half
+ boxes_exp[:, 3] = y_c + h_half
+ return boxes_exp
+
+
+@torch.jit.unused
+def expand_masks_tracing_scale(M, padding):
+ # type: (int, int) -> float
+ return torch.tensor(M + 2 * padding).to(torch.float32) / torch.tensor(M).to(torch.float32)
+
+
+def expand_masks(mask, padding):
+ # type: (Tensor, int)
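+    # e.g. a 28x28 mask with padding=1 is padded to 30x30 and the matching boxes
+    # are later expanded by scale = 30 / 28.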
+ M = mask.shape[-1]
+ if torch._C._get_tracing_state(): # could not import is_tracing(), not sure why
+ scale = expand_masks_tracing_scale(M, padding)
+ else:
+ scale = float(M + 2 * padding) / M
+ padded_mask = torch.nn.functional.pad(mask, (padding,) * 4)
+ return padded_mask, scale
+
+
+def paste_mask_in_image(mask, box, im_h, im_w):
+ # type: (Tensor, Tensor, int, int)
+ TO_REMOVE = 1
+ w = int(box[2] - box[0] + TO_REMOVE)
+ h = int(box[3] - box[1] + TO_REMOVE)
+ w = max(w, 1)
+ h = max(h, 1)
+
+ # Set shape to [batchxCxHxW]
+ mask = mask.expand((1, 1, -1, -1))
+
+ # Resize mask
+ mask = misc_nn_ops.interpolate(mask, size=(h, w), mode='bilinear', align_corners=False)
+ mask = mask[0][0]
+
+ im_mask = torch.zeros((im_h, im_w), dtype=mask.dtype, device=mask.device)
+ x_0 = max(box[0], 0)
+ x_1 = min(box[2] + 1, im_w)
+ y_0 = max(box[1], 0)
+ y_1 = min(box[3] + 1, im_h)
+
+ im_mask[y_0:y_1, x_0:x_1] = mask[
+ (y_0 - box[1]):(y_1 - box[1]), (x_0 - box[0]):(x_1 - box[0])
+ ]
+ return im_mask
+
+
+def _onnx_paste_mask_in_image(mask, box, im_h, im_w):
+ one = torch.ones(1, dtype=torch.int64)
+ zero = torch.zeros(1, dtype=torch.int64)
+
+ w = (box[2] - box[0] + one)
+ h = (box[3] - box[1] + one)
+ w = torch.max(torch.cat((w, one)))
+ h = torch.max(torch.cat((h, one)))
+
+ # Set shape to [batchxCxHxW]
+ mask = mask.expand((1, 1, mask.size(0), mask.size(1)))
+
+ # Resize mask
+ mask = torch.nn.functional.interpolate(mask, size=(int(h), int(w)), mode='bilinear', align_corners=False)
+ mask = mask[0][0]
+
+ x_0 = torch.max(torch.cat((box[0].unsqueeze(0), zero)))
+ x_1 = torch.min(torch.cat((box[2].unsqueeze(0) + one, im_w.unsqueeze(0))))
+ y_0 = torch.max(torch.cat((box[1].unsqueeze(0), zero)))
+ y_1 = torch.min(torch.cat((box[3].unsqueeze(0) + one, im_h.unsqueeze(0))))
+
+ unpaded_im_mask = mask[(y_0 - box[1]):(y_1 - box[1]),
+ (x_0 - box[0]):(x_1 - box[0])]
+
+ # TODO : replace below with a dynamic padding when support is added in ONNX
+
+ # pad y
+ zeros_y0 = torch.zeros(y_0, unpaded_im_mask.size(1))
+ zeros_y1 = torch.zeros(im_h - y_1, unpaded_im_mask.size(1))
+ concat_0 = torch.cat((zeros_y0,
+ unpaded_im_mask.to(dtype=torch.float32),
+ zeros_y1), 0)[0:im_h, :]
+ # pad x
+ zeros_x0 = torch.zeros(concat_0.size(0), x_0)
+ zeros_x1 = torch.zeros(concat_0.size(0), im_w - x_1)
+ im_mask = torch.cat((zeros_x0,
+ concat_0,
+ zeros_x1), 1)[:, :im_w]
+ return im_mask
+
+
+@torch.jit.script
+def _onnx_paste_masks_in_image_loop(masks, boxes, im_h, im_w):
+ res_append = torch.zeros(0, im_h, im_w)
+ for i in range(masks.size(0)):
+ mask_res = _onnx_paste_mask_in_image(masks[i][0], boxes[i], im_h, im_w)
+ mask_res = mask_res.unsqueeze(0)
+ res_append = torch.cat((res_append, mask_res))
+ return res_append
+
+
+def paste_masks_in_image(masks, boxes, img_shape, padding=1):
+ # type: (Tensor, Tensor, Tuple[int, int], int)
+ masks, scale = expand_masks(masks, padding=padding)
+ boxes = expand_boxes(boxes, scale).to(dtype=torch.int64)
+ im_h, im_w = img_shape
+
+ if torchvision._is_tracing():
+ return _onnx_paste_masks_in_image_loop(masks, boxes,
+ torch.scalar_tensor(im_h, dtype=torch.int64),
+ torch.scalar_tensor(im_w, dtype=torch.int64))[:, None]
+ res = [
+ paste_mask_in_image(m[0], b, im_h, im_w)
+ for m, b in zip(masks, boxes)
+ ]
+ if len(res) > 0:
+ ret = torch.stack(res, dim=0)[:, None]
+ else:
+ ret = masks.new_empty((0, 1, im_h, im_w))
+ return ret
+
+
+class RoIHeads(torch.nn.Module):
+ __annotations__ = {
+ 'box_coder': det_utils.BoxCoder,
+ 'proposal_matcher': det_utils.Matcher,
+ 'fg_bg_sampler': det_utils.BalancedPositiveNegativeSampler,
+ }
+
+ def __init__(self,
+ box_roi_pool,
+ box_head,
+ box_predictor,
+ # Faster R-CNN training
+ fg_iou_thresh, bg_iou_thresh,
+ batch_size_per_image, positive_fraction,
+ bbox_reg_weights,
+ # Faster R-CNN inference
+ score_thresh,
+ nms_thresh,
+ detections_per_img,
+ # Mask
+ mask_roi_pool=None,
+ mask_head=None,
+ mask_predictor=None,
+ keypoint_roi_pool=None,
+ keypoint_head=None,
+ keypoint_predictor=None,
+ ):
+ super(RoIHeads, self).__init__()
+
+ self.box_similarity = box_ops.box_iou
+ # assign ground-truth boxes for each proposal
+ self.proposal_matcher = det_utils.Matcher(
+ fg_iou_thresh,
+ bg_iou_thresh,
+ allow_low_quality_matches=False)
+
+ self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
+ batch_size_per_image,
+ positive_fraction)
+
+ if bbox_reg_weights is None:
+ bbox_reg_weights = (10., 10., 5., 5.)
+ self.box_coder = det_utils.BoxCoder(bbox_reg_weights)
+
+ self.box_roi_pool = box_roi_pool
+ self.box_head = box_head
+ self.box_predictor = box_predictor
+
+ self.score_thresh = score_thresh
+ self.nms_thresh = nms_thresh
+ self.detections_per_img = detections_per_img
+
+ self.mask_roi_pool = mask_roi_pool
+ self.mask_head = mask_head
+ self.mask_predictor = mask_predictor
+
+ self.keypoint_roi_pool = keypoint_roi_pool
+ self.keypoint_head = keypoint_head
+ self.keypoint_predictor = keypoint_predictor
+
+ def has_mask(self):
+ if self.mask_roi_pool is None:
+ return False
+ if self.mask_head is None:
+ return False
+ if self.mask_predictor is None:
+ return False
+ return True
+
+ def has_keypoint(self):
+ if self.keypoint_roi_pool is None:
+ return False
+ if self.keypoint_head is None:
+ return False
+ if self.keypoint_predictor is None:
+ return False
+ return True
+
+ def assign_targets_to_proposals(self, proposals, gt_boxes, gt_labels):
+ # type: (List[Tensor], List[Tensor], List[Tensor])
+ matched_idxs = []
+ labels = []
+ for proposals_in_image, gt_boxes_in_image, gt_labels_in_image in zip(proposals, gt_boxes, gt_labels):
+
+ if gt_boxes_in_image.numel() == 0:
+ # Background image
+ device = proposals_in_image.device
+ clamped_matched_idxs_in_image = torch.zeros(
+ (proposals_in_image.shape[0],), dtype=torch.int64, device=device
+ )
+ labels_in_image = torch.zeros(
+ (proposals_in_image.shape[0],), dtype=torch.int64, device=device
+ )
+ else:
+ # set to self.box_similarity when https://github.com/pytorch/pytorch/issues/27495 lands
+ match_quality_matrix = box_ops.box_iou(gt_boxes_in_image, proposals_in_image)
+ matched_idxs_in_image = self.proposal_matcher(match_quality_matrix)
+
+ clamped_matched_idxs_in_image = matched_idxs_in_image.clamp(min=0)
+
+ labels_in_image = gt_labels_in_image[clamped_matched_idxs_in_image]
+ labels_in_image = labels_in_image.to(dtype=torch.int64)
+
+ # Label background (below the low threshold)
+ bg_inds = matched_idxs_in_image == self.proposal_matcher.BELOW_LOW_THRESHOLD
+ labels_in_image[bg_inds] = torch.tensor(0)
+
+ # Label ignore proposals (between low and high thresholds)
+ ignore_inds = matched_idxs_in_image == self.proposal_matcher.BETWEEN_THRESHOLDS
+ labels_in_image[ignore_inds] = torch.tensor(-1) # -1 is ignored by sampler
+
+ matched_idxs.append(clamped_matched_idxs_in_image)
+ labels.append(labels_in_image)
+ return matched_idxs, labels
+
+ def subsample(self, labels):
+ # type: (List[Tensor])
+ sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
+ sampled_inds = []
+ for img_idx, (pos_inds_img, neg_inds_img) in enumerate(
+ zip(sampled_pos_inds, sampled_neg_inds)
+ ):
+ img_sampled_inds = torch.nonzero(pos_inds_img | neg_inds_img).squeeze(1)
+ sampled_inds.append(img_sampled_inds)
+ return sampled_inds
+
+ def add_gt_proposals(self, proposals, gt_boxes):
+ # type: (List[Tensor], List[Tensor])
+ proposals = [
+ torch.cat((proposal, gt_box))
+ for proposal, gt_box in zip(proposals, gt_boxes)
+ ]
+
+ return proposals
+
+ def DELTEME_all(self, the_list):
+ # type: (List[bool])
+ for i in the_list:
+ if not i:
+ return False
+ return True
+
+ def check_targets(self, targets):
+ # type: (Optional[List[Dict[str, Tensor]]])
+ assert targets is not None
+ assert self.DELTEME_all(["boxes" in t for t in targets])
+ assert self.DELTEME_all(["labels" in t for t in targets])
+ if self.has_mask():
+ assert self.DELTEME_all(["masks" in t for t in targets])
+
+ def select_training_samples(self, proposals, targets):
+ # type: (List[Tensor], Optional[List[Dict[str, Tensor]]])
+ self.check_targets(targets)
+ assert targets is not None
+ dtype = proposals[0].dtype
+ device = proposals[0].device
+
+ gt_boxes = [t["boxes"].to(dtype) for t in targets]
+ gt_labels = [t["labels"] for t in targets]
+
+        # append ground-truth bboxes to proposals
+ proposals = self.add_gt_proposals(proposals, gt_boxes)
+
+ # get matching gt indices for each proposal
+ matched_idxs, labels = self.assign_targets_to_proposals(proposals, gt_boxes, gt_labels)
+ # sample a fixed proportion of positive-negative proposals
+ sampled_inds = self.subsample(labels)
+ matched_gt_boxes = []
+ num_images = len(proposals)
+ for img_id in range(num_images):
+ img_sampled_inds = sampled_inds[img_id]
+ proposals[img_id] = proposals[img_id][img_sampled_inds]
+ labels[img_id] = labels[img_id][img_sampled_inds]
+ matched_idxs[img_id] = matched_idxs[img_id][img_sampled_inds]
+
+ gt_boxes_in_image = gt_boxes[img_id]
+ if gt_boxes_in_image.numel() == 0:
+ gt_boxes_in_image = torch.zeros((1, 4), dtype=dtype, device=device)
+ matched_gt_boxes.append(gt_boxes_in_image[matched_idxs[img_id]])
+
+ regression_targets = self.box_coder.encode(matched_gt_boxes, proposals)
+ return proposals, matched_idxs, labels, regression_targets
+
+ def postprocess_detections(self, class_logits, box_regression, proposals, image_shapes):
+ # type: (Tensor, Tensor, List[Tensor], List[Tuple[int, int]])
+ device = class_logits.device
+ num_classes = class_logits.shape[-1]
+
+ boxes_per_image = [boxes_in_image.shape[0] for boxes_in_image in proposals]
+ pred_boxes = self.box_coder.decode(box_regression, proposals)
+
+ pred_scores = F.softmax(class_logits, -1)
+
+ pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
+ pred_scores_list = pred_scores.split(boxes_per_image, 0)
+
+ all_boxes = []
+ all_scores = []
+ all_labels = []
+ for boxes, scores, image_shape in zip(pred_boxes_list, pred_scores_list, image_shapes):
+ boxes = box_ops.clip_boxes_to_image(boxes, image_shape)
+
+ # create labels for each prediction
+ labels = torch.arange(num_classes, device=device)
+ labels = labels.view(1, -1).expand_as(scores)
+
+ # remove predictions with the background label
+ boxes = boxes[:, 1:]
+ scores = scores[:, 1:]
+ labels = labels[:, 1:]
+
+ # batch everything, by making every class prediction be a separate instance
+ boxes = boxes.reshape(-1, 4)
+ scores = scores.reshape(-1)
+ labels = labels.reshape(-1)
+
+ # remove low scoring boxes
+ inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
+ boxes, scores, labels = boxes[inds], scores[inds], labels[inds]
+
+ # remove empty boxes
+ keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
+ boxes, scores, labels = boxes[keep], scores[keep], labels[keep]
+
+ # non-maximum suppression, independently done per class
+ keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
+ # keep only topk scoring predictions
+ keep = keep[:self.detections_per_img]
+ boxes, scores, labels = boxes[keep], scores[keep], labels[keep]
+
+ all_boxes.append(boxes)
+ all_scores.append(scores)
+ all_labels.append(labels)
+
+ return all_boxes, all_scores, all_labels
+
+ def forward(self, features, proposals, image_shapes, targets=None):
+ # type: (Dict[str, Tensor], List[Tensor], List[Tuple[int, int]], Optional[List[Dict[str, Tensor]]])
+ """
+ Arguments:
+            features (Dict[str, Tensor])
+ proposals (List[Tensor[N, 4]])
+ image_shapes (List[Tuple[H, W]])
+ targets (List[Dict])
+ """
+ if targets is not None:
+ for t in targets:
+ # TODO: https://github.com/pytorch/pytorch/issues/26731
+ floating_point_types = (torch.float, torch.double, torch.half)
+ assert t["boxes"].dtype in floating_point_types, 'target boxes must of float type'
+ assert t["labels"].dtype == torch.int64, 'target labels must of int64 type'
+ if self.has_keypoint():
+ assert t["keypoints"].dtype == torch.float32, 'target keypoints must of float type'
+
+ if self.training:
+ proposals, matched_idxs, labels, regression_targets = self.select_training_samples(proposals, targets)
+ else:
+ labels = None
+ regression_targets = None
+ matched_idxs = None
+
+ box_features = self.box_roi_pool(features, proposals, image_shapes)
+ box_features = self.box_head(box_features)
+ class_logits, box_regression = self.box_predictor(box_features)
+
+ result = torch.jit.annotate(List[Dict[str, torch.Tensor]], [])
+ losses = {}
+ if self.training:
+ assert labels is not None and regression_targets is not None
+ loss_classifier, loss_box_reg = fastrcnn_loss(
+ class_logits, box_regression, labels, regression_targets)
+ losses = {
+ "loss_classifier": loss_classifier,
+ "loss_box_reg": loss_box_reg
+ }
+ else:
+ boxes, scores, labels = self.postprocess_detections(class_logits, box_regression, proposals, image_shapes)
+ num_images = len(boxes)
+ for i in range(num_images):
+ result.append(
+ {
+ "boxes": boxes[i],
+ "labels": labels[i],
+ "scores": scores[i],
+ }
+ )
+
+ if self.has_mask():
+ mask_proposals = [p["boxes"] for p in result]
+ if self.training:
+ assert matched_idxs is not None
+ # during training, only focus on positive boxes
+ num_images = len(proposals)
+ mask_proposals = []
+ pos_matched_idxs = []
+ for img_id in range(num_images):
+ pos = torch.nonzero(labels[img_id] > 0).squeeze(1)
+ mask_proposals.append(proposals[img_id][pos])
+ pos_matched_idxs.append(matched_idxs[img_id][pos])
+ else:
+ pos_matched_idxs = None
+
+ if self.mask_roi_pool is not None:
+ mask_features = self.mask_roi_pool(features, mask_proposals, image_shapes)
+ mask_features = self.mask_head(mask_features)
+ mask_logits = self.mask_predictor(mask_features)
+ else:
+ mask_logits = torch.tensor(0)
+ raise Exception("Expected mask_roi_pool to be not None")
+
+ loss_mask = {}
+ if self.training:
+ assert targets is not None
+ assert pos_matched_idxs is not None
+ assert mask_logits is not None
+
+ gt_masks = [t["masks"] for t in targets]
+ gt_labels = [t["labels"] for t in targets]
+ rcnn_loss_mask = maskrcnn_loss(
+ mask_logits, mask_proposals,
+ gt_masks, gt_labels, pos_matched_idxs)
+ loss_mask = {
+ "loss_mask": rcnn_loss_mask
+ }
+ else:
+ labels = [r["labels"] for r in result]
+ masks_probs = maskrcnn_inference(mask_logits, labels)
+ for mask_prob, r in zip(masks_probs, result):
+ r["masks"] = mask_prob
+
+ losses.update(loss_mask)
+
+ # keep none checks in if conditional so torchscript will conditionally
+ # compile each branch
+ if self.keypoint_roi_pool is not None and self.keypoint_head is not None \
+ and self.keypoint_predictor is not None:
+ keypoint_proposals = [p["boxes"] for p in result]
+ if self.training:
+ # during training, only focus on positive boxes
+ num_images = len(proposals)
+ keypoint_proposals = []
+ pos_matched_idxs = []
+ assert matched_idxs is not None
+ for img_id in range(num_images):
+ pos = torch.nonzero(labels[img_id] > 0).squeeze(1)
+ keypoint_proposals.append(proposals[img_id][pos])
+ pos_matched_idxs.append(matched_idxs[img_id][pos])
+ else:
+ pos_matched_idxs = None
+
+ keypoint_features = self.keypoint_roi_pool(features, keypoint_proposals, image_shapes)
+ keypoint_features = self.keypoint_head(keypoint_features)
+ keypoint_logits = self.keypoint_predictor(keypoint_features)
+
+ loss_keypoint = {}
+ if self.training:
+ assert targets is not None
+ assert pos_matched_idxs is not None
+
+ gt_keypoints = [t["keypoints"] for t in targets]
+ rcnn_loss_keypoint = keypointrcnn_loss(
+ keypoint_logits, keypoint_proposals,
+ gt_keypoints, pos_matched_idxs)
+ loss_keypoint = {
+ "loss_keypoint": rcnn_loss_keypoint
+ }
+ else:
+ assert keypoint_logits is not None
+ assert keypoint_proposals is not None
+
+ keypoints_probs, kp_scores = keypointrcnn_inference(keypoint_logits, keypoint_proposals)
+ for keypoint_prob, kps, r in zip(keypoints_probs, kp_scores, result):
+ r["keypoints"] = keypoint_prob
+ r["keypoints_scores"] = kps
+
+ losses.update(loss_keypoint)
+
+ return result, losses
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/rpn.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/rpn.py
new file mode 100644
index 0000000000..381bc77084
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/rpn.py
@@ -0,0 +1,501 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+import torch
+from torch.nn import functional as F
+from torch import nn, Tensor
+
+import torchvision
+from torchvision.ops import boxes as box_ops
+
+from . import _utils as det_utils
+from .image_list import ImageList
+
+from torch.jit.annotations import List, Optional, Dict, Tuple
+
+
+@torch.jit.unused
+def _onnx_get_num_anchors_and_pre_nms_top_n(ob, orig_pre_nms_top_n):
+ # type: (Tensor, int) -> Tuple[int, int]
+ from torch.onnx import operators
+ num_anchors = operators.shape_as_tensor(ob)[1].unsqueeze(0)
+ # TODO : remove cast to IntTensor/num_anchors.dtype when
+ # ONNX Runtime version is updated with ReduceMin int64 support
+ pre_nms_top_n = torch.min(torch.cat(
+ (torch.tensor([orig_pre_nms_top_n], dtype=num_anchors.dtype),
+ num_anchors), 0).to(torch.int32)).to(num_anchors.dtype)
+
+ return num_anchors, pre_nms_top_n
+
+
+class AnchorGenerator(nn.Module):
+ __annotations__ = {
+ "cell_anchors": Optional[List[torch.Tensor]],
+ "_cache": Dict[str, List[torch.Tensor]]
+ }
+
+ """
+ Module that generates anchors for a set of feature maps and
+ image sizes.
+
+    The module supports computing anchors at multiple sizes and aspect ratios
+    per feature map.
+
+    sizes and aspect_ratios should have the same number of elements, and that
+    number should correspond to the number of feature maps.
+
+ sizes[i] and aspect_ratios[i] can have an arbitrary number of elements,
+ and AnchorGenerator will output a set of sizes[i] * aspect_ratios[i] anchors
+ per spatial location for feature map i.
+
+ Arguments:
+ sizes (Tuple[Tuple[int]]):
+ aspect_ratios (Tuple[Tuple[float]]):
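+
+    Example (a minimal sketch; the sizes and aspect ratios below are illustrative)::
+
+        >>> # one feature map with 3 sizes x 3 aspect ratios = 9 anchors per location
+        >>> anchor_generator = AnchorGenerator(sizes=((32, 64, 128),),
+        >>>                                    aspect_ratios=((0.5, 1.0, 2.0),))
+        >>> anchor_generator.num_anchors_per_location()
+        [9]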
+ """
+
+ def __init__(
+ self,
+ sizes=(128, 256, 512),
+ aspect_ratios=(0.5, 1.0, 2.0),
+ ):
+ super(AnchorGenerator, self).__init__()
+
+ if not isinstance(sizes[0], (list, tuple)):
+ # TODO change this
+ sizes = tuple((s,) for s in sizes)
+ if not isinstance(aspect_ratios[0], (list, tuple)):
+ aspect_ratios = (aspect_ratios,) * len(sizes)
+
+ assert len(sizes) == len(aspect_ratios)
+
+ self.sizes = sizes
+ self.aspect_ratios = aspect_ratios
+ self.cell_anchors = None
+ self._cache = {}
+
+ # TODO: https://github.com/pytorch/pytorch/issues/26792
+ # For every (aspect_ratios, scales) combination, output a zero-centered anchor with those values.
+ # (scales, aspect_ratios) are usually an element of zip(self.scales, self.aspect_ratios)
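+    # e.g. scales=(32,) with aspect_ratios=(1.0,) produces the single zero-centered
+    # base anchor [-16., -16., 16., 16.] (values are rounded at the end).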
+ def generate_anchors(self, scales, aspect_ratios, dtype=torch.float32, device="cpu"):
+ # type: (List[int], List[float], int, Device) # noqa: F821
+ scales = torch.as_tensor(scales, dtype=dtype, device=device)
+ aspect_ratios = torch.as_tensor(aspect_ratios, dtype=dtype, device=device)
+ h_ratios = torch.sqrt(aspect_ratios)
+ w_ratios = 1 / h_ratios
+
+ ws = (w_ratios[:, None] * scales[None, :]).view(-1)
+ hs = (h_ratios[:, None] * scales[None, :]).view(-1)
+
+ base_anchors = torch.stack([-ws, -hs, ws, hs], dim=1) / 2
+ return base_anchors.round()
+
+ def set_cell_anchors(self, dtype, device):
+ # type: (int, Device) -> None # noqa: F821
+ if self.cell_anchors is not None:
+ cell_anchors = self.cell_anchors
+ assert cell_anchors is not None
+ # suppose that all anchors have the same device
+ # which is a valid assumption in the current state of the codebase
+ if cell_anchors[0].device == device:
+ return
+
+ cell_anchors = [
+ self.generate_anchors(
+ sizes,
+ aspect_ratios,
+ dtype,
+ device
+ )
+ for sizes, aspect_ratios in zip(self.sizes, self.aspect_ratios)
+ ]
+ self.cell_anchors = cell_anchors
+
+ def num_anchors_per_location(self):
+ return [len(s) * len(a) for s, a in zip(self.sizes, self.aspect_ratios)]
+
+ # For every combination of (a, (g, s), i) in (self.cell_anchors, zip(grid_sizes, strides), 0:2),
+ # output g[i] anchors that are s[i] distance apart in direction i, with the same dimensions as a.
+ def grid_anchors(self, grid_sizes, strides):
+ # type: (List[List[int]], List[List[Tensor]])
+ anchors = []
+ cell_anchors = self.cell_anchors
+ assert cell_anchors is not None
+
+ for size, stride, base_anchors in zip(
+ grid_sizes, strides, cell_anchors
+ ):
+ grid_height, grid_width = size
+ stride_height, stride_width = stride
+ device = base_anchors.device
+
+ # For output anchor, compute [x_center, y_center, x_center, y_center]
+ shifts_x = torch.arange(
+ 0, grid_width, dtype=torch.float32, device=device
+ ) * stride_width
+ shifts_y = torch.arange(
+ 0, grid_height, dtype=torch.float32, device=device
+ ) * stride_height
+ shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x)
+ shift_x = shift_x.reshape(-1)
+ shift_y = shift_y.reshape(-1)
+ shifts = torch.stack((shift_x, shift_y, shift_x, shift_y), dim=1)
+
+ # For every (base anchor, output anchor) pair,
+ # offset each zero-centered base anchor by the center of the output anchor.
+ anchors.append(
+ (shifts.view(-1, 1, 4) + base_anchors.view(1, -1, 4)).reshape(-1, 4)
+ )
+
+ return anchors
+
+ def cached_grid_anchors(self, grid_sizes, strides):
+ # type: (List[List[int]], List[List[Tensor]])
+ key = str(grid_sizes) + str(strides)
+ if key in self._cache:
+ return self._cache[key]
+ anchors = self.grid_anchors(grid_sizes, strides)
+ self._cache[key] = anchors
+ return anchors
+
+ def forward(self, image_list, feature_maps):
+ # type: (ImageList, List[Tensor])
+ grid_sizes = list([feature_map.shape[-2:] for feature_map in feature_maps])
+ image_size = image_list.tensors.shape[-2:]
+ dtype, device = feature_maps[0].dtype, feature_maps[0].device
+ strides = [[torch.tensor(image_size[0] / g[0], dtype=torch.int64, device=device),
+ torch.tensor(image_size[1] / g[1], dtype=torch.int64, device=device)] for g in grid_sizes]
+ self.set_cell_anchors(dtype, device)
+ anchors_over_all_feature_maps = self.cached_grid_anchors(grid_sizes, strides)
+ anchors = torch.jit.annotate(List[List[torch.Tensor]], [])
+ for i, (image_height, image_width) in enumerate(image_list.image_sizes):
+ anchors_in_image = []
+ for anchors_per_feature_map in anchors_over_all_feature_maps:
+ anchors_in_image.append(anchors_per_feature_map)
+ anchors.append(anchors_in_image)
+ anchors = [torch.cat(anchors_per_image) for anchors_per_image in anchors]
+        # Clear the cache to avoid memory leaks.
+ self._cache.clear()
+ return anchors
+
+
+class RPNHead(nn.Module):
+ """
+ Adds a simple RPN Head with classification and regression heads
+
+ Arguments:
+ in_channels (int): number of channels of the input feature
+ num_anchors (int): number of anchors to be predicted
+ """
+
+ def __init__(self, in_channels, num_anchors):
+ super(RPNHead, self).__init__()
+ self.conv = nn.Conv2d(
+ in_channels, in_channels, kernel_size=3, stride=1, padding=1
+ )
+ self.cls_logits = nn.Conv2d(in_channels, num_anchors, kernel_size=1, stride=1)
+ self.bbox_pred = nn.Conv2d(
+ in_channels, num_anchors * 4, kernel_size=1, stride=1
+ )
+
+ for l in self.children():
+ torch.nn.init.normal_(l.weight, std=0.01)
+ torch.nn.init.constant_(l.bias, 0)
+
+ def forward(self, x):
+ # type: (List[Tensor])
+ logits = []
+ bbox_reg = []
+ for feature in x:
+ t = F.relu(self.conv(feature))
+ logits.append(self.cls_logits(t))
+ bbox_reg.append(self.bbox_pred(t))
+ return logits, bbox_reg
+
+
+def permute_and_flatten(layer, N, A, C, H, W):
+ # type: (Tensor, int, int, int, int, int)
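+    # Reshapes a per-level prediction map from (N, A*C, H, W) to (N, H*W*A, C) so the
+    # per-location outputs line up with the order in which anchors are generated.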
+ layer = layer.view(N, -1, C, H, W)
+ layer = layer.permute(0, 3, 4, 1, 2)
+ layer = layer.reshape(N, -1, C)
+ return layer
+
+
+def concat_box_prediction_layers(box_cls, box_regression):
+ # type: (List[Tensor], List[Tensor])
+ box_cls_flattened = []
+ box_regression_flattened = []
+ # for each feature level, permute the outputs to make them be in the
+ # same format as the labels. Note that the labels are computed for
+ # all feature levels concatenated, so we keep the same representation
+ # for the objectness and the box_regression
+ for box_cls_per_level, box_regression_per_level in zip(
+ box_cls, box_regression
+ ):
+ N, AxC, H, W = box_cls_per_level.shape
+ Ax4 = box_regression_per_level.shape[1]
+ A = Ax4 // 4
+ C = AxC // A
+ box_cls_per_level = permute_and_flatten(
+ box_cls_per_level, N, A, C, H, W
+ )
+ box_cls_flattened.append(box_cls_per_level)
+
+ box_regression_per_level = permute_and_flatten(
+ box_regression_per_level, N, A, 4, H, W
+ )
+ box_regression_flattened.append(box_regression_per_level)
+ # concatenate on the first dimension (representing the feature levels), to
+ # take into account the way the labels were generated (with all feature maps
+ # being concatenated as well)
+ box_cls = torch.cat(box_cls_flattened, dim=1).flatten(0, -2)
+ box_regression = torch.cat(box_regression_flattened, dim=1).reshape(-1, 4)
+ return box_cls, box_regression
+
+
+class RegionProposalNetwork(torch.nn.Module):
+ """
+ Implements Region Proposal Network (RPN).
+
+ Arguments:
+ anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature
+ maps.
+ head (nn.Module): module that computes the objectness and regression deltas
+ fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be
+ considered as positive during training of the RPN.
+ bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be
+ considered as negative during training of the RPN.
+ batch_size_per_image (int): number of anchors that are sampled during training of the RPN
+ for computing the loss
+ positive_fraction (float): proportion of positive anchors in a mini-batch during training
+ of the RPN
+ pre_nms_top_n (Dict[int]): number of proposals to keep before applying NMS. It should
+ contain two fields: training and testing, to allow for different values depending
+ on training or evaluation
+ post_nms_top_n (Dict[int]): number of proposals to keep after applying NMS. It should
+ contain two fields: training and testing, to allow for different values depending
+ on training or evaluation
+ nms_thresh (float): NMS threshold used for postprocessing the RPN proposals
+
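+    Example (a minimal sketch; the hyper-parameters below mirror common Faster R-CNN defaults)::
+
+        >>> anchor_generator = AnchorGenerator(sizes=((32, 64, 128),),
+        >>>                                    aspect_ratios=((0.5, 1.0, 2.0),))
+        >>> head = RPNHead(256, anchor_generator.num_anchors_per_location()[0])
+        >>> rpn = RegionProposalNetwork(anchor_generator, head,
+        >>>                             fg_iou_thresh=0.7, bg_iou_thresh=0.3,
+        >>>                             batch_size_per_image=256, positive_fraction=0.5,
+        >>>                             pre_nms_top_n=dict(training=2000, testing=1000),
+        >>>                             post_nms_top_n=dict(training=2000, testing=1000),
+        >>>                             nms_thresh=0.7)
+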
+ """
+ __annotations__ = {
+ 'box_coder': det_utils.BoxCoder,
+ 'proposal_matcher': det_utils.Matcher,
+ 'fg_bg_sampler': det_utils.BalancedPositiveNegativeSampler,
+ 'pre_nms_top_n': Dict[str, int],
+ 'post_nms_top_n': Dict[str, int],
+ }
+
+ def __init__(self,
+ anchor_generator,
+ head,
+ #
+ fg_iou_thresh, bg_iou_thresh,
+ batch_size_per_image, positive_fraction,
+ #
+ pre_nms_top_n, post_nms_top_n, nms_thresh):
+ super(RegionProposalNetwork, self).__init__()
+ self.anchor_generator = anchor_generator
+ self.head = head
+ self.box_coder = det_utils.BoxCoder(weights=(1.0, 1.0, 1.0, 1.0))
+
+ # used during training
+ self.box_similarity = box_ops.box_iou
+
+ self.proposal_matcher = det_utils.Matcher(
+ fg_iou_thresh,
+ bg_iou_thresh,
+ allow_low_quality_matches=True,
+ )
+
+ self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
+ batch_size_per_image, positive_fraction
+ )
+ # used during testing
+ self._pre_nms_top_n = pre_nms_top_n
+ self._post_nms_top_n = post_nms_top_n
+ self.nms_thresh = nms_thresh
+ self.min_size = 1e-3
+
+ def pre_nms_top_n(self):
+ if self.training:
+ return self._pre_nms_top_n['training']
+ return self._pre_nms_top_n['testing']
+
+ def post_nms_top_n(self):
+ if self.training:
+ return self._post_nms_top_n['training']
+ return self._post_nms_top_n['testing']
+
+ def assign_targets_to_anchors(self, anchors, targets):
+ # type: (List[Tensor], List[Dict[str, Tensor]])
+ labels = []
+ matched_gt_boxes = []
+ for anchors_per_image, targets_per_image in zip(anchors, targets):
+ gt_boxes = targets_per_image["boxes"]
+
+ if gt_boxes.numel() == 0:
+ # Background image (negative example)
+ device = anchors_per_image.device
+ matched_gt_boxes_per_image = torch.zeros(anchors_per_image.shape, dtype=torch.float32, device=device)
+ labels_per_image = torch.zeros((anchors_per_image.shape[0],), dtype=torch.float32, device=device)
+ else:
+ match_quality_matrix = box_ops.box_iou(gt_boxes, anchors_per_image)
+ matched_idxs = self.proposal_matcher(match_quality_matrix)
+                # get the GT box corresponding to each anchor
+ # NB: need to clamp the indices because we can have a single
+ # GT in the image, and matched_idxs can be -2, which goes
+ # out of bounds
+ matched_gt_boxes_per_image = gt_boxes[matched_idxs.clamp(min=0)]
+
+ labels_per_image = matched_idxs >= 0
+ labels_per_image = labels_per_image.to(dtype=torch.float32)
+
+ # Background (negative examples)
+ bg_indices = matched_idxs == self.proposal_matcher.BELOW_LOW_THRESHOLD
+ labels_per_image[bg_indices] = torch.tensor(0.0)
+
+ # discard indices that are between thresholds
+ inds_to_discard = matched_idxs == self.proposal_matcher.BETWEEN_THRESHOLDS
+ labels_per_image[inds_to_discard] = torch.tensor(-1.0)
+
+ labels.append(labels_per_image)
+ matched_gt_boxes.append(matched_gt_boxes_per_image)
+ return labels, matched_gt_boxes
+
+ def _get_top_n_idx(self, objectness, num_anchors_per_level):
+ # type: (Tensor, List[int])
+ r = []
+ offset = 0
+ for ob in objectness.split(num_anchors_per_level, 1):
+ if torchvision._is_tracing():
+ num_anchors, pre_nms_top_n = _onnx_get_num_anchors_and_pre_nms_top_n(ob, self.pre_nms_top_n())
+ else:
+ num_anchors = ob.shape[1]
+ pre_nms_top_n = min(self.pre_nms_top_n(), num_anchors)
+ _, top_n_idx = ob.topk(pre_nms_top_n, dim=1)
+ r.append(top_n_idx + offset)
+ offset += num_anchors
+ return torch.cat(r, dim=1)
+
+ def filter_proposals(self, proposals, objectness, image_shapes, num_anchors_per_level):
+ # type: (Tensor, Tensor, List[Tuple[int, int]], List[int])
+ num_images = proposals.shape[0]
+ device = proposals.device
+        # do not backprop through objectness
+ objectness = objectness.detach()
+ objectness = objectness.reshape(num_images, -1)
+
+ levels = [
+ torch.full((n,), idx, dtype=torch.int64, device=device)
+ for idx, n in enumerate(num_anchors_per_level)
+ ]
+ levels = torch.cat(levels, 0)
+ levels = levels.reshape(1, -1).expand_as(objectness)
+
+ # select top_n boxes independently per level before applying nms
+ top_n_idx = self._get_top_n_idx(objectness, num_anchors_per_level)
+
+ image_range = torch.arange(num_images, device=device)
+ batch_idx = image_range[:, None]
+
+ objectness = objectness[batch_idx, top_n_idx]
+ levels = levels[batch_idx, top_n_idx]
+ proposals = proposals[batch_idx, top_n_idx]
+
+ final_boxes = []
+ final_scores = []
+ for boxes, scores, lvl, img_shape in zip(proposals, objectness, levels, image_shapes):
+ boxes = box_ops.clip_boxes_to_image(boxes, img_shape)
+ keep = box_ops.remove_small_boxes(boxes, self.min_size)
+ boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]
+ # non-maximum suppression, independently done per level
+ keep = box_ops.batched_nms(boxes, scores, lvl, self.nms_thresh)
+ # keep only topk scoring predictions
+ keep = keep[:self.post_nms_top_n()]
+ boxes, scores = boxes[keep], scores[keep]
+ final_boxes.append(boxes)
+ final_scores.append(scores)
+ return final_boxes, final_scores
+
+ def compute_loss(self, objectness, pred_bbox_deltas, labels, regression_targets):
+ # type: (Tensor, Tensor, List[Tensor], List[Tensor])
+ """
+ Arguments:
+ objectness (Tensor)
+ pred_bbox_deltas (Tensor)
+ labels (List[Tensor])
+ regression_targets (List[Tensor])
+
+ Returns:
+ objectness_loss (Tensor)
+ box_loss (Tensor)
+ """
+
+ sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
+ sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds, dim=0)).squeeze(1)
+ sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds, dim=0)).squeeze(1)
+
+ sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)
+
+ objectness = objectness.flatten()
+
+ labels = torch.cat(labels, dim=0)
+ regression_targets = torch.cat(regression_targets, dim=0)
+
+ box_loss = F.l1_loss(
+ pred_bbox_deltas[sampled_pos_inds],
+ regression_targets[sampled_pos_inds],
+ reduction="sum",
+ ) / (sampled_inds.numel())
+
+ objectness_loss = F.binary_cross_entropy_with_logits(
+ objectness[sampled_inds], labels[sampled_inds]
+ )
+
+ return objectness_loss, box_loss
+
+ def forward(self, images, features, targets=None):
+ # type: (ImageList, Dict[str, Tensor], Optional[List[Dict[str, Tensor]]])
+ """
+ Arguments:
+ images (ImageList): images for which we want to compute the predictions
+            features (Dict[str, Tensor]): features computed from the images that are
+                used for computing the predictions. Each tensor in the dict
+                corresponds to a different feature level
+ targets (List[Dict[Tensor]]): ground-truth boxes present in the image (optional).
+ If provided, each element in the dict should contain a field `boxes`,
+ with the locations of the ground-truth boxes.
+
+ Returns:
+ boxes (List[Tensor]): the predicted boxes from the RPN, one Tensor per
+ image.
+ losses (Dict[Tensor]): the losses for the model during training. During
+ testing, it is an empty dict.
+ """
+ # RPN uses all feature maps that are available
+ features = list(features.values())
+ objectness, pred_bbox_deltas = self.head(features)
+ anchors = self.anchor_generator(images, features)
+
+ num_images = len(anchors)
+ num_anchors_per_level_shape_tensors = [o[0].shape for o in objectness]
+ num_anchors_per_level = [s[0] * s[1] * s[2] for s in num_anchors_per_level_shape_tensors]
+ objectness, pred_bbox_deltas = \
+ concat_box_prediction_layers(objectness, pred_bbox_deltas)
+ # apply pred_bbox_deltas to anchors to obtain the decoded proposals
+        # note that we detach the deltas because Faster R-CNN does not backprop through
+ # the proposals
+ proposals = self.box_coder.decode(pred_bbox_deltas.detach(), anchors)
+ proposals = proposals.view(num_images, -1, 4)
+ boxes, scores = self.filter_proposals(proposals, objectness, images.image_sizes, num_anchors_per_level)
+
+ losses = {}
+ if self.training:
+ assert targets is not None
+ labels, matched_gt_boxes = self.assign_targets_to_anchors(anchors, targets)
+ regression_targets = self.box_coder.encode(matched_gt_boxes, anchors)
+ loss_objectness, loss_rpn_box_reg = self.compute_loss(
+ objectness, pred_bbox_deltas, labels, regression_targets)
+ losses = {
+ "loss_objectness": loss_objectness,
+ "loss_rpn_box_reg": loss_rpn_box_reg,
+ }
+ return boxes, losses
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd.py
new file mode 100644
index 0000000000..91a5ae5cda
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd.py
@@ -0,0 +1,638 @@
+import warnings
+from collections import OrderedDict
+from typing import Any, Dict, List, Optional, Tuple
+import pdb
+import torch
+import time
+import torch.nn.functional as F
+from torch import nn, Tensor
+import numpy as np
+import sys
+
+from ..._internally_replaced_utils import load_state_dict_from_url
+from ...ops import boxes as box_ops
+from ...utils import _log_api_usage_once
+from .. import vgg
+from . import _utils as det_utils
+from .anchor_utils import DefaultBoxGenerator
+from .backbone_utils import _validate_trainable_layers
+from .transform import GeneralizedRCNNTransform
+
+
+# for servers to immediately record the logs
+def flush_print(func):
+ def new_print(*args, **kwargs):
+ func(*args, **kwargs)
+ sys.stdout.flush()
+ return new_print
+print = flush_print(print)
+
+
+__all__ = ["SSD", "ssd300_vgg16"]
+
+model_urls = {
+ "ssd300_vgg16_coco": "https://download.pytorch.org/models/ssd300_vgg16_coco-b556d3b4.pth",
+}
+
+backbone_urls = {
+ # We port the features of a VGG16 backbone trained by amdegroot because unlike the one on TorchVision, it uses the
+ # same input standardization method as the paper. Ref: https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth
+ # Only the `features` weights have proper values, those on the `classifier` module are filled with nans.
+ "vgg16_features": "https://download.pytorch.org/models/vgg16_features-amdegroot-88682ab5.pth"
+}
+
+
+def _xavier_init(conv: nn.Module):
+ for layer in conv.modules():
+ if isinstance(layer, nn.Conv2d):
+ torch.nn.init.xavier_uniform_(layer.weight)
+ if layer.bias is not None:
+ torch.nn.init.constant_(layer.bias, 0.0)
+
+
+class SSDHead(nn.Module):
+ def __init__(self, in_channels: List[int], num_anchors: List[int], num_classes: int):
+ super().__init__()
+ self.classification_head = SSDClassificationHead(in_channels, num_anchors, num_classes)
+ self.regression_head = SSDRegressionHead(in_channels, num_anchors)
+
+ def forward(self, x: List[Tensor]) -> Dict[str, Tensor]:
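+        # Each sub-head flattens its per-level maps to (N, HWA, K):
+        # K = 4 for "bbox_regression" and K = num_classes for "cls_logits".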
+ return {
+ "bbox_regression": self.regression_head(x),
+ "cls_logits": self.classification_head(x),
+ }
+
+
+class SSDScoringHead(nn.Module):
+ def __init__(self, module_list: nn.ModuleList, num_columns: int):
+ super().__init__()
+ self.module_list = module_list
+ self.num_columns = num_columns
+
+ def _get_result_from_module_list(self, x: Tensor, idx: int) -> Tensor:
+ """
+ This is equivalent to self.module_list[idx](x),
+ but torchscript doesn't support this yet
+ """
+ num_blocks = len(self.module_list)
+ if idx < 0:
+ idx += num_blocks
+ out = x
+ for i, module in enumerate(self.module_list):
+ if i == idx:
+ out = module(x)
+ return out
+
+ def forward(self, x: List[Tensor]) -> Tensor:
+ all_results = []
+
+ for i, features in enumerate(x):
+ results = self._get_result_from_module_list(features, i)
+
+ # Permute output from (N, A * K, H, W) to (N, HWA, K).
+ N, _, H, W = results.shape
+ results = results.view(N, -1, self.num_columns, H, W)
+ results = results.permute(0, 3, 4, 1, 2)
+ results = results.reshape(N, -1, self.num_columns) # Size=(N, HWA, K)
+
+ all_results.append(results)
+
+ return torch.cat(all_results, dim=1)
+
+
+class SSDClassificationHead(SSDScoringHead):
+ def __init__(self, in_channels: List[int], num_anchors: List[int], num_classes: int):
+ cls_logits = nn.ModuleList()
+ for channels, anchors in zip(in_channels, num_anchors):
+ cls_logits.append(nn.Conv2d(channels, num_classes * anchors, kernel_size=3, padding=1))
+ _xavier_init(cls_logits)
+ super().__init__(cls_logits, num_classes)
+
+
+class SSDRegressionHead(SSDScoringHead):
+ def __init__(self, in_channels: List[int], num_anchors: List[int]):
+ bbox_reg = nn.ModuleList()
+ for channels, anchors in zip(in_channels, num_anchors):
+ bbox_reg.append(nn.Conv2d(channels, 4 * anchors, kernel_size=3, padding=1))
+ _xavier_init(bbox_reg)
+ super().__init__(bbox_reg, 4)
+
+
+class SSD(nn.Module):
+ """
+    Implements SSD architecture from `"SSD: Single Shot MultiBox Detector" <https://arxiv.org/abs/1512.02325>`_.
+
+ The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each
+ image, and should be in 0-1 range. Different images can have different sizes but they will be resized
+ to a fixed size before passing it to the backbone.
+
+    The behavior of the model changes depending on whether it is in training or evaluation mode.
+
+    During training, the model expects both the input tensors and a list of targets (one dictionary
+    per image), containing:
+ - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with
+ ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
+ - labels (Int64Tensor[N]): the class label for each ground-truth box
+
+ The model returns a Dict[Tensor] during training, containing the classification and regression
+ losses.
+
+ During inference, the model requires only the input tensors, and returns the post-processed
+ predictions as a List[Dict[Tensor]], one for each input image. The fields of the Dict are as
+ follows, where ``N`` is the number of detections:
+
+ - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with
+ ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
+ - labels (Int64Tensor[N]): the predicted labels for each detection
+ - scores (Tensor[N]): the scores for each detection
+
+ Args:
+ backbone (nn.Module): the network used to compute the features for the model.
+ It should contain an out_channels attribute with the list of the output channels of
+ each feature map. The backbone should return a single Tensor or an OrderedDict[Tensor].
+ anchor_generator (DefaultBoxGenerator): module that generates the default boxes for a
+ set of feature maps.
+ size (Tuple[int, int]): the width and height to which images will be rescaled before feeding them
+ to the backbone.
+ num_classes (int): number of output classes of the model (including the background).
+ image_mean (Tuple[float, float, float]): mean values used for input normalization.
+            They are generally the mean values of the dataset on which the backbone has been trained.
+        image_std (Tuple[float, float, float]): std values used for input normalization.
+            They are generally the std values of the dataset on which the backbone has been trained.
+ head (nn.Module, optional): Module run on top of the backbone features. Defaults to a module containing
+ a classification and regression module.
+ score_thresh (float): Score threshold used for postprocessing the detections.
+ nms_thresh (float): NMS threshold used for postprocessing the detections.
+ detections_per_img (int): Number of best detections to keep after NMS.
+ iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be
+ considered as positive during training.
+ topk_candidates (int): Number of best detections to keep before NMS.
+ positive_fraction (float): a number between 0 and 1 which indicates the proportion of positive
+ proposals used during the training of the classification head. It is used to estimate the negative to
+ positive ratio.
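+
+    Example (a minimal construction sketch; ``my_backbone`` is a placeholder for any module exposing
+    an ``out_channels`` list whose length matches the anchor generator's aspect ratios):
+
+        >>> anchor_generator = DefaultBoxGenerator([[2], [2, 3], [2, 3], [2, 3], [2], [2]])
+        >>> model = SSD(my_backbone, anchor_generator, size=(300, 300), num_classes=91)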
+ """
+
+ __annotations__ = {
+ "box_coder": det_utils.BoxCoder,
+ "proposal_matcher": det_utils.Matcher,
+ }
+
+ def __init__(
+ self,
+ backbone: nn.Module,
+ anchor_generator: DefaultBoxGenerator,
+ size: Tuple[int, int],
+ num_classes: int,
+ image_mean: Optional[List[float]] = None,
+ image_std: Optional[List[float]] = None,
+ head: Optional[nn.Module] = None,
+ score_thresh: float = 0.01,
+ nms_thresh: float = 0.45,
+ detections_per_img: int = 200,
+ iou_thresh: float = 0.5,
+ topk_candidates: int = 400,
+ positive_fraction: float = 0.25,
+ ):
+ super().__init__()
+ _log_api_usage_once(self)
+
+ self.backbone = backbone
+
+ self.anchor_generator = anchor_generator
+
+ self.box_coder = det_utils.BoxCoder(weights=(10.0, 10.0, 5.0, 5.0))
+
+ if head is None:
+ if hasattr(backbone, "out_channels"):
+ out_channels = backbone.out_channels
+ else:
+ out_channels = det_utils.retrieve_out_channels(backbone, size)
+
+ assert len(out_channels) == len(anchor_generator.aspect_ratios)
+
+ num_anchors = self.anchor_generator.num_anchors_per_location()
+ head = SSDHead(out_channels, num_anchors, num_classes)
+ self.head = head
+
+ self.proposal_matcher = det_utils.SSDMatcher(iou_thresh)
+
+ if image_mean is None:
+ image_mean = [0.485, 0.456, 0.406]
+ if image_std is None:
+ image_std = [0.229, 0.224, 0.225]
+ self.transform = GeneralizedRCNNTransform(
+ min(size), max(size), image_mean, image_std, size_divisible=1, fixed_size=size
+ )
+
+ self.score_thresh = score_thresh
+ self.nms_thresh = nms_thresh
+ self.detections_per_img = detections_per_img
+ self.topk_candidates = topk_candidates
+ self.neg_to_pos_ratio = (1.0 - positive_fraction) / positive_fraction
+
+ # used only on torchscript mode
+ self._has_warned = False
+
+ @torch.jit.unused
+ def eager_outputs(
+ self, losses: Dict[str, Tensor], detections: List[Dict[str, Tensor]]
+ ) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]:
+ if self.training:
+ return losses
+
+ return detections
+
+ def compute_loss(
+ self,
+ targets: List[Dict[str, Tensor]],
+ head_outputs: Dict[str, Tensor],
+ anchors: List[Tensor],
+ matched_idxs: List[Tensor],
+ ) -> Dict[str, Tensor]:
+ bbox_regression = head_outputs["bbox_regression"]
+ cls_logits = head_outputs["cls_logits"]
+
+ # Match original targets with default boxes
+ num_foreground = 0
+ bbox_loss = []
+ cls_targets = []
+ for (
+ targets_per_image,
+ bbox_regression_per_image,
+ cls_logits_per_image,
+ anchors_per_image,
+ matched_idxs_per_image,
+ ) in zip(targets, bbox_regression, cls_logits, anchors, matched_idxs):
+
+ # produce the matching between boxes and targets
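+            # Unlike upstream torchvision, the foreground indices are padded up to a fixed
+            # bucket size (100 / 1000 / 3234) so that the tensors below keep static shapes
+            # (presumably to avoid dynamic-shape recompilation on the NPU).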
+ np_matched_idxs_per_image = matched_idxs_per_image.to('cpu').numpy()
+ np_foreground_idxs_per_image = np.where(np_matched_idxs_per_image>=0)[0]
+ boxes_num = len(np_foreground_idxs_per_image)
+ fix_boxes_num = 0
+ if boxes_num <= 100:
+ fix_boxes_num = 100
+ elif boxes_num <= 1000:
+ fix_boxes_num = 1000
+ else:
+ fix_boxes_num = 3234
+
+ pad_idx = np.zeros(fix_boxes_num - boxes_num)
+ np_foreground_idxs_per_image = np.concatenate((np_foreground_idxs_per_image, pad_idx), axis=0)
+ foreground_idxs_per_image = torch.as_tensor(np_foreground_idxs_per_image,
+ dtype=matched_idxs_per_image.dtype,
+ device=matched_idxs_per_image.device)
+ foreground_matched_idxs_per_image = matched_idxs_per_image[foreground_idxs_per_image]
+ num_foreground += boxes_num
+
+ # Calculate regression loss
+ matched_gt_boxes_per_image = targets_per_image["boxes"][foreground_matched_idxs_per_image]
+ bbox_regression_per_image = bbox_regression_per_image[foreground_idxs_per_image, :]
+ anchors_per_image = anchors_per_image[foreground_idxs_per_image, :]
+ target_regression = self.box_coder.encode_single(matched_gt_boxes_per_image, anchors_per_image)
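+            # for the padded (dummy) indices, copy the targets into the predictions so that
+            # the extra rows contribute exactly zero to the smooth L1 loss below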
+ bbox_regression_per_image[boxes_num:,:] = target_regression[boxes_num:,:]
+ bbox_loss.append(
+ torch.nn.functional.smooth_l1_loss(bbox_regression_per_image, target_regression, reduction="sum")
+ )
+
+ # Estimate ground truth for class targets
+ gt_classes_target = torch.zeros(
+ (cls_logits_per_image.size(0),),
+ dtype=targets_per_image["labels"].dtype,
+ device=targets_per_image["labels"].device,
+ )
+ gt_classes_target[foreground_idxs_per_image] = targets_per_image["labels"][
+ foreground_matched_idxs_per_image
+ ]
+ cls_targets.append(gt_classes_target)
+
+ bbox_loss = torch.stack(bbox_loss)
+ cls_targets = torch.stack(cls_targets)
+
+ # Calculate classification loss
+ num_classes = cls_logits.size(-1)
+ cls_loss = F.cross_entropy(cls_logits.view(-1, num_classes), cls_targets.view(-1), reduction="none").view(
+ cls_targets.size()
+ )
+
+ # Hard Negative Sampling
+ foreground_idxs = cls_targets > 0
+ num_negative = self.neg_to_pos_ratio * foreground_idxs.sum(1, keepdim=True)
+ # num_negative[num_negative < self.neg_to_pos_ratio] = self.neg_to_pos_ratio
+ negative_loss = cls_loss.clone()
+        negative_loss[foreground_idxs] = -float(1)  # push positives to the bottom of the sort (upstream uses -inf)
+ values, idx = negative_loss.sort(1, descending=True)
+ # background_idxs = torch.logical_and(idx.sort(1)[1] < num_negative, torch.isfinite(values))
+ background_idxs = idx.sort(1)[1] < num_negative
+
+ N = max(1, num_foreground)
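+        # the classification loss applies the foreground/background masks multiplicatively
+        # (instead of the boolean indexing used upstream) to keep tensor shapes static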
+ return {
+ "bbox_regression": bbox_loss.sum() / N,
+ "classification": ((cls_loss*foreground_idxs).sum() + (cls_loss*background_idxs).sum()) / N,
+ }
+
+ def forward(
+ self, images: List[Tensor], targets: Optional[List[Dict[str, Tensor]]] = None
+ ) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]:
+ if self.training and targets is None:
+ raise ValueError("In training mode, targets should be passed")
+
+ if self.training:
+ assert targets is not None
+ for target in targets:
+ boxes = target["boxes"]
+ if isinstance(boxes, torch.Tensor):
+ if len(boxes.shape) != 2 or boxes.shape[-1] != 4:
+ raise ValueError(f"Expected target boxes to be a tensor of shape [N, 4], got {boxes.shape}.")
+ else:
+ raise ValueError(f"Expected target boxes to be of type Tensor, got {type(boxes)}.")
+ else:
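+            # at inference time the inputs are expected to be already transformed (an ImageList),
+            # and the second argument is reused to carry the original image sizes for postprocessing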
+ original_image_sizes = targets
+
+ # get the features from the backbone
+ features = self.backbone(images.tensors)
+ if isinstance(features, torch.Tensor):
+ features = OrderedDict([("0", features)])
+
+ features = list(features.values())
+
+ # compute the ssd heads outputs using the features
+ head_outputs = self.head(features)
+
+ # create the set of anchors
+ anchors = self.anchor_generator(images, features)
+
+ losses = {}
+ detections: List[Dict[str, Tensor]] = []
+ if self.training:
+ assert targets is not None
+ matched_idxs = []
+ for anchors_per_image, targets_per_image in zip(anchors, targets):
+ if targets_per_image["boxes"].numel() == 0:
+ matched_idxs.append(
+ torch.full((anchors_per_image.size(0),), -1, dtype=torch.int64, device=anchors_per_image.device)
+ )
+ continue
+ match_quality_matrix = box_ops.box_iou(targets_per_image["boxes"], anchors_per_image)
+ matched_idxs.append(self.proposal_matcher(match_quality_matrix))
+ losses = self.compute_loss(targets, head_outputs, anchors, matched_idxs)
+ else:
+ detections = self.postprocess_detections(head_outputs, anchors, images.image_sizes)
+
+ detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes)
+
+ if torch.jit.is_scripting():
+ if not self._has_warned:
+ warnings.warn("SSD always returns a (Losses, Detections) tuple in scripting")
+ self._has_warned = True
+ return losses, detections
+ return self.eager_outputs(losses, detections)
+
+ def postprocess_detections(
+ self, head_outputs: Dict[str, Tensor], image_anchors: List[Tensor], image_shapes: List[Tuple[int, int]]
+ ) -> List[Dict[str, Tensor]]:
+ bbox_regression = head_outputs["bbox_regression"]
+ pred_scores = F.softmax(head_outputs["cls_logits"], dim=-1)
+
+ num_classes = pred_scores.size(-1)
+ device = pred_scores.device
+
+ detections: List[Dict[str, Tensor]] = []
+
+ for boxes, scores, anchors, image_shape in zip(bbox_regression, pred_scores, image_anchors, image_shapes):
+ boxes = self.box_coder.decode_single(boxes, anchors)
+ boxes = box_ops.clip_boxes_to_image(boxes, image_shape)
+
+ image_boxes = []
+ image_scores = []
+ image_labels = []
+ for label in range(1, num_classes):
+ score = scores[:, label]
+
+ keep_idxs = score > self.score_thresh
+ # score = score[keep_idxs]
+ # box = boxes[keep_idxs]
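+                # zero out sub-threshold entries instead of boolean indexing (kept above for
+                # reference) so the tensor shapes stay fixed; topk below selects the candidates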
+ score = score * keep_idxs
+ box = boxes * keep_idxs.unsqueeze(1).repeat(1, 4)
+
+ # keep only topk scoring predictions
+ num_topk = min(self.topk_candidates, score.size(0))
+ score, idxs = score.topk(num_topk)
+ box = box[idxs]
+
+ image_boxes.append(box)
+ image_scores.append(score)
+ image_labels.append(torch.full_like(score, fill_value=label, dtype=torch.int64, device=device))
+
+ image_boxes = torch.cat(image_boxes, dim=0)
+ image_scores = torch.cat(image_scores, dim=0)
+ image_labels = torch.cat(image_labels, dim=0)
+
+ # non-maximum suppression
+ keep = box_ops.batched_nms(image_boxes, image_scores, image_labels, self.nms_thresh)
+ keep = keep[: self.detections_per_img]
+
+ detections.append(
+ {
+ "boxes": image_boxes[keep],
+ "scores": image_scores[keep],
+ "labels": image_labels[keep],
+ }
+ )
+ return detections
+
+
+class SSDFeatureExtractorVGG(nn.Module):
+ def __init__(self, backbone: nn.Module, highres: bool):
+ super().__init__()
+
+ _, _, maxpool3_pos, maxpool4_pos, _ = (i for i, layer in enumerate(backbone) if isinstance(layer, nn.MaxPool2d))
+
+ # Patch ceil_mode for maxpool3 to get the same WxH output sizes as the paper
+ backbone[maxpool3_pos].ceil_mode = True
+
+ # parameters used for L2 regularization + rescaling
+ self.scale_weight = nn.Parameter(torch.ones(512) * 20)
+
+ # Multiple Feature maps - page 4, Fig 2 of SSD paper
+ self.features = nn.Sequential(*backbone[:maxpool4_pos]) # until conv4_3
+
+ # SSD300 case - page 4, Fig 2 of SSD paper
+ extra = nn.ModuleList(
+ [
+ nn.Sequential(
+ nn.Conv2d(1024, 256, kernel_size=1),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(256, 512, kernel_size=3, padding=1, stride=2), # conv8_2
+ nn.ReLU(inplace=True),
+ ),
+ nn.Sequential(
+ nn.Conv2d(512, 128, kernel_size=1),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(128, 256, kernel_size=3, padding=1, stride=2), # conv9_2
+ nn.ReLU(inplace=True),
+ ),
+ nn.Sequential(
+ nn.Conv2d(256, 128, kernel_size=1),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(128, 256, kernel_size=3), # conv10_2
+ nn.ReLU(inplace=True),
+ ),
+ nn.Sequential(
+ nn.Conv2d(256, 128, kernel_size=1),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(128, 256, kernel_size=3), # conv11_2
+ nn.ReLU(inplace=True),
+ ),
+ ]
+ )
+ if highres:
+            # Additional layers for the SSD512 case. See page 11, footnote 5.
+ extra.append(
+ nn.Sequential(
+ nn.Conv2d(256, 128, kernel_size=1),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(128, 256, kernel_size=4), # conv12_2
+ nn.ReLU(inplace=True),
+ )
+ )
+ _xavier_init(extra)
+
+ fc = nn.Sequential(
+ nn.MaxPool2d(kernel_size=3, stride=1, padding=1, ceil_mode=False), # add modified maxpool5
+ nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, padding=6, dilation=6), # FC6 with atrous
+ nn.ReLU(inplace=True),
+ nn.Conv2d(in_channels=1024, out_channels=1024, kernel_size=1), # FC7
+ nn.ReLU(inplace=True),
+ )
+ _xavier_init(fc)
+ extra.insert(
+ 0,
+ nn.Sequential(
+ *backbone[maxpool4_pos:-1], # until conv5_3, skip maxpool5
+ fc,
+ ),
+ )
+ self.extra = extra
+
+ def forward(self, x: Tensor) -> Dict[str, Tensor]:
+ # L2 regularization + Rescaling of 1st block's feature map
+ x = self.features(x)
+ rescaled = self.scale_weight.view(1, -1, 1, 1) * F.normalize(x)
+ output = [rescaled]
+
+ # Calculating Feature maps for the rest blocks
+ for block in self.extra:
+ x = block(x)
+ output.append(x)
+
+ return OrderedDict([(str(i), v) for i, v in enumerate(output)])
+
+
+def _vgg_extractor(backbone: vgg.VGG, highres: bool, trainable_layers: int):
+ backbone = backbone.features
+ # Gather the indices of maxpools. These are the locations of output blocks.
+ stage_indices = [0] + [i for i, b in enumerate(backbone) if isinstance(b, nn.MaxPool2d)][:-1]
+ num_stages = len(stage_indices)
+
+ # find the index of the layer from which we wont freeze
+ assert 0 <= trainable_layers <= num_stages
+ freeze_before = len(backbone) if trainable_layers == 0 else stage_indices[num_stages - trainable_layers]
+
+ for b in backbone[:freeze_before]:
+ for parameter in b.parameters():
+ parameter.requires_grad_(False)
+
+ return SSDFeatureExtractorVGG(backbone, highres)
+
+
+def ssd300_vgg16(
+ pretrained: bool = False,
+ progress: bool = True,
+ num_classes: int = 91,
+ pretrained_backbone: bool = True,
+ trainable_backbone_layers: Optional[int] = None,
+ **kwargs: Any,
+):
+ """Constructs an SSD model with input size 300x300 and a VGG16 backbone.
+
+ Reference: `"SSD: Single Shot MultiBox Detector" `_.
+
+ The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each
+ image, and should be in 0-1 range. Different images can have different sizes but they will be resized
+ to a fixed size before passing it to the backbone.
+
+    The behavior of the model changes depending on whether it is in training or evaluation mode.
+
+    During training, the model expects both the input tensors and a list of targets (one dictionary
+    per image), containing:
+
+ - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with
+ ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
+ - labels (Int64Tensor[N]): the class label for each ground-truth box
+
+ The model returns a Dict[Tensor] during training, containing the classification and regression
+ losses.
+
+ During inference, the model requires only the input tensors, and returns the post-processed
+ predictions as a List[Dict[Tensor]], one for each input image. The fields of the Dict are as
+ follows, where ``N`` is the number of detections:
+
+ - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with
+ ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
+ - labels (Int64Tensor[N]): the predicted labels for each detection
+ - scores (Tensor[N]): the scores for each detection
+
+ Example:
+
+ >>> model = torchvision.models.detection.ssd300_vgg16(pretrained=True)
+ >>> model.eval()
+ >>> x = [torch.rand(3, 300, 300), torch.rand(3, 500, 400)]
+ >>> predictions = model(x)
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on COCO train2017
+ progress (bool): If True, displays a progress bar of the download to stderr
+ num_classes (int): number of output classes of the model (including the background)
+ pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
+        trainable_backbone_layers (int): number of trainable (not frozen) backbone layers starting from the final block.
+ Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable.
+ """
+ if "size" in kwargs:
+ warnings.warn("The size of the model is already fixed; ignoring the argument.")
+
+ trainable_backbone_layers = _validate_trainable_layers(
+ pretrained or pretrained_backbone, trainable_backbone_layers, 5, 4
+ )
+
+ if pretrained:
+ # no need to download the backbone if pretrained is set
+ pretrained_backbone = False
+
+ # Use custom backbones more appropriate for SSD
+ backbone = vgg.vgg16(pretrained=False, progress=progress)
+ if pretrained_backbone:
+ state_dict = load_state_dict_from_url(backbone_urls["vgg16_features"], progress=progress)
+ backbone.load_state_dict(state_dict)
+
+ backbone = _vgg_extractor(backbone, False, trainable_backbone_layers)
+ anchor_generator = DefaultBoxGenerator(
+ [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
+ scales=[0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05],
+ steps=[8, 16, 32, 64, 100, 300],
+ )
+
+ defaults = {
+ # Rescale the input in a way compatible to the backbone
+ "image_mean": [0.48235, 0.45882, 0.40784],
+ "image_std": [1.0 / 255.0, 1.0 / 255.0, 1.0 / 255.0], # undo the 0-1 scaling of toTensor
+ }
+ kwargs = {**defaults, **kwargs}
+ model = SSD(backbone, anchor_generator, (300, 300), num_classes, **kwargs)
+ if pretrained:
+ weights_name = "ssd300_vgg16_coco"
+ if model_urls.get(weights_name, None) is None:
+ raise ValueError(f"No checkpoint is available for model {weights_name}")
+ state_dict = load_state_dict_from_url(model_urls[weights_name], progress=progress)
+ model.load_state_dict(state_dict)
+ return model
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd_origin.py
new file mode 100644
index 0000000000..be30bb54c4
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd_origin.py
@@ -0,0 +1,629 @@
+import warnings
+from collections import OrderedDict
+from typing import Any, Dict, List, Optional, Tuple
+
+import torch
+import torch.nn.functional as F
+from torch import nn, Tensor
+
+from ..._internally_replaced_utils import load_state_dict_from_url
+from ...ops import boxes as box_ops
+from ...utils import _log_api_usage_once
+from .. import vgg
+from . import _utils as det_utils
+from .anchor_utils import DefaultBoxGenerator
+from .backbone_utils import _validate_trainable_layers
+from .transform import GeneralizedRCNNTransform
+
+__all__ = ["SSD", "ssd300_vgg16"]
+
+model_urls = {
+ "ssd300_vgg16_coco": "https://download.pytorch.org/models/ssd300_vgg16_coco-b556d3b4.pth",
+}
+
+backbone_urls = {
+ # We port the features of a VGG16 backbone trained by amdegroot because unlike the one on TorchVision, it uses the
+ # same input standardization method as the paper. Ref: https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth
+ # Only the `features` weights have proper values, those on the `classifier` module are filled with nans.
+ "vgg16_features": "https://download.pytorch.org/models/vgg16_features-amdegroot-88682ab5.pth"
+}
+
+
+def _xavier_init(conv: nn.Module):
+ for layer in conv.modules():
+ if isinstance(layer, nn.Conv2d):
+ torch.nn.init.xavier_uniform_(layer.weight)
+ if layer.bias is not None:
+ torch.nn.init.constant_(layer.bias, 0.0)
+
+
+class SSDHead(nn.Module):
+ def __init__(self, in_channels: List[int], num_anchors: List[int], num_classes: int):
+ super().__init__()
+ self.classification_head = SSDClassificationHead(in_channels, num_anchors, num_classes)
+ self.regression_head = SSDRegressionHead(in_channels, num_anchors)
+
+ def forward(self, x: List[Tensor]) -> Dict[str, Tensor]:
+ return {
+ "bbox_regression": self.regression_head(x),
+ "cls_logits": self.classification_head(x),
+ }
+
+
+class SSDScoringHead(nn.Module):
+ def __init__(self, module_list: nn.ModuleList, num_columns: int):
+ super().__init__()
+ self.module_list = module_list
+ self.num_columns = num_columns
+
+ def _get_result_from_module_list(self, x: Tensor, idx: int) -> Tensor:
+ """
+ This is equivalent to self.module_list[idx](x),
+ but torchscript doesn't support this yet
+ """
+ num_blocks = len(self.module_list)
+ if idx < 0:
+ idx += num_blocks
+ out = x
+ for i, module in enumerate(self.module_list):
+ if i == idx:
+ out = module(x)
+ return out
+
+ def forward(self, x: List[Tensor]) -> Tensor:
+ all_results = []
+
+ for i, features in enumerate(x):
+ results = self._get_result_from_module_list(features, i)
+
+ # Permute output from (N, A * K, H, W) to (N, HWA, K).
+ N, _, H, W = results.shape
+ results = results.view(N, -1, self.num_columns, H, W)
+ results = results.permute(0, 3, 4, 1, 2)
+ results = results.reshape(N, -1, self.num_columns) # Size=(N, HWA, K)
+
+ all_results.append(results)
+
+ return torch.cat(all_results, dim=1)
+
+
+class SSDClassificationHead(SSDScoringHead):
+ def __init__(self, in_channels: List[int], num_anchors: List[int], num_classes: int):
+ cls_logits = nn.ModuleList()
+ for channels, anchors in zip(in_channels, num_anchors):
+ cls_logits.append(nn.Conv2d(channels, num_classes * anchors, kernel_size=3, padding=1))
+ _xavier_init(cls_logits)
+ super().__init__(cls_logits, num_classes)
+
+
+class SSDRegressionHead(SSDScoringHead):
+ def __init__(self, in_channels: List[int], num_anchors: List[int]):
+ bbox_reg = nn.ModuleList()
+ for channels, anchors in zip(in_channels, num_anchors):
+ bbox_reg.append(nn.Conv2d(channels, 4 * anchors, kernel_size=3, padding=1))
+ _xavier_init(bbox_reg)
+ super().__init__(bbox_reg, 4)
+
+
+class SSD(nn.Module):
+ """
+    Implements SSD architecture from `"SSD: Single Shot MultiBox Detector" <https://arxiv.org/abs/1512.02325>`_.
+
+ The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each
+ image, and should be in 0-1 range. Different images can have different sizes but they will be resized
+ to a fixed size before passing it to the backbone.
+
+    The behavior of the model changes depending on whether it is in training or evaluation mode.
+
+    During training, the model expects both the input tensors and a list of targets (one dictionary
+    per image), containing:
+ - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with
+ ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
+ - labels (Int64Tensor[N]): the class label for each ground-truth box
+
+ The model returns a Dict[Tensor] during training, containing the classification and regression
+ losses.
+
+ During inference, the model requires only the input tensors, and returns the post-processed
+ predictions as a List[Dict[Tensor]], one for each input image. The fields of the Dict are as
+ follows, where ``N`` is the number of detections:
+
+ - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with
+ ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
+ - labels (Int64Tensor[N]): the predicted labels for each detection
+ - scores (Tensor[N]): the scores for each detection
+
+ Args:
+ backbone (nn.Module): the network used to compute the features for the model.
+ It should contain an out_channels attribute with the list of the output channels of
+ each feature map. The backbone should return a single Tensor or an OrderedDict[Tensor].
+ anchor_generator (DefaultBoxGenerator): module that generates the default boxes for a
+ set of feature maps.
+ size (Tuple[int, int]): the width and height to which images will be rescaled before feeding them
+ to the backbone.
+ num_classes (int): number of output classes of the model (including the background).
+ image_mean (Tuple[float, float, float]): mean values used for input normalization.
+            They are generally the mean values of the dataset on which the backbone has been trained.
+        image_std (Tuple[float, float, float]): std values used for input normalization.
+            They are generally the std values of the dataset on which the backbone has been trained.
+ head (nn.Module, optional): Module run on top of the backbone features. Defaults to a module containing
+ a classification and regression module.
+ score_thresh (float): Score threshold used for postprocessing the detections.
+ nms_thresh (float): NMS threshold used for postprocessing the detections.
+ detections_per_img (int): Number of best detections to keep after NMS.
+ iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be
+ considered as positive during training.
+ topk_candidates (int): Number of best detections to keep before NMS.
+ positive_fraction (float): a number between 0 and 1 which indicates the proportion of positive
+ proposals used during the training of the classification head. It is used to estimate the negative to
+ positive ratio.
+ """
+
+ __annotations__ = {
+ "box_coder": det_utils.BoxCoder,
+ "proposal_matcher": det_utils.Matcher,
+ }
+
+ def __init__(
+ self,
+ backbone: nn.Module,
+ anchor_generator: DefaultBoxGenerator,
+ size: Tuple[int, int],
+ num_classes: int,
+ image_mean: Optional[List[float]] = None,
+ image_std: Optional[List[float]] = None,
+ head: Optional[nn.Module] = None,
+ score_thresh: float = 0.01,
+ nms_thresh: float = 0.45,
+ detections_per_img: int = 200,
+ iou_thresh: float = 0.5,
+ topk_candidates: int = 400,
+ positive_fraction: float = 0.25,
+ ):
+ super().__init__()
+ _log_api_usage_once(self)
+
+ self.backbone = backbone
+
+ self.anchor_generator = anchor_generator
+
+ self.box_coder = det_utils.BoxCoder(weights=(10.0, 10.0, 5.0, 5.0))
+
+ if head is None:
+ if hasattr(backbone, "out_channels"):
+ out_channels = backbone.out_channels
+ else:
+ out_channels = det_utils.retrieve_out_channels(backbone, size)
+
+ assert len(out_channels) == len(anchor_generator.aspect_ratios)
+
+ num_anchors = self.anchor_generator.num_anchors_per_location()
+ head = SSDHead(out_channels, num_anchors, num_classes)
+ self.head = head
+
+ self.proposal_matcher = det_utils.SSDMatcher(iou_thresh)
+
+ if image_mean is None:
+ image_mean = [0.485, 0.456, 0.406]
+ if image_std is None:
+ image_std = [0.229, 0.224, 0.225]
+ self.transform = GeneralizedRCNNTransform(
+ min(size), max(size), image_mean, image_std, size_divisible=1, fixed_size=size
+ )
+
+ self.score_thresh = score_thresh
+ self.nms_thresh = nms_thresh
+ self.detections_per_img = detections_per_img
+ self.topk_candidates = topk_candidates
+ self.neg_to_pos_ratio = (1.0 - positive_fraction) / positive_fraction
+
+ # used only on torchscript mode
+ self._has_warned = False
+
+ @torch.jit.unused
+ def eager_outputs(
+ self, losses: Dict[str, Tensor], detections: List[Dict[str, Tensor]]
+ ) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]:
+ if self.training:
+ return losses
+
+ return detections
+
+ def compute_loss(
+ self,
+ targets: List[Dict[str, Tensor]],
+ head_outputs: Dict[str, Tensor],
+ anchors: List[Tensor],
+ matched_idxs: List[Tensor],
+ ) -> Dict[str, Tensor]:
+ bbox_regression = head_outputs["bbox_regression"]
+ cls_logits = head_outputs["cls_logits"]
+
+ # Match original targets with default boxes
+ num_foreground = 0
+ bbox_loss = []
+ cls_targets = []
+ for (
+ targets_per_image,
+ bbox_regression_per_image,
+ cls_logits_per_image,
+ anchors_per_image,
+ matched_idxs_per_image,
+ ) in zip(targets, bbox_regression, cls_logits, anchors, matched_idxs):
+ # produce the matching between boxes and targets
+ foreground_idxs_per_image = torch.where(matched_idxs_per_image >= 0)[0]
+ foreground_matched_idxs_per_image = matched_idxs_per_image[foreground_idxs_per_image]
+ num_foreground += foreground_matched_idxs_per_image.numel()
+
+ # Calculate regression loss
+ matched_gt_boxes_per_image = targets_per_image["boxes"][foreground_matched_idxs_per_image]
+ bbox_regression_per_image = bbox_regression_per_image[foreground_idxs_per_image, :]
+ anchors_per_image = anchors_per_image[foreground_idxs_per_image, :]
+ target_regression = self.box_coder.encode_single(matched_gt_boxes_per_image, anchors_per_image)
+ bbox_loss.append(
+ torch.nn.functional.smooth_l1_loss(bbox_regression_per_image, target_regression, reduction="sum")
+ )
+
+ # Estimate ground truth for class targets
+ gt_classes_target = torch.zeros(
+ (cls_logits_per_image.size(0),),
+ dtype=targets_per_image["labels"].dtype,
+ device=targets_per_image["labels"].device,
+ )
+ gt_classes_target[foreground_idxs_per_image] = targets_per_image["labels"][
+ foreground_matched_idxs_per_image
+ ]
+ cls_targets.append(gt_classes_target)
+
+ bbox_loss = torch.stack(bbox_loss)
+ cls_targets = torch.stack(cls_targets)
+
+ # Calculate classification loss
+ num_classes = cls_logits.size(-1)
+ cls_loss = F.cross_entropy(cls_logits.view(-1, num_classes), cls_targets.view(-1), reduction="none").view(
+ cls_targets.size()
+ )
+
+ # Hard Negative Sampling
+ foreground_idxs = cls_targets > 0
+ num_negative = self.neg_to_pos_ratio * foreground_idxs.sum(1, keepdim=True)
+ # num_negative[num_negative < self.neg_to_pos_ratio] = self.neg_to_pos_ratio
+ negative_loss = cls_loss.clone()
+ negative_loss[foreground_idxs] = -float("inf") # use -inf to detect positive values that creeped in the sample
+ values, idx = negative_loss.sort(1, descending=True)
+ # background_idxs = torch.logical_and(idx.sort(1)[1] < num_negative, torch.isfinite(values))
+ background_idxs = idx.sort(1)[1] < num_negative
+
+ N = max(1, num_foreground)
+ return {
+ "bbox_regression": bbox_loss.sum() / N,
+ "classification": (cls_loss[foreground_idxs].sum() + cls_loss[background_idxs].sum()) / N,
+ }
+
+ def forward(
+ self, images: List[Tensor], targets: Optional[List[Dict[str, Tensor]]] = None
+ ) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]:
+ if self.training and targets is None:
+ raise ValueError("In training mode, targets should be passed")
+
+ if self.training:
+ assert targets is not None
+ for target in targets:
+ boxes = target["boxes"]
+ if isinstance(boxes, torch.Tensor):
+ if len(boxes.shape) != 2 or boxes.shape[-1] != 4:
+ raise ValueError(f"Expected target boxes to be a tensor of shape [N, 4], got {boxes.shape}.")
+ else:
+ raise ValueError(f"Expected target boxes to be of type Tensor, got {type(boxes)}.")
+
+ # get the original image sizes
+ original_image_sizes: List[Tuple[int, int]] = []
+ for img in images:
+ val = img.shape[-2:]
+ assert len(val) == 2
+ original_image_sizes.append((val[0], val[1]))
+
+ # transform the input
+ images, targets = self.transform(images, targets)
+
+ # Check for degenerate boxes
+ if targets is not None:
+ for target_idx, target in enumerate(targets):
+ boxes = target["boxes"]
+ degenerate_boxes = boxes[:, 2:] <= boxes[:, :2]
+ if degenerate_boxes.any():
+ bb_idx = torch.where(degenerate_boxes.any(dim=1))[0][0]
+ degen_bb: List[float] = boxes[bb_idx].tolist()
+ raise ValueError(
+ "All bounding boxes should have positive height and width."
+ f" Found invalid box {degen_bb} for target at index {target_idx}."
+ )
+
+ # get the features from the backbone
+ features = self.backbone(images.tensors)
+ if isinstance(features, torch.Tensor):
+ features = OrderedDict([("0", features)])
+
+ features = list(features.values())
+
+ # compute the ssd heads outputs using the features
+ head_outputs = self.head(features)
+
+ # create the set of anchors
+ anchors = self.anchor_generator(images, features)
+
+ losses = {}
+ detections: List[Dict[str, Tensor]] = []
+ if self.training:
+ assert targets is not None
+
+ matched_idxs = []
+ for anchors_per_image, targets_per_image in zip(anchors, targets):
+ if targets_per_image["boxes"].numel() == 0:
+ matched_idxs.append(
+ torch.full((anchors_per_image.size(0),), -1, dtype=torch.int64, device=anchors_per_image.device)
+ )
+ continue
+
+ match_quality_matrix = box_ops.box_iou(targets_per_image["boxes"], anchors_per_image)
+ matched_idxs.append(self.proposal_matcher(match_quality_matrix))
+
+ losses = self.compute_loss(targets, head_outputs, anchors, matched_idxs)
+ else:
+ detections = self.postprocess_detections(head_outputs, anchors, images.image_sizes)
+ detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes)
+
+ if torch.jit.is_scripting():
+ if not self._has_warned:
+ warnings.warn("SSD always returns a (Losses, Detections) tuple in scripting")
+ self._has_warned = True
+ return losses, detections
+ return self.eager_outputs(losses, detections)
+
+ def postprocess_detections(
+ self, head_outputs: Dict[str, Tensor], image_anchors: List[Tensor], image_shapes: List[Tuple[int, int]]
+ ) -> List[Dict[str, Tensor]]:
+ bbox_regression = head_outputs["bbox_regression"]
+ pred_scores = F.softmax(head_outputs["cls_logits"], dim=-1)
+
+ num_classes = pred_scores.size(-1)
+ device = pred_scores.device
+
+ detections: List[Dict[str, Tensor]] = []
+
+ for boxes, scores, anchors, image_shape in zip(bbox_regression, pred_scores, image_anchors, image_shapes):
+ boxes = self.box_coder.decode_single(boxes, anchors)
+ boxes = box_ops.clip_boxes_to_image(boxes, image_shape)
+
+ image_boxes = []
+ image_scores = []
+ image_labels = []
+ for label in range(1, num_classes):
+ score = scores[:, label]
+
+ keep_idxs = score > self.score_thresh
+ score = score[keep_idxs]
+ box = boxes[keep_idxs]
+
+ # keep only topk scoring predictions
+ num_topk = min(self.topk_candidates, score.size(0))
+ score, idxs = score.topk(num_topk)
+ box = box[idxs]
+
+ image_boxes.append(box)
+ image_scores.append(score)
+ image_labels.append(torch.full_like(score, fill_value=label, dtype=torch.int64, device=device))
+
+ image_boxes = torch.cat(image_boxes, dim=0)
+ image_scores = torch.cat(image_scores, dim=0)
+ image_labels = torch.cat(image_labels, dim=0)
+
+ # non-maximum suppression
+ keep = box_ops.batched_nms(image_boxes, image_scores, image_labels, self.nms_thresh)
+ keep = keep[: self.detections_per_img]
+
+ detections.append(
+ {
+ "boxes": image_boxes[keep],
+ "scores": image_scores[keep],
+ "labels": image_labels[keep],
+ }
+ )
+ return detections
+
+
+class SSDFeatureExtractorVGG(nn.Module):
+ def __init__(self, backbone: nn.Module, highres: bool):
+ super().__init__()
+
+ _, _, maxpool3_pos, maxpool4_pos, _ = (i for i, layer in enumerate(backbone) if isinstance(layer, nn.MaxPool2d))
+
+ # Patch ceil_mode for maxpool3 to get the same WxH output sizes as the paper
+ backbone[maxpool3_pos].ceil_mode = True
+
+ # parameters used for L2 regularization + rescaling
+ self.scale_weight = nn.Parameter(torch.ones(512) * 20)
+
+ # Multiple Feature maps - page 4, Fig 2 of SSD paper
+ self.features = nn.Sequential(*backbone[:maxpool4_pos]) # until conv4_3
+
+ # SSD300 case - page 4, Fig 2 of SSD paper
+ extra = nn.ModuleList(
+ [
+ nn.Sequential(
+ nn.Conv2d(1024, 256, kernel_size=1),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(256, 512, kernel_size=3, padding=1, stride=2), # conv8_2
+ nn.ReLU(inplace=True),
+ ),
+ nn.Sequential(
+ nn.Conv2d(512, 128, kernel_size=1),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(128, 256, kernel_size=3, padding=1, stride=2), # conv9_2
+ nn.ReLU(inplace=True),
+ ),
+ nn.Sequential(
+ nn.Conv2d(256, 128, kernel_size=1),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(128, 256, kernel_size=3), # conv10_2
+ nn.ReLU(inplace=True),
+ ),
+ nn.Sequential(
+ nn.Conv2d(256, 128, kernel_size=1),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(128, 256, kernel_size=3), # conv11_2
+ nn.ReLU(inplace=True),
+ ),
+ ]
+ )
+ if highres:
+            # Additional layers for the SSD512 case. See page 11, footnote 5.
+ extra.append(
+ nn.Sequential(
+ nn.Conv2d(256, 128, kernel_size=1),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(128, 256, kernel_size=4), # conv12_2
+ nn.ReLU(inplace=True),
+ )
+ )
+ _xavier_init(extra)
+
+ fc = nn.Sequential(
+ nn.MaxPool2d(kernel_size=3, stride=1, padding=1, ceil_mode=False), # add modified maxpool5
+ nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, padding=6, dilation=6), # FC6 with atrous
+ nn.ReLU(inplace=True),
+ nn.Conv2d(in_channels=1024, out_channels=1024, kernel_size=1), # FC7
+ nn.ReLU(inplace=True),
+ )
+ _xavier_init(fc)
+ extra.insert(
+ 0,
+ nn.Sequential(
+ *backbone[maxpool4_pos:-1], # until conv5_3, skip maxpool5
+ fc,
+ ),
+ )
+ self.extra = extra
+
+ def forward(self, x: Tensor) -> Dict[str, Tensor]:
+ # L2 regularization + Rescaling of 1st block's feature map
+ x = self.features(x)
+ rescaled = self.scale_weight.view(1, -1, 1, 1) * F.normalize(x)
+ output = [rescaled]
+
+ # Calculating Feature maps for the rest blocks
+ for block in self.extra:
+ x = block(x)
+ output.append(x)
+
+ return OrderedDict([(str(i), v) for i, v in enumerate(output)])
+
+
+def _vgg_extractor(backbone: vgg.VGG, highres: bool, trainable_layers: int):
+ backbone = backbone.features
+ # Gather the indices of maxpools. These are the locations of output blocks.
+ stage_indices = [0] + [i for i, b in enumerate(backbone) if isinstance(b, nn.MaxPool2d)][:-1]
+ num_stages = len(stage_indices)
+
+ # find the index of the layer from which we wont freeze
+ assert 0 <= trainable_layers <= num_stages
+ freeze_before = len(backbone) if trainable_layers == 0 else stage_indices[num_stages - trainable_layers]
+
+ for b in backbone[:freeze_before]:
+ for parameter in b.parameters():
+ parameter.requires_grad_(False)
+
+ return SSDFeatureExtractorVGG(backbone, highres)
+
+
+def ssd300_vgg16(
+ pretrained: bool = False,
+ progress: bool = True,
+ num_classes: int = 91,
+ pretrained_backbone: bool = True,
+ trainable_backbone_layers: Optional[int] = None,
+ **kwargs: Any,
+):
+ """Constructs an SSD model with input size 300x300 and a VGG16 backbone.
+
+ Reference: `"SSD: Single Shot MultiBox Detector" `_.
+
+ The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each
+ image, and should be in 0-1 range. Different images can have different sizes but they will be resized
+ to a fixed size before passing it to the backbone.
+
+    The behavior of the model changes depending on whether it is in training or evaluation mode.
+
+    During training, the model expects both the input tensors and a list of targets (one dictionary
+    per image), containing:
+
+ - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with
+ ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
+ - labels (Int64Tensor[N]): the class label for each ground-truth box
+
+ The model returns a Dict[Tensor] during training, containing the classification and regression
+ losses.
+
+ During inference, the model requires only the input tensors, and returns the post-processed
+ predictions as a List[Dict[Tensor]], one for each input image. The fields of the Dict are as
+ follows, where ``N`` is the number of detections:
+
+ - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with
+ ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
+ - labels (Int64Tensor[N]): the predicted labels for each detection
+ - scores (Tensor[N]): the scores for each detection
+
+ Example:
+
+ >>> model = torchvision.models.detection.ssd300_vgg16(pretrained=True)
+ >>> model.eval()
+ >>> x = [torch.rand(3, 300, 300), torch.rand(3, 500, 400)]
+ >>> predictions = model(x)
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on COCO train2017
+ progress (bool): If True, displays a progress bar of the download to stderr
+ num_classes (int): number of output classes of the model (including the background)
+ pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
+        trainable_backbone_layers (int): number of trainable (not frozen) backbone layers starting from the final block.
+ Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable.
+ """
+ if "size" in kwargs:
+ warnings.warn("The size of the model is already fixed; ignoring the argument.")
+
+ trainable_backbone_layers = _validate_trainable_layers(
+ pretrained or pretrained_backbone, trainable_backbone_layers, 5, 4
+ )
+
+ if pretrained:
+ # no need to download the backbone if pretrained is set
+ pretrained_backbone = False
+
+ # Use custom backbones more appropriate for SSD
+ backbone = vgg.vgg16(pretrained=False, progress=progress)
+ if pretrained_backbone:
+ state_dict = load_state_dict_from_url(backbone_urls["vgg16_features"], progress=progress)
+ backbone.load_state_dict(state_dict)
+
+ backbone = _vgg_extractor(backbone, False, trainable_backbone_layers)
+ anchor_generator = DefaultBoxGenerator(
+ [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
+ scales=[0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05],
+ steps=[8, 16, 32, 64, 100, 300],
+ )
+
+ defaults = {
+ # Rescale the input in a way compatible to the backbone
+ "image_mean": [0.48235, 0.45882, 0.40784],
+ "image_std": [1.0 / 255.0, 1.0 / 255.0, 1.0 / 255.0], # undo the 0-1 scaling of toTensor
+ }
+ kwargs = {**defaults, **kwargs}
+ model = SSD(backbone, anchor_generator, (300, 300), num_classes, **kwargs)
+ if pretrained:
+ weights_name = "ssd300_vgg16_coco"
+ if model_urls.get(weights_name, None) is None:
+ raise ValueError(f"No checkpoint is available for model {weights_name}")
+ state_dict = load_state_dict_from_url(model_urls[weights_name], progress=progress)
+ model.load_state_dict(state_dict)
+ return model
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssdlite.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssdlite.py
new file mode 100644
index 0000000000..652d3afe4d
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssdlite.py
@@ -0,0 +1,274 @@
+import warnings
+from collections import OrderedDict
+from functools import partial
+from typing import Any, Callable, Dict, List, Optional, Union
+
+import torch
+from torch import nn, Tensor
+
+from ..._internally_replaced_utils import load_state_dict_from_url
+from ...ops.misc import ConvNormActivation
+from ...utils import _log_api_usage_once
+from .. import mobilenet
+from . import _utils as det_utils
+from .anchor_utils import DefaultBoxGenerator
+from .backbone_utils import _validate_trainable_layers
+from .ssd import SSD, SSDScoringHead
+
+
+__all__ = ["ssdlite320_mobilenet_v3_large"]
+
+model_urls = {
+ "ssdlite320_mobilenet_v3_large_coco": "https://download.pytorch.org/models/ssdlite320_mobilenet_v3_large_coco-a79551df.pth"
+}
+
+
+# Building blocks of SSDlite as described in section 6.2 of MobileNetV2 paper
+def _prediction_block(
+ in_channels: int, out_channels: int, kernel_size: int, norm_layer: Callable[..., nn.Module]
+) -> nn.Sequential:
+ return nn.Sequential(
+ # 3x3 depthwise with stride 1 and padding 1
+ ConvNormActivation(
+ in_channels,
+ in_channels,
+ kernel_size=kernel_size,
+ groups=in_channels,
+ norm_layer=norm_layer,
+ activation_layer=nn.ReLU6,
+ ),
+        # 1x1 projection to output channels
+ nn.Conv2d(in_channels, out_channels, 1),
+ )
+
+
+def _extra_block(in_channels: int, out_channels: int, norm_layer: Callable[..., nn.Module]) -> nn.Sequential:
+ activation = nn.ReLU6
+ intermediate_channels = out_channels // 2
+ return nn.Sequential(
+ # 1x1 projection to half output channels
+ ConvNormActivation(
+ in_channels, intermediate_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=activation
+ ),
+ # 3x3 depthwise with stride 2 and padding 1
+ ConvNormActivation(
+ intermediate_channels,
+ intermediate_channels,
+ kernel_size=3,
+ stride=2,
+ groups=intermediate_channels,
+ norm_layer=norm_layer,
+ activation_layer=activation,
+ ),
+        # 1x1 projection to output channels
+ ConvNormActivation(
+ intermediate_channels, out_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=activation
+ ),
+ )
+
+
+def _normal_init(conv: nn.Module):
+ for layer in conv.modules():
+ if isinstance(layer, nn.Conv2d):
+ torch.nn.init.normal_(layer.weight, mean=0.0, std=0.03)
+ if layer.bias is not None:
+ torch.nn.init.constant_(layer.bias, 0.0)
+
+
+class SSDLiteHead(nn.Module):
+ def __init__(
+ self, in_channels: List[int], num_anchors: List[int], num_classes: int, norm_layer: Callable[..., nn.Module]
+ ):
+ super().__init__()
+ self.classification_head = SSDLiteClassificationHead(in_channels, num_anchors, num_classes, norm_layer)
+ self.regression_head = SSDLiteRegressionHead(in_channels, num_anchors, norm_layer)
+
+ def forward(self, x: List[Tensor]) -> Dict[str, Tensor]:
+ return {
+ "bbox_regression": self.regression_head(x),
+ "cls_logits": self.classification_head(x),
+ }
+
+
+class SSDLiteClassificationHead(SSDScoringHead):
+ def __init__(
+ self, in_channels: List[int], num_anchors: List[int], num_classes: int, norm_layer: Callable[..., nn.Module]
+ ):
+ cls_logits = nn.ModuleList()
+ for channels, anchors in zip(in_channels, num_anchors):
+ cls_logits.append(_prediction_block(channels, num_classes * anchors, 3, norm_layer))
+ _normal_init(cls_logits)
+ super().__init__(cls_logits, num_classes)
+
+
+class SSDLiteRegressionHead(SSDScoringHead):
+ def __init__(self, in_channels: List[int], num_anchors: List[int], norm_layer: Callable[..., nn.Module]):
+ bbox_reg = nn.ModuleList()
+ for channels, anchors in zip(in_channels, num_anchors):
+ bbox_reg.append(_prediction_block(channels, 4 * anchors, 3, norm_layer))
+ _normal_init(bbox_reg)
+ super().__init__(bbox_reg, 4)
+
+
+class SSDLiteFeatureExtractorMobileNet(nn.Module):
+ def __init__(
+ self,
+ backbone: nn.Module,
+ c4_pos: int,
+ norm_layer: Callable[..., nn.Module],
+ width_mult: float = 1.0,
+ min_depth: int = 16,
+ ):
+ super().__init__()
+ _log_api_usage_once(self)
+
+ assert not backbone[c4_pos].use_res_connect
+ self.features = nn.Sequential(
+ # As described in section 6.3 of MobileNetV3 paper
+ nn.Sequential(*backbone[:c4_pos], backbone[c4_pos].block[0]), # from start until C4 expansion layer
+ nn.Sequential(backbone[c4_pos].block[1:], *backbone[c4_pos + 1 :]), # from C4 depthwise until end
+ )
+
+ get_depth = lambda d: max(min_depth, int(d * width_mult)) # noqa: E731
+ extra = nn.ModuleList(
+ [
+ _extra_block(backbone[-1].out_channels, get_depth(512), norm_layer),
+ _extra_block(get_depth(512), get_depth(256), norm_layer),
+ _extra_block(get_depth(256), get_depth(256), norm_layer),
+ _extra_block(get_depth(256), get_depth(128), norm_layer),
+ ]
+ )
+ _normal_init(extra)
+
+ self.extra = extra
+
+ def forward(self, x: Tensor) -> Dict[str, Tensor]:
+ # Get feature maps from backbone and extra. Can't be refactored due to JIT limitations.
+ output = []
+ for block in self.features:
+ x = block(x)
+ output.append(x)
+
+ for block in self.extra:
+ x = block(x)
+ output.append(x)
+
+ return OrderedDict([(str(i), v) for i, v in enumerate(output)])
+
+
+def _mobilenet_extractor(
+ backbone: Union[mobilenet.MobileNetV2, mobilenet.MobileNetV3],
+ trainable_layers: int,
+ norm_layer: Callable[..., nn.Module],
+):
+ backbone = backbone.features
+ # Gather the indices of blocks which are strided. These are the locations of C1, ..., Cn-1 blocks.
+ # The first and last blocks are always included because they are the C0 (conv1) and Cn.
+ stage_indices = [0] + [i for i, b in enumerate(backbone) if getattr(b, "_is_cn", False)] + [len(backbone) - 1]
+ num_stages = len(stage_indices)
+
+ # find the index of the layer from which we wont freeze
+ assert 0 <= trainable_layers <= num_stages
+ freeze_before = len(backbone) if trainable_layers == 0 else stage_indices[num_stages - trainable_layers]
+
+ for b in backbone[:freeze_before]:
+ for parameter in b.parameters():
+ parameter.requires_grad_(False)
+
+ return SSDLiteFeatureExtractorMobileNet(backbone, stage_indices[-2], norm_layer)
+
+
+def ssdlite320_mobilenet_v3_large(
+ pretrained: bool = False,
+ progress: bool = True,
+ num_classes: int = 91,
+ pretrained_backbone: bool = False,
+ trainable_backbone_layers: Optional[int] = None,
+ norm_layer: Optional[Callable[..., nn.Module]] = None,
+ **kwargs: Any,
+):
+ """Constructs an SSDlite model with input size 320x320 and a MobileNetV3 Large backbone, as described at
+ `"Searching for MobileNetV3"
+ `_ and
+ `"MobileNetV2: Inverted Residuals and Linear Bottlenecks"
+ `_.
+
+ See :func:`~torchvision.models.detection.ssd300_vgg16` for more details.
+
+ Example:
+
+ >>> model = torchvision.models.detection.ssdlite320_mobilenet_v3_large(pretrained=True)
+ >>> model.eval()
+ >>> x = [torch.rand(3, 320, 320), torch.rand(3, 500, 400)]
+ >>> predictions = model(x)
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on COCO train2017
+ progress (bool): If True, displays a progress bar of the download to stderr
+ num_classes (int): number of output classes of the model (including the background)
+ pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
+        trainable_backbone_layers (int): number of trainable (not frozen) backbone layers starting from the final block.
+ Valid values are between 0 and 6, with 6 meaning all backbone layers are trainable.
+ norm_layer (callable, optional): Module specifying the normalization layer to use.
+ """
+ if "size" in kwargs:
+ warnings.warn("The size of the model is already fixed; ignoring the argument.")
+
+ trainable_backbone_layers = _validate_trainable_layers(
+ pretrained or pretrained_backbone, trainable_backbone_layers, 6, 6
+ )
+
+ if pretrained:
+ pretrained_backbone = False
+
+ # Enable reduced tail if no pretrained backbone is selected. See Table 6 of MobileNetV3 paper.
+ reduce_tail = not pretrained_backbone
+
+ if norm_layer is None:
+ norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.03)
+
+ backbone = mobilenet.mobilenet_v3_large(
+ pretrained=pretrained_backbone, progress=progress, norm_layer=norm_layer, reduced_tail=reduce_tail, **kwargs
+ )
+ if not pretrained_backbone:
+ # Change the default initialization scheme if not pretrained
+ _normal_init(backbone)
+ backbone = _mobilenet_extractor(
+ backbone,
+ trainable_backbone_layers,
+ norm_layer,
+ )
+
+ size = (320, 320)
+ anchor_generator = DefaultBoxGenerator([[2, 3] for _ in range(6)], min_ratio=0.2, max_ratio=0.95)
+ out_channels = det_utils.retrieve_out_channels(backbone, size)
+ num_anchors = anchor_generator.num_anchors_per_location()
+ assert len(out_channels) == len(anchor_generator.aspect_ratios)
+
+ defaults = {
+ "score_thresh": 0.001,
+ "nms_thresh": 0.55,
+ "detections_per_img": 300,
+ "topk_candidates": 300,
+ # Rescale the input in a way compatible to the backbone:
+        # The following mean/std rescale the data from [0, 1] to [-1, 1]
+ "image_mean": [0.5, 0.5, 0.5],
+ "image_std": [0.5, 0.5, 0.5],
+ }
+ kwargs = {**defaults, **kwargs}
+ model = SSD(
+ backbone,
+ anchor_generator,
+ size,
+ num_classes,
+ head=SSDLiteHead(out_channels, num_anchors, num_classes, norm_layer),
+ **kwargs,
+ )
+
+ if pretrained:
+ weights_name = "ssdlite320_mobilenet_v3_large_coco"
+ if model_urls.get(weights_name, None) is None:
+ raise ValueError(f"No checkpoint is available for model {weights_name}")
+ state_dict = load_state_dict_from_url(model_urls[weights_name], progress=progress)
+ model.load_state_dict(state_dict)
+ return model
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform.py
new file mode 100644
index 0000000000..af9d13414d
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform.py
@@ -0,0 +1,302 @@
+import math
+from typing import List, Tuple, Dict, Optional
+
+import torch
+import torchvision
+import pdb
+from torch import nn, Tensor
+
+from .image_list import ImageList
+from .roi_heads import paste_masks_in_image
+
+
+@torch.jit.unused
+def _get_shape_onnx(image: Tensor) -> Tensor:
+ from torch.onnx import operators
+
+ return operators.shape_as_tensor(image)[-2:]
+
+
+@torch.jit.unused
+def _fake_cast_onnx(v: Tensor) -> float:
+ # ONNX requires a tensor but here we fake its type for JIT.
+ return v
+
+
+def _resize_image_and_masks(
+ image: Tensor,
+ self_min_size: float,
+ self_max_size: float,
+ target: Optional[Dict[str, Tensor]] = None,
+ fixed_size: Optional[Tuple[int, int]] = None,
+) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
+ if torchvision._is_tracing():
+ im_shape = _get_shape_onnx(image)
+ else:
+ im_shape = torch.tensor(image.shape[-2:])
+
+ size: Optional[List[int]] = None
+ scale_factor: Optional[float] = None
+ recompute_scale_factor: Optional[bool] = None
+ if fixed_size is not None:
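+        # fixed_size is given as (width, height); interpolate below expects (height, width)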
+ size = [fixed_size[1], fixed_size[0]]
+ else:
+ min_size = torch.min(im_shape).to(dtype=torch.float32)
+ max_size = torch.max(im_shape).to(dtype=torch.float32)
+ scale = torch.min(self_min_size / min_size, self_max_size / max_size)
+
+ if torchvision._is_tracing():
+ scale_factor = _fake_cast_onnx(scale)
+ else:
+ scale_factor = scale.item()
+ recompute_scale_factor = True
+
+ image = torch.nn.functional.interpolate(
+ image[None],
+ size=size,
+ scale_factor=scale_factor,
+ mode="bilinear",
+ recompute_scale_factor=recompute_scale_factor,
+ align_corners=False,
+ )[0]
+
+ if target is None:
+ return image, target
+
+ if "masks" in target:
+ mask = target["masks"]
+ mask = torch.nn.functional.interpolate(
+ mask[:, None].float(), size=size, scale_factor=scale_factor, recompute_scale_factor=recompute_scale_factor
+ )[:, 0].byte()
+ target["masks"] = mask
+ return image, target
+
+
+class GeneralizedRCNNTransform(nn.Module):
+ """
+ Performs input / target transformation before feeding the data to a GeneralizedRCNN
+ model.
+
+    The transformations it performs are:
+ - input normalization (mean subtraction and std division)
+ - input / target resizing to match min_size / max_size
+
+    It returns an ImageList for the inputs, and a List[Dict[Tensor]] for the targets
+ """
+
+ def __init__(
+ self,
+ min_size: int,
+ max_size: int,
+ image_mean: List[float],
+ image_std: List[float],
+ size_divisible: int = 32,
+ fixed_size: Optional[Tuple[int, int]] = None,
+ ):
+ super().__init__()
+ if not isinstance(min_size, (list, tuple)):
+ min_size = (min_size,)
+ self.min_size = min_size
+ self.max_size = max_size
+ self.image_mean = image_mean
+ self.image_std = image_std
+ self.size_divisible = size_divisible
+ self.fixed_size = fixed_size
+
+ def forward(
+ self, images: List[Tensor], targets: Optional[List[Dict[str, Tensor]]] = None
+ ) -> Tuple[ImageList, Optional[List[Dict[str, Tensor]]]]:
+ images = [img for img in images]
+ if targets is not None:
+ # make a copy of targets to avoid modifying it in-place
+ # once torchscript supports dict comprehension
+ # this can be simplified as follows
+ # targets = [{k: v for k,v in t.items()} for t in targets]
+ targets_copy: List[Dict[str, Tensor]] = []
+ for t in targets:
+ data: Dict[str, Tensor] = {}
+ for k, v in t.items():
+ data[k] = v
+ targets_copy.append(data)
+ targets = targets_copy
+ for i in range(len(images)):
+ image = images[i]
+ target_index = targets[i] if targets is not None else None
+
+ if image.dim() != 3:
+ raise ValueError(f"images is expected to be a list of 3d tensors of shape [C, H, W], got {image.shape}")
+ image = self.normalize(image)
+ image, target_index = self.resize(image, target_index)
+ images[i] = image
+ if targets is not None and target_index is not None:
+ targets[i] = target_index
+ image_sizes = [img.shape[-2:] for img in images]
+ images = self.batch_images(images, size_divisible=self.size_divisible)
+ image_sizes_list: List[Tuple[int, int]] = []
+ for image_size in image_sizes:
+ assert len(image_size) == 2
+ image_sizes_list.append((image_size[0], image_size[1]))
+
+ image_list = ImageList(images, image_sizes_list)
+ return image_list, targets
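+
+    # A hedged usage sketch (illustration only, not part of the vendored code), using
+    # fixed_size=(320, 320) and a mean/std of 0.5 as in the SSDLite320 builder earlier in
+    # this patch; differently sized images are normalized, resized and batched:
+    #
+    #   transform = GeneralizedRCNNTransform(320, 320, [0.5, 0.5, 0.5], [0.5, 0.5, 0.5],
+    #                                        fixed_size=(320, 320))
+    #   image_list, _ = transform([torch.rand(3, 480, 640), torch.rand(3, 300, 400)])
+    #   # image_list.tensors.shape == torch.Size([2, 3, 320, 320])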
+
+ def normalize(self, image: Tensor) -> Tensor:
+ if not image.is_floating_point():
+ raise TypeError(
+ f"Expected input images to be of floating type (in range [0, 1]), "
+ f"but found type {image.dtype} instead"
+ )
+ dtype, device = image.dtype, image.device
+ mean = torch.as_tensor(self.image_mean, dtype=dtype, device=device)
+ std = torch.as_tensor(self.image_std, dtype=dtype, device=device)
+ return (image - mean[:, None, None]) / std[:, None, None]
+
+ def torch_choice(self, k: List[int]) -> int:
+ """
+ Implements `random.choice` via torch ops so it can be compiled with
+ TorchScript. Remove if https://github.com/pytorch/pytorch/issues/25803
+ is fixed.
+ """
+ index = int(torch.empty(1).uniform_(0.0, float(len(k))).item())
+ return k[index]
+
+ def resize(
+ self,
+ image: Tensor,
+ target: Optional[Dict[str, Tensor]] = None,
+ ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
+ h, w = image.shape[-2:]
+ if self.training:
+ size = float(self.torch_choice(self.min_size))
+ else:
+ # FIXME assume for now that testing uses the largest scale
+ size = float(self.min_size[-1])
+ image, target = _resize_image_and_masks(image, size, float(self.max_size), target, self.fixed_size)
+
+ if target is None:
+ return image, target
+
+ bbox = target["boxes"]
+ bbox = resize_boxes(bbox, (h, w), image.shape[-2:])
+ target["boxes"] = bbox
+
+ if "keypoints" in target:
+ keypoints = target["keypoints"]
+ keypoints = resize_keypoints(keypoints, (h, w), image.shape[-2:])
+ target["keypoints"] = keypoints
+ return image, target
+
+ # _onnx_batch_images() is an implementation of
+ # batch_images() that is supported by ONNX tracing.
+ @torch.jit.unused
+ def _onnx_batch_images(self, images: List[Tensor], size_divisible: int = 32) -> Tensor:
+ max_size = []
+ for i in range(images[0].dim()):
+ max_size_i = torch.max(torch.stack([img.shape[i] for img in images]).to(torch.float32)).to(torch.int64)
+ max_size.append(max_size_i)
+ stride = size_divisible
+ max_size[1] = (torch.ceil((max_size[1].to(torch.float32)) / stride) * stride).to(torch.int64)
+ max_size[2] = (torch.ceil((max_size[2].to(torch.float32)) / stride) * stride).to(torch.int64)
+ max_size = tuple(max_size)
+
+ # work around for
+ # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)
+ # which is not yet supported in onnx
+ padded_imgs = []
+ for img in images:
+ padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))]
+ padded_img = torch.nn.functional.pad(img, (0, padding[2], 0, padding[1], 0, padding[0]))
+ padded_imgs.append(padded_img)
+
+ return torch.stack(padded_imgs)
+
+ def max_by_axis(self, the_list: List[List[int]]) -> List[int]:
+ maxes = the_list[0]
+ for sublist in the_list[1:]:
+ for index, item in enumerate(sublist):
+ maxes[index] = max(maxes[index], item)
+ return maxes
+
+ def batch_images(self, images: List[Tensor], size_divisible: int = 32) -> Tensor:
+ if torchvision._is_tracing():
+ # batch_images() does not export well to ONNX
+ # call _onnx_batch_images() instead
+ return self._onnx_batch_images(images, size_divisible)
+
+ max_size = self.max_by_axis([list(img.shape) for img in images])
+ stride = float(size_divisible)
+ max_size = list(max_size)
+ max_size[1] = int(math.ceil(float(max_size[1]) / stride) * stride)
+ max_size[2] = int(math.ceil(float(max_size[2]) / stride) * stride)
+
+ batch_shape = [len(images)] + max_size
+ batched_imgs = images[0].new_full(batch_shape, 0)
+ for i in range(batched_imgs.shape[0]):
+ img = images[i]
+ batched_imgs[i, : img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)
+
+ return batched_imgs
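+
+    # Illustration (a sketch, not from the upstream sources): with size_divisible=32, images of
+    # shape [3, 300, 400] and [3, 290, 410] share a maximum extent of [3, 300, 410], which is
+    # rounded up to [3, 320, 416]; both are then zero-padded into one [2, 3, 320, 416] tensor.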
+
+ def postprocess(
+ self,
+ result: List[Dict[str, Tensor]],
+ image_shapes: List[Tuple[int, int]],
+ original_image_sizes: List[Tuple[int, int]],
+ ) -> List[Dict[str, Tensor]]:
+ if self.training:
+ return result
+ for i, (pred, im_s, o_im_s) in enumerate(zip(result, image_shapes, original_image_sizes)):
+ boxes = pred["boxes"]
+ boxes = resize_boxes(boxes, im_s, o_im_s)
+ result[i]["boxes"] = boxes
+ if "masks" in pred:
+ masks = pred["masks"]
+ masks = paste_masks_in_image(masks, boxes, o_im_s)
+ result[i]["masks"] = masks
+ if "keypoints" in pred:
+ keypoints = pred["keypoints"]
+ keypoints = resize_keypoints(keypoints, im_s, o_im_s)
+ result[i]["keypoints"] = keypoints
+ return result
+
+ def __repr__(self) -> str:
+ format_string = self.__class__.__name__ + "("
+ _indent = "\n "
+ format_string += f"{_indent}Normalize(mean={self.image_mean}, std={self.image_std})"
+ format_string += f"{_indent}Resize(min_size={self.min_size}, max_size={self.max_size}, mode='bilinear')"
+ format_string += "\n)"
+ return format_string
+
+
+def resize_keypoints(keypoints: Tensor, original_size: List[int], new_size: List[int]) -> Tensor:
+ ratios = [
+ torch.tensor(s, dtype=torch.float32, device=keypoints.device)
+ / torch.tensor(s_orig, dtype=torch.float32, device=keypoints.device)
+ for s, s_orig in zip(new_size, original_size)
+ ]
+ ratio_h, ratio_w = ratios
+ resized_data = keypoints.clone()
+ if torch._C._get_tracing_state():
+ resized_data_0 = resized_data[:, :, 0] * ratio_w
+ resized_data_1 = resized_data[:, :, 1] * ratio_h
+ resized_data = torch.stack((resized_data_0, resized_data_1, resized_data[:, :, 2]), dim=2)
+ else:
+ resized_data[..., 0] *= ratio_w
+ resized_data[..., 1] *= ratio_h
+ return resized_data
+
+
+def resize_boxes(boxes: Tensor, original_size: List[int], new_size: List[int]) -> Tensor:
+ ratios = [
+ torch.tensor(s, dtype=torch.float32, device=boxes.device)
+ / torch.tensor(s_orig, dtype=torch.float32, device=boxes.device)
+ for s, s_orig in zip(new_size, original_size)
+ ]
+ ratio_height, ratio_width = ratios
+ xmin, ymin, xmax, ymax = boxes.unbind(1)
+
+ xmin = xmin * ratio_width
+ xmax = xmax * ratio_width
+ ymin = ymin * ratio_height
+ ymax = ymax * ratio_height
+ return torch.stack((xmin, ymin, xmax, ymax), dim=1)
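+
+
+# Worked example (illustrative, not part of the vendored file): shrinking an image from
+# (480, 640) to (240, 320) halves every box coordinate:
+#
+#   boxes = torch.tensor([[10., 20., 110., 220.]])
+#   resize_boxes(boxes, original_size=[480, 640], new_size=[240, 320])
+#   # -> tensor([[  5.,  10.,  55., 110.]])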
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform_origin.py
new file mode 100644
index 0000000000..ffbe2279b7
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform_origin.py
@@ -0,0 +1,226 @@
+import random
+import math
+import torch
+from torch import nn, Tensor
+import torchvision
+from torch.jit.annotations import List, Tuple, Dict, Optional
+
+from torchvision.ops import misc as misc_nn_ops
+from .image_list import ImageList
+from .roi_heads import paste_masks_in_image
+
+
+class GeneralizedRCNNTransform(nn.Module):
+ """
+ Performs input / target transformation before feeding the data to a GeneralizedRCNN
+ model.
+
+    The transformations it performs are:
+ - input normalization (mean subtraction and std division)
+ - input / target resizing to match min_size / max_size
+
+    It returns an ImageList for the inputs, and a List[Dict[Tensor]] for the targets
+ """
+
+ def __init__(self, min_size, max_size, image_mean, image_std):
+ super(GeneralizedRCNNTransform, self).__init__()
+ if not isinstance(min_size, (list, tuple)):
+ min_size = (min_size,)
+ self.min_size = min_size
+ self.max_size = max_size
+ self.image_mean = image_mean
+ self.image_std = image_std
+
+ def forward(self, images, targets=None):
+ # type: (List[Tensor], Optional[List[Dict[str, Tensor]]])
+ images = [img for img in images]
+ for i in range(len(images)):
+ image = images[i]
+ target_index = targets[i] if targets is not None else None
+
+ if image.dim() != 3:
+ raise ValueError("images is expected to be a list of 3d tensors "
+ "of shape [C, H, W], got {}".format(image.shape))
+ image = self.normalize(image)
+ image, target_index = self.resize(image, target_index)
+ images[i] = image
+ if targets is not None and target_index is not None:
+ targets[i] = target_index
+
+ image_sizes = [img.shape[-2:] for img in images]
+ images = self.batch_images(images)
+ image_sizes_list = torch.jit.annotate(List[Tuple[int, int]], [])
+ for image_size in image_sizes:
+ assert len(image_size) == 2
+ image_sizes_list.append((image_size[0], image_size[1]))
+
+ image_list = ImageList(images, image_sizes_list)
+ return image_list, targets
+
+ def normalize(self, image):
+ dtype, device = image.dtype, image.device
+ mean = torch.as_tensor(self.image_mean, dtype=dtype, device=device)
+ std = torch.as_tensor(self.image_std, dtype=dtype, device=device)
+ return (image - mean[:, None, None]) / std[:, None, None]
+
+ def torch_choice(self, l):
+ # type: (List[int])
+ """
+ Implements `random.choice` via torch ops so it can be compiled with
+ TorchScript. Remove if https://github.com/pytorch/pytorch/issues/25803
+ is fixed.
+ """
+ index = int(torch.empty(1).uniform_(0., float(len(l))).item())
+ return l[index]
+
+ def resize(self, image, target):
+ # type: (Tensor, Optional[Dict[str, Tensor]])
+ h, w = image.shape[-2:]
+ im_shape = torch.tensor(image.shape[-2:])
+ min_size = float(torch.min(im_shape))
+ max_size = float(torch.max(im_shape))
+ if self.training:
+ size = float(self.torch_choice(self.min_size))
+ else:
+ # FIXME assume for now that testing uses the largest scale
+ size = float(self.min_size[-1])
+ scale_factor = size / min_size
+ if max_size * scale_factor > self.max_size:
+ scale_factor = self.max_size / max_size
+ image = torch.nn.functional.interpolate(
+ image[None], scale_factor=scale_factor, mode='bilinear',
+ align_corners=False)[0]
+
+ if target is None:
+ return image, target
+
+ bbox = target["boxes"]
+ bbox = resize_boxes(bbox, (h, w), image.shape[-2:])
+ target["boxes"] = bbox
+
+ if "masks" in target:
+ mask = target["masks"]
+ mask = misc_nn_ops.interpolate(mask[None].float(), scale_factor=scale_factor)[0].byte()
+ target["masks"] = mask
+
+ if "keypoints" in target:
+ keypoints = target["keypoints"]
+ keypoints = resize_keypoints(keypoints, (h, w), image.shape[-2:])
+ target["keypoints"] = keypoints
+ return image, target
+
+ # _onnx_batch_images() is an implementation of
+ # batch_images() that is supported by ONNX tracing.
+ @torch.jit.unused
+ def _onnx_batch_images(self, images, size_divisible=32):
+ # type: (List[Tensor], int) -> Tensor
+ max_size = []
+ for i in range(images[0].dim()):
+ max_size_i = torch.max(torch.stack([img.shape[i] for img in images]).to(torch.float32)).to(torch.int64)
+ max_size.append(max_size_i)
+ stride = size_divisible
+ max_size[1] = (torch.ceil((max_size[1].to(torch.float32)) / stride) * stride).to(torch.int64)
+ max_size[2] = (torch.ceil((max_size[2].to(torch.float32)) / stride) * stride).to(torch.int64)
+ max_size = tuple(max_size)
+
+ # work around for
+ # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)
+ # which is not yet supported in onnx
+ padded_imgs = []
+ for img in images:
+ padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))]
+ padded_img = torch.nn.functional.pad(img, (0, padding[2], 0, padding[1], 0, padding[0]))
+ padded_imgs.append(padded_img)
+
+ return torch.stack(padded_imgs)
+
+ def max_by_axis(self, the_list):
+ # type: (List[List[int]]) -> List[int]
+ maxes = the_list[0]
+ for sublist in the_list[1:]:
+ for index, item in enumerate(sublist):
+ maxes[index] = max(maxes[index], item)
+ return maxes
+
+ def batch_images(self, images, size_divisible=32):
+ # type: (List[Tensor], int)
+ if torchvision._is_tracing():
+ # batch_images() does not export well to ONNX
+ # call _onnx_batch_images() instead
+ return self._onnx_batch_images(images, size_divisible)
+
+ max_size = self.max_by_axis([list(img.shape) for img in images])
+ stride = float(size_divisible)
+ max_size = list(max_size)
+ max_size[1] = int(math.ceil(float(max_size[1]) / stride) * stride)
+ max_size[2] = int(math.ceil(float(max_size[2]) / stride) * stride)
+
+ batch_shape = [len(images)] + max_size
+ batched_imgs = images[0].new_full(batch_shape, 0)
+ for img, pad_img in zip(images, batched_imgs):
+ pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)
+
+ return batched_imgs
+
+ def postprocess(self, result, image_shapes, original_image_sizes):
+ # type: (List[Dict[str, Tensor]], List[Tuple[int, int]], List[Tuple[int, int]])
+ if self.training:
+ return result
+ for i, (pred, im_s, o_im_s) in enumerate(zip(result, image_shapes, original_image_sizes)):
+ boxes = pred["boxes"]
+ boxes = resize_boxes(boxes, im_s, o_im_s)
+ result[i]["boxes"] = boxes
+ if "masks" in pred:
+ masks = pred["masks"]
+ masks = paste_masks_in_image(masks, boxes, o_im_s)
+ result[i]["masks"] = masks
+ if "keypoints" in pred:
+ keypoints = pred["keypoints"]
+ keypoints = resize_keypoints(keypoints, im_s, o_im_s)
+ result[i]["keypoints"] = keypoints
+ return result
+
+ def __repr__(self):
+ format_string = self.__class__.__name__ + '('
+ _indent = '\n '
+ format_string += "{0}Normalize(mean={1}, std={2})".format(_indent, self.image_mean, self.image_std)
+ format_string += "{0}Resize(min_size={1}, max_size={2}, mode='bilinear')".format(_indent, self.min_size,
+ self.max_size)
+ format_string += '\n)'
+ return format_string
+
+
+def resize_keypoints(keypoints, original_size, new_size):
+ # type: (Tensor, List[int], List[int])
+ ratios = [
+ torch.tensor(s, dtype=torch.float32, device=keypoints.device) /
+ torch.tensor(s_orig, dtype=torch.float32, device=keypoints.device)
+ for s, s_orig in zip(new_size, original_size)
+ ]
+ ratio_h, ratio_w = ratios
+ resized_data = keypoints.clone()
+ if torch._C._get_tracing_state():
+ resized_data_0 = resized_data[:, :, 0] * ratio_w
+ resized_data_1 = resized_data[:, :, 1] * ratio_h
+ resized_data = torch.stack((resized_data_0, resized_data_1, resized_data[:, :, 2]), dim=2)
+ else:
+ resized_data[..., 0] *= ratio_w
+ resized_data[..., 1] *= ratio_h
+ return resized_data
+
+
+def resize_boxes(boxes, original_size, new_size):
+ # type: (Tensor, List[int], List[int])
+ ratios = [
+ torch.tensor(s, dtype=torch.float32, device=boxes.device) /
+ torch.tensor(s_orig, dtype=torch.float32, device=boxes.device)
+ for s, s_orig in zip(new_size, original_size)
+ ]
+ ratio_height, ratio_width = ratios
+ xmin, ymin, xmax, ymax = boxes.unbind(1)
+
+ xmin = xmin * ratio_width
+ xmax = xmax * ratio_width
+ ymin = ymin * ratio_height
+ ymax = ymax * ratio_height
+ return torch.stack((xmin, ymin, xmax, ymax), dim=1)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/googlenet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/googlenet.py
new file mode 100644
index 0000000000..4b1cb28003
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/googlenet.py
@@ -0,0 +1,290 @@
+import warnings
+from collections import namedtuple
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.jit.annotations import Optional, Tuple
+from torch import Tensor
+from .utils import load_state_dict_from_url
+
+__all__ = ['GoogLeNet', 'googlenet', "GoogLeNetOutputs", "_GoogLeNetOutputs"]
+
+model_urls = {
+ # GoogLeNet ported from TensorFlow
+ 'googlenet': 'https://download.pytorch.org/models/googlenet-1378be20.pth',
+}
+
+GoogLeNetOutputs = namedtuple('GoogLeNetOutputs', ['logits', 'aux_logits2', 'aux_logits1'])
+GoogLeNetOutputs.__annotations__ = {'logits': Tensor, 'aux_logits2': Optional[Tensor],
+ 'aux_logits1': Optional[Tensor]}
+
+# Script annotations failed with _GoogleNetOutputs = namedtuple ...
+# _GoogLeNetOutputs set here for backwards compat
+_GoogLeNetOutputs = GoogLeNetOutputs
+
+
+def googlenet(pretrained=False, progress=True, **kwargs):
+ r"""GoogLeNet (Inception v1) model architecture from
+    `"Going Deeper with Convolutions" <http://arxiv.org/abs/1409.4842>`_.
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ aux_logits (bool): If True, adds two auxiliary branches that can improve training.
+ Default: *False* when pretrained is True otherwise *True*
+ transform_input (bool): If True, preprocesses the input according to the method with which it
+ was trained on ImageNet. Default: *False*
+ """
+ if pretrained:
+ if 'transform_input' not in kwargs:
+ kwargs['transform_input'] = True
+ if 'aux_logits' not in kwargs:
+ kwargs['aux_logits'] = False
+ if kwargs['aux_logits']:
+ warnings.warn('auxiliary heads in the pretrained googlenet model are NOT pretrained, '
+ 'so make sure to train them')
+ original_aux_logits = kwargs['aux_logits']
+ kwargs['aux_logits'] = True
+ kwargs['init_weights'] = False
+ model = GoogLeNet(**kwargs)
+ state_dict = load_state_dict_from_url(model_urls['googlenet'],
+ progress=progress)
+ model.load_state_dict(state_dict)
+ if not original_aux_logits:
+ model.aux_logits = False
+ model.aux1 = None
+ model.aux2 = None
+ return model
+
+ return GoogLeNet(**kwargs)
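+
+
+# A hedged usage sketch (illustration only): in eval mode the model returns plain logits, while
+# in training mode with aux_logits=True it returns a GoogLeNetOutputs named tuple instead:
+#
+#   model = googlenet(pretrained=False, aux_logits=False)
+#   model.eval()
+#   logits = model(torch.rand(1, 3, 224, 224))   # shape [1, 1000]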
+
+
+class GoogLeNet(nn.Module):
+ __constants__ = ['aux_logits', 'transform_input']
+
+ def __init__(self, num_classes=1000, aux_logits=True, transform_input=False, init_weights=True,
+ blocks=None):
+ super(GoogLeNet, self).__init__()
+ if blocks is None:
+ blocks = [BasicConv2d, Inception, InceptionAux]
+ assert len(blocks) == 3
+ conv_block = blocks[0]
+ inception_block = blocks[1]
+ inception_aux_block = blocks[2]
+
+ self.aux_logits = aux_logits
+ self.transform_input = transform_input
+
+ self.conv1 = conv_block(3, 64, kernel_size=7, stride=2, padding=3)
+ self.maxpool1 = nn.MaxPool2d(3, stride=2, ceil_mode=True)
+ self.conv2 = conv_block(64, 64, kernel_size=1)
+ self.conv3 = conv_block(64, 192, kernel_size=3, padding=1)
+ self.maxpool2 = nn.MaxPool2d(3, stride=2, ceil_mode=True)
+
+ self.inception3a = inception_block(192, 64, 96, 128, 16, 32, 32)
+ self.inception3b = inception_block(256, 128, 128, 192, 32, 96, 64)
+ self.maxpool3 = nn.MaxPool2d(3, stride=2, ceil_mode=True)
+
+ self.inception4a = inception_block(480, 192, 96, 208, 16, 48, 64)
+ self.inception4b = inception_block(512, 160, 112, 224, 24, 64, 64)
+ self.inception4c = inception_block(512, 128, 128, 256, 24, 64, 64)
+ self.inception4d = inception_block(512, 112, 144, 288, 32, 64, 64)
+ self.inception4e = inception_block(528, 256, 160, 320, 32, 128, 128)
+ self.maxpool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+
+ self.inception5a = inception_block(832, 256, 160, 320, 32, 128, 128)
+ self.inception5b = inception_block(832, 384, 192, 384, 48, 128, 128)
+
+ if aux_logits:
+ self.aux1 = inception_aux_block(512, num_classes)
+ self.aux2 = inception_aux_block(528, num_classes)
+ else:
+ self.aux1 = None
+ self.aux2 = None
+
+ self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
+ self.dropout = nn.Dropout(0.2)
+ self.fc = nn.Linear(1024, num_classes)
+
+ if init_weights:
+ self._initialize_weights()
+
+ def _initialize_weights(self):
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
+ import scipy.stats as stats
+ X = stats.truncnorm(-2, 2, scale=0.01)
+ values = torch.as_tensor(X.rvs(m.weight.numel()), dtype=m.weight.dtype)
+ values = values.view(m.weight.size())
+ with torch.no_grad():
+ m.weight.copy_(values)
+ elif isinstance(m, nn.BatchNorm2d):
+ nn.init.constant_(m.weight, 1)
+ nn.init.constant_(m.bias, 0)
+
+ def _transform_input(self, x):
+ # type: (Tensor) -> Tensor
+ if self.transform_input:
+ x_ch0 = torch.unsqueeze(x[:, 0], 1) * (0.229 / 0.5) + (0.485 - 0.5) / 0.5
+ x_ch1 = torch.unsqueeze(x[:, 1], 1) * (0.224 / 0.5) + (0.456 - 0.5) / 0.5
+ x_ch2 = torch.unsqueeze(x[:, 2], 1) * (0.225 / 0.5) + (0.406 - 0.5) / 0.5
+ x = torch.cat((x_ch0, x_ch1, x_ch2), 1)
+ return x
+
+ def _forward(self, x):
+ # type: (Tensor) -> Tuple[Tensor, Optional[Tensor], Optional[Tensor]]
+ # N x 3 x 224 x 224
+ x = self.conv1(x)
+ # N x 64 x 112 x 112
+ x = self.maxpool1(x)
+ # N x 64 x 56 x 56
+ x = self.conv2(x)
+ # N x 64 x 56 x 56
+ x = self.conv3(x)
+ # N x 192 x 56 x 56
+ x = self.maxpool2(x)
+
+ # N x 192 x 28 x 28
+ x = self.inception3a(x)
+ # N x 256 x 28 x 28
+ x = self.inception3b(x)
+ # N x 480 x 28 x 28
+ x = self.maxpool3(x)
+ # N x 480 x 14 x 14
+ x = self.inception4a(x)
+ # N x 512 x 14 x 14
+ aux1 = torch.jit.annotate(Optional[Tensor], None)
+ if self.aux1 is not None:
+ if self.training:
+ aux1 = self.aux1(x)
+
+ x = self.inception4b(x)
+ # N x 512 x 14 x 14
+ x = self.inception4c(x)
+ # N x 512 x 14 x 14
+ x = self.inception4d(x)
+ # N x 528 x 14 x 14
+ aux2 = torch.jit.annotate(Optional[Tensor], None)
+ if self.aux2 is not None:
+ if self.training:
+ aux2 = self.aux2(x)
+
+ x = self.inception4e(x)
+ # N x 832 x 14 x 14
+ x = self.maxpool4(x)
+ # N x 832 x 7 x 7
+ x = self.inception5a(x)
+ # N x 832 x 7 x 7
+ x = self.inception5b(x)
+ # N x 1024 x 7 x 7
+
+ x = self.avgpool(x)
+ # N x 1024 x 1 x 1
+ x = torch.flatten(x, 1)
+ # N x 1024
+ x = self.dropout(x)
+ x = self.fc(x)
+ # N x 1000 (num_classes)
+ return x, aux2, aux1
+
+ @torch.jit.unused
+ def eager_outputs(self, x, aux2, aux1):
+ # type: (Tensor, Optional[Tensor], Optional[Tensor]) -> GoogLeNetOutputs
+ if self.training and self.aux_logits:
+ return _GoogLeNetOutputs(x, aux2, aux1)
+ else:
+ return x
+
+ def forward(self, x):
+ # type: (Tensor) -> GoogLeNetOutputs
+ x = self._transform_input(x)
+        # _forward returns the auxiliary outputs in (aux2, aux1) order
+        x, aux2, aux1 = self._forward(x)
+ aux_defined = self.training and self.aux_logits
+ if torch.jit.is_scripting():
+ if not aux_defined:
+ warnings.warn("Scripted GoogleNet always returns GoogleNetOutputs Tuple")
+ return GoogLeNetOutputs(x, aux2, aux1)
+ else:
+ return self.eager_outputs(x, aux2, aux1)
+
+
+class Inception(nn.Module):
+
+ def __init__(self, in_channels, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj,
+ conv_block=None):
+ super(Inception, self).__init__()
+ if conv_block is None:
+ conv_block = BasicConv2d
+ self.branch1 = conv_block(in_channels, ch1x1, kernel_size=1)
+
+ self.branch2 = nn.Sequential(
+ conv_block(in_channels, ch3x3red, kernel_size=1),
+ conv_block(ch3x3red, ch3x3, kernel_size=3, padding=1)
+ )
+
+ self.branch3 = nn.Sequential(
+ conv_block(in_channels, ch5x5red, kernel_size=1),
+ # Here, kernel_size=3 instead of kernel_size=5 is a known bug.
+ # Please see https://github.com/pytorch/vision/issues/906 for details.
+ conv_block(ch5x5red, ch5x5, kernel_size=3, padding=1)
+ )
+
+ self.branch4 = nn.Sequential(
+ nn.MaxPool2d(kernel_size=3, stride=1, padding=1, ceil_mode=True),
+ conv_block(in_channels, pool_proj, kernel_size=1)
+ )
+
+ def _forward(self, x):
+ branch1 = self.branch1(x)
+ branch2 = self.branch2(x)
+ branch3 = self.branch3(x)
+ branch4 = self.branch4(x)
+
+ outputs = [branch1, branch2, branch3, branch4]
+ return outputs
+
+ def forward(self, x):
+ outputs = self._forward(x)
+ return torch.cat(outputs, 1)
+
+
+class InceptionAux(nn.Module):
+
+ def __init__(self, in_channels, num_classes, conv_block=None):
+ super(InceptionAux, self).__init__()
+ if conv_block is None:
+ conv_block = BasicConv2d
+ self.conv = conv_block(in_channels, 128, kernel_size=1)
+
+ self.fc1 = nn.Linear(2048, 1024)
+ self.fc2 = nn.Linear(1024, num_classes)
+
+ def forward(self, x):
+ # aux1: N x 512 x 14 x 14, aux2: N x 528 x 14 x 14
+ x = F.adaptive_avg_pool2d(x, (4, 4))
+ # aux1: N x 512 x 4 x 4, aux2: N x 528 x 4 x 4
+ x = self.conv(x)
+ # N x 128 x 4 x 4
+ x = torch.flatten(x, 1)
+ # N x 2048
+ x = F.relu(self.fc1(x), inplace=True)
+ # N x 1024
+ x = F.dropout(x, 0.7, training=self.training)
+ # N x 1024
+ x = self.fc2(x)
+ # N x 1000 (num_classes)
+
+ return x
+
+
+class BasicConv2d(nn.Module):
+
+ def __init__(self, in_channels, out_channels, **kwargs):
+ super(BasicConv2d, self).__init__()
+ self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
+ self.bn = nn.BatchNorm2d(out_channels, eps=0.001)
+
+ def forward(self, x):
+ x = self.conv(x)
+ x = self.bn(x)
+ return F.relu(x, inplace=True)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/inception.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/inception.py
new file mode 100644
index 0000000000..e4c5430c31
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/inception.py
@@ -0,0 +1,432 @@
+from collections import namedtuple
+import warnings
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.jit.annotations import Optional
+from torch import Tensor
+from .utils import load_state_dict_from_url
+
+
+__all__ = ['Inception3', 'inception_v3', 'InceptionOutputs', '_InceptionOutputs']
+
+
+model_urls = {
+ # Inception v3 ported from TensorFlow
+ 'inception_v3_google': 'https://download.pytorch.org/models/inception_v3_google-1a9a5a14.pth',
+}
+
+InceptionOutputs = namedtuple('InceptionOutputs', ['logits', 'aux_logits'])
+InceptionOutputs.__annotations__ = {'logits': torch.Tensor, 'aux_logits': Optional[torch.Tensor]}
+
+# Script annotations failed with _InceptionOutputs = namedtuple ...
+# _InceptionOutputs set here for backwards compat
+_InceptionOutputs = InceptionOutputs
+
+
+def inception_v3(pretrained=False, progress=True, **kwargs):
+ r"""Inception v3 model architecture from
+    `"Rethinking the Inception Architecture for Computer Vision" <http://arxiv.org/abs/1512.00567>`_.
+
+ .. note::
+ **Important**: In contrast to the other models the inception_v3 expects tensors with a size of
+ N x 3 x 299 x 299, so ensure your images are sized accordingly.
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ aux_logits (bool): If True, add an auxiliary branch that can improve training.
+ Default: *True*
+ transform_input (bool): If True, preprocesses the input according to the method with which it
+ was trained on ImageNet. Default: *False*
+ """
+ if pretrained:
+ if 'transform_input' not in kwargs:
+ kwargs['transform_input'] = True
+ if 'aux_logits' in kwargs:
+ original_aux_logits = kwargs['aux_logits']
+ kwargs['aux_logits'] = True
+ else:
+ original_aux_logits = True
+ model = Inception3(**kwargs)
+ state_dict = load_state_dict_from_url(model_urls['inception_v3_google'],
+ progress=progress)
+ model.load_state_dict(state_dict)
+ if not original_aux_logits:
+ model.aux_logits = False
+ del model.AuxLogits
+ return model
+
+ return Inception3(**kwargs)
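+
+
+# A hedged usage sketch (illustration only); note the 299x299 input size expected by this
+# architecture:
+#
+#   model = inception_v3(pretrained=False, aux_logits=False)
+#   model.eval()
+#   logits = model(torch.rand(1, 3, 299, 299))   # shape [1, 1000]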
+
+
+class Inception3(nn.Module):
+
+ def __init__(self, num_classes=1000, aux_logits=True, transform_input=False,
+ inception_blocks=None, init_weights=True):
+ super(Inception3, self).__init__()
+ if inception_blocks is None:
+ inception_blocks = [
+ BasicConv2d, InceptionA, InceptionB, InceptionC,
+ InceptionD, InceptionE, InceptionAux
+ ]
+ assert len(inception_blocks) == 7
+ conv_block = inception_blocks[0]
+ inception_a = inception_blocks[1]
+ inception_b = inception_blocks[2]
+ inception_c = inception_blocks[3]
+ inception_d = inception_blocks[4]
+ inception_e = inception_blocks[5]
+ inception_aux = inception_blocks[6]
+
+ self.aux_logits = aux_logits
+ self.transform_input = transform_input
+ self.Conv2d_1a_3x3 = conv_block(3, 32, kernel_size=3, stride=2)
+ self.Conv2d_2a_3x3 = conv_block(32, 32, kernel_size=3)
+ self.Conv2d_2b_3x3 = conv_block(32, 64, kernel_size=3, padding=1)
+ self.Conv2d_3b_1x1 = conv_block(64, 80, kernel_size=1)
+ self.Conv2d_4a_3x3 = conv_block(80, 192, kernel_size=3)
+ self.Mixed_5b = inception_a(192, pool_features=32)
+ self.Mixed_5c = inception_a(256, pool_features=64)
+ self.Mixed_5d = inception_a(288, pool_features=64)
+ self.Mixed_6a = inception_b(288)
+ self.Mixed_6b = inception_c(768, channels_7x7=128)
+ self.Mixed_6c = inception_c(768, channels_7x7=160)
+ self.Mixed_6d = inception_c(768, channels_7x7=160)
+ self.Mixed_6e = inception_c(768, channels_7x7=192)
+ if aux_logits:
+ self.AuxLogits = inception_aux(768, num_classes)
+ self.Mixed_7a = inception_d(768)
+ self.Mixed_7b = inception_e(1280)
+ self.Mixed_7c = inception_e(2048)
+ self.fc = nn.Linear(2048, num_classes)
+ if init_weights:
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
+ import scipy.stats as stats
+ stddev = m.stddev if hasattr(m, 'stddev') else 0.1
+ X = stats.truncnorm(-2, 2, scale=stddev)
+ values = torch.as_tensor(X.rvs(m.weight.numel()), dtype=m.weight.dtype)
+ values = values.view(m.weight.size())
+ with torch.no_grad():
+ m.weight.copy_(values)
+ elif isinstance(m, nn.BatchNorm2d):
+ nn.init.constant_(m.weight, 1)
+ nn.init.constant_(m.bias, 0)
+
+ def _transform_input(self, x):
+ if self.transform_input:
+ x_ch0 = torch.unsqueeze(x[:, 0], 1) * (0.229 / 0.5) + (0.485 - 0.5) / 0.5
+ x_ch1 = torch.unsqueeze(x[:, 1], 1) * (0.224 / 0.5) + (0.456 - 0.5) / 0.5
+ x_ch2 = torch.unsqueeze(x[:, 2], 1) * (0.225 / 0.5) + (0.406 - 0.5) / 0.5
+ x = torch.cat((x_ch0, x_ch1, x_ch2), 1)
+ return x
+
+ def _forward(self, x):
+ # N x 3 x 299 x 299
+ x = self.Conv2d_1a_3x3(x)
+ # N x 32 x 149 x 149
+ x = self.Conv2d_2a_3x3(x)
+ # N x 32 x 147 x 147
+ x = self.Conv2d_2b_3x3(x)
+ # N x 64 x 147 x 147
+ x = F.max_pool2d(x, kernel_size=3, stride=2)
+ # N x 64 x 73 x 73
+ x = self.Conv2d_3b_1x1(x)
+ # N x 80 x 73 x 73
+ x = self.Conv2d_4a_3x3(x)
+ # N x 192 x 71 x 71
+ x = F.max_pool2d(x, kernel_size=3, stride=2)
+ # N x 192 x 35 x 35
+ x = self.Mixed_5b(x)
+ # N x 256 x 35 x 35
+ x = self.Mixed_5c(x)
+ # N x 288 x 35 x 35
+ x = self.Mixed_5d(x)
+ # N x 288 x 35 x 35
+ x = self.Mixed_6a(x)
+ # N x 768 x 17 x 17
+ x = self.Mixed_6b(x)
+ # N x 768 x 17 x 17
+ x = self.Mixed_6c(x)
+ # N x 768 x 17 x 17
+ x = self.Mixed_6d(x)
+ # N x 768 x 17 x 17
+ x = self.Mixed_6e(x)
+ # N x 768 x 17 x 17
+ aux_defined = self.training and self.aux_logits
+ if aux_defined:
+ aux = self.AuxLogits(x)
+ else:
+ aux = None
+ # N x 768 x 17 x 17
+ x = self.Mixed_7a(x)
+ # N x 1280 x 8 x 8
+ x = self.Mixed_7b(x)
+ # N x 2048 x 8 x 8
+ x = self.Mixed_7c(x)
+ # N x 2048 x 8 x 8
+ # Adaptive average pooling
+ x = F.adaptive_avg_pool2d(x, (1, 1))
+ # N x 2048 x 1 x 1
+ x = F.dropout(x, training=self.training)
+ # N x 2048 x 1 x 1
+ x = torch.flatten(x, 1)
+ # N x 2048
+ x = self.fc(x)
+ # N x 1000 (num_classes)
+ return x, aux
+
+ @torch.jit.unused
+ def eager_outputs(self, x, aux):
+ # type: (Tensor, Optional[Tensor]) -> InceptionOutputs
+ if self.training and self.aux_logits:
+ return InceptionOutputs(x, aux)
+ else:
+ return x
+
+ def forward(self, x):
+ x = self._transform_input(x)
+ x, aux = self._forward(x)
+ aux_defined = self.training and self.aux_logits
+ if torch.jit.is_scripting():
+ if not aux_defined:
+                warnings.warn("Scripted Inception3 always returns InceptionOutputs Tuple")
+ return InceptionOutputs(x, aux)
+ else:
+ return self.eager_outputs(x, aux)
+
+
+class InceptionA(nn.Module):
+
+ def __init__(self, in_channels, pool_features, conv_block=None):
+ super(InceptionA, self).__init__()
+ if conv_block is None:
+ conv_block = BasicConv2d
+ self.branch1x1 = conv_block(in_channels, 64, kernel_size=1)
+
+ self.branch5x5_1 = conv_block(in_channels, 48, kernel_size=1)
+ self.branch5x5_2 = conv_block(48, 64, kernel_size=5, padding=2)
+
+ self.branch3x3dbl_1 = conv_block(in_channels, 64, kernel_size=1)
+ self.branch3x3dbl_2 = conv_block(64, 96, kernel_size=3, padding=1)
+ self.branch3x3dbl_3 = conv_block(96, 96, kernel_size=3, padding=1)
+
+ self.branch_pool = conv_block(in_channels, pool_features, kernel_size=1)
+
+ def _forward(self, x):
+ branch1x1 = self.branch1x1(x)
+
+ branch5x5 = self.branch5x5_1(x)
+ branch5x5 = self.branch5x5_2(branch5x5)
+
+ branch3x3dbl = self.branch3x3dbl_1(x)
+ branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
+ branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl)
+
+ branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1)
+ branch_pool = self.branch_pool(branch_pool)
+
+ outputs = [branch1x1, branch5x5, branch3x3dbl, branch_pool]
+ return outputs
+
+ def forward(self, x):
+ outputs = self._forward(x)
+ return torch.cat(outputs, 1)
+
+
+class InceptionB(nn.Module):
+
+ def __init__(self, in_channels, conv_block=None):
+ super(InceptionB, self).__init__()
+ if conv_block is None:
+ conv_block = BasicConv2d
+ self.branch3x3 = conv_block(in_channels, 384, kernel_size=3, stride=2)
+
+ self.branch3x3dbl_1 = conv_block(in_channels, 64, kernel_size=1)
+ self.branch3x3dbl_2 = conv_block(64, 96, kernel_size=3, padding=1)
+ self.branch3x3dbl_3 = conv_block(96, 96, kernel_size=3, stride=2)
+
+ def _forward(self, x):
+ branch3x3 = self.branch3x3(x)
+
+ branch3x3dbl = self.branch3x3dbl_1(x)
+ branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
+ branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl)
+
+ branch_pool = F.max_pool2d(x, kernel_size=3, stride=2)
+
+ outputs = [branch3x3, branch3x3dbl, branch_pool]
+ return outputs
+
+ def forward(self, x):
+ outputs = self._forward(x)
+ return torch.cat(outputs, 1)
+
+
+class InceptionC(nn.Module):
+
+ def __init__(self, in_channels, channels_7x7, conv_block=None):
+ super(InceptionC, self).__init__()
+ if conv_block is None:
+ conv_block = BasicConv2d
+ self.branch1x1 = conv_block(in_channels, 192, kernel_size=1)
+
+ c7 = channels_7x7
+ self.branch7x7_1 = conv_block(in_channels, c7, kernel_size=1)
+ self.branch7x7_2 = conv_block(c7, c7, kernel_size=(1, 7), padding=(0, 3))
+ self.branch7x7_3 = conv_block(c7, 192, kernel_size=(7, 1), padding=(3, 0))
+
+ self.branch7x7dbl_1 = conv_block(in_channels, c7, kernel_size=1)
+ self.branch7x7dbl_2 = conv_block(c7, c7, kernel_size=(7, 1), padding=(3, 0))
+ self.branch7x7dbl_3 = conv_block(c7, c7, kernel_size=(1, 7), padding=(0, 3))
+ self.branch7x7dbl_4 = conv_block(c7, c7, kernel_size=(7, 1), padding=(3, 0))
+ self.branch7x7dbl_5 = conv_block(c7, 192, kernel_size=(1, 7), padding=(0, 3))
+
+ self.branch_pool = conv_block(in_channels, 192, kernel_size=1)
+
+ def _forward(self, x):
+ branch1x1 = self.branch1x1(x)
+
+ branch7x7 = self.branch7x7_1(x)
+ branch7x7 = self.branch7x7_2(branch7x7)
+ branch7x7 = self.branch7x7_3(branch7x7)
+
+ branch7x7dbl = self.branch7x7dbl_1(x)
+ branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl)
+ branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl)
+ branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl)
+ branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl)
+
+ branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1)
+ branch_pool = self.branch_pool(branch_pool)
+
+ outputs = [branch1x1, branch7x7, branch7x7dbl, branch_pool]
+ return outputs
+
+ def forward(self, x):
+ outputs = self._forward(x)
+ return torch.cat(outputs, 1)
+
+
+class InceptionD(nn.Module):
+
+ def __init__(self, in_channels, conv_block=None):
+ super(InceptionD, self).__init__()
+ if conv_block is None:
+ conv_block = BasicConv2d
+ self.branch3x3_1 = conv_block(in_channels, 192, kernel_size=1)
+ self.branch3x3_2 = conv_block(192, 320, kernel_size=3, stride=2)
+
+ self.branch7x7x3_1 = conv_block(in_channels, 192, kernel_size=1)
+ self.branch7x7x3_2 = conv_block(192, 192, kernel_size=(1, 7), padding=(0, 3))
+ self.branch7x7x3_3 = conv_block(192, 192, kernel_size=(7, 1), padding=(3, 0))
+ self.branch7x7x3_4 = conv_block(192, 192, kernel_size=3, stride=2)
+
+ def _forward(self, x):
+ branch3x3 = self.branch3x3_1(x)
+ branch3x3 = self.branch3x3_2(branch3x3)
+
+ branch7x7x3 = self.branch7x7x3_1(x)
+ branch7x7x3 = self.branch7x7x3_2(branch7x7x3)
+ branch7x7x3 = self.branch7x7x3_3(branch7x7x3)
+ branch7x7x3 = self.branch7x7x3_4(branch7x7x3)
+
+ branch_pool = F.max_pool2d(x, kernel_size=3, stride=2)
+ outputs = [branch3x3, branch7x7x3, branch_pool]
+ return outputs
+
+ def forward(self, x):
+ outputs = self._forward(x)
+ return torch.cat(outputs, 1)
+
+
+class InceptionE(nn.Module):
+
+ def __init__(self, in_channels, conv_block=None):
+ super(InceptionE, self).__init__()
+ if conv_block is None:
+ conv_block = BasicConv2d
+ self.branch1x1 = conv_block(in_channels, 320, kernel_size=1)
+
+ self.branch3x3_1 = conv_block(in_channels, 384, kernel_size=1)
+ self.branch3x3_2a = conv_block(384, 384, kernel_size=(1, 3), padding=(0, 1))
+ self.branch3x3_2b = conv_block(384, 384, kernel_size=(3, 1), padding=(1, 0))
+
+ self.branch3x3dbl_1 = conv_block(in_channels, 448, kernel_size=1)
+ self.branch3x3dbl_2 = conv_block(448, 384, kernel_size=3, padding=1)
+ self.branch3x3dbl_3a = conv_block(384, 384, kernel_size=(1, 3), padding=(0, 1))
+ self.branch3x3dbl_3b = conv_block(384, 384, kernel_size=(3, 1), padding=(1, 0))
+
+ self.branch_pool = conv_block(in_channels, 192, kernel_size=1)
+
+ def _forward(self, x):
+ branch1x1 = self.branch1x1(x)
+
+ branch3x3 = self.branch3x3_1(x)
+ branch3x3 = [
+ self.branch3x3_2a(branch3x3),
+ self.branch3x3_2b(branch3x3),
+ ]
+ branch3x3 = torch.cat(branch3x3, 1)
+
+ branch3x3dbl = self.branch3x3dbl_1(x)
+ branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
+ branch3x3dbl = [
+ self.branch3x3dbl_3a(branch3x3dbl),
+ self.branch3x3dbl_3b(branch3x3dbl),
+ ]
+ branch3x3dbl = torch.cat(branch3x3dbl, 1)
+
+ branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1)
+ branch_pool = self.branch_pool(branch_pool)
+
+ outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool]
+ return outputs
+
+ def forward(self, x):
+ outputs = self._forward(x)
+ return torch.cat(outputs, 1)
+
+
+class InceptionAux(nn.Module):
+
+ def __init__(self, in_channels, num_classes, conv_block=None):
+ super(InceptionAux, self).__init__()
+ if conv_block is None:
+ conv_block = BasicConv2d
+ self.conv0 = conv_block(in_channels, 128, kernel_size=1)
+ self.conv1 = conv_block(128, 768, kernel_size=5)
+ self.conv1.stddev = 0.01
+ self.fc = nn.Linear(768, num_classes)
+ self.fc.stddev = 0.001
+
+ def forward(self, x):
+ # N x 768 x 17 x 17
+ x = F.avg_pool2d(x, kernel_size=5, stride=3)
+ # N x 768 x 5 x 5
+ x = self.conv0(x)
+ # N x 128 x 5 x 5
+ x = self.conv1(x)
+ # N x 768 x 1 x 1
+ # Adaptive average pooling
+ x = F.adaptive_avg_pool2d(x, (1, 1))
+ # N x 768 x 1 x 1
+ x = torch.flatten(x, 1)
+ # N x 768
+ x = self.fc(x)
+ # N x 1000
+ return x
+
+
+class BasicConv2d(nn.Module):
+
+ def __init__(self, in_channels, out_channels, **kwargs):
+ super(BasicConv2d, self).__init__()
+ self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
+ self.bn = nn.BatchNorm2d(out_channels, eps=0.001)
+
+ def forward(self, x):
+ x = self.conv(x)
+ x = self.bn(x)
+ return F.relu(x, inplace=True)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mnasnet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mnasnet.py
new file mode 100644
index 0000000000..59677427f1
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mnasnet.py
@@ -0,0 +1,258 @@
+import math
+import warnings
+
+import torch
+import torch.nn as nn
+from .utils import load_state_dict_from_url
+
+__all__ = ['MNASNet', 'mnasnet0_5', 'mnasnet0_75', 'mnasnet1_0', 'mnasnet1_3']
+
+_MODEL_URLS = {
+ "mnasnet0_5":
+ "https://download.pytorch.org/models/mnasnet0.5_top1_67.823-3ffadce67e.pth",
+ "mnasnet0_75": None,
+ "mnasnet1_0":
+ "https://download.pytorch.org/models/mnasnet1.0_top1_73.512-f206786ef8.pth",
+ "mnasnet1_3": None
+}
+
+# Paper suggests 0.9997 momentum, for TensorFlow. The equivalent PyTorch momentum is
+# 1.0 - 0.9997 = 0.0003.
+_BN_MOMENTUM = 1 - 0.9997
+
+
+class _InvertedResidual(nn.Module):
+
+ def __init__(self, in_ch, out_ch, kernel_size, stride, expansion_factor,
+ bn_momentum=0.1):
+ super(_InvertedResidual, self).__init__()
+ assert stride in [1, 2]
+ assert kernel_size in [3, 5]
+ mid_ch = in_ch * expansion_factor
+ self.apply_residual = (in_ch == out_ch and stride == 1)
+ self.layers = nn.Sequential(
+ # Pointwise
+ nn.Conv2d(in_ch, mid_ch, 1, bias=False),
+ nn.BatchNorm2d(mid_ch, momentum=bn_momentum),
+ nn.ReLU(inplace=True),
+ # Depthwise
+ nn.Conv2d(mid_ch, mid_ch, kernel_size, padding=kernel_size // 2,
+ stride=stride, groups=mid_ch, bias=False),
+ nn.BatchNorm2d(mid_ch, momentum=bn_momentum),
+ nn.ReLU(inplace=True),
+ # Linear pointwise. Note that there's no activation.
+ nn.Conv2d(mid_ch, out_ch, 1, bias=False),
+ nn.BatchNorm2d(out_ch, momentum=bn_momentum))
+
+ def forward(self, input):
+ if self.apply_residual:
+ return self.layers(input) + input
+ else:
+ return self.layers(input)
+
+
+def _stack(in_ch, out_ch, kernel_size, stride, exp_factor, repeats,
+ bn_momentum):
+ """ Creates a stack of inverted residuals. """
+ assert repeats >= 1
+ # First one has no skip, because feature map size changes.
+ first = _InvertedResidual(in_ch, out_ch, kernel_size, stride, exp_factor,
+ bn_momentum=bn_momentum)
+ remaining = []
+ for _ in range(1, repeats):
+ remaining.append(
+ _InvertedResidual(out_ch, out_ch, kernel_size, 1, exp_factor,
+ bn_momentum=bn_momentum))
+ return nn.Sequential(first, *remaining)
+
+
+def _round_to_multiple_of(val, divisor, round_up_bias=0.9):
+ """ Asymmetric rounding to make `val` divisible by `divisor`. With default
+ bias, will round up, unless the number is no more than 10% greater than the
+ smaller divisible value, i.e. (83, 8) -> 80, but (84, 8) -> 88. """
+ assert 0.0 < round_up_bias < 1.0
+ new_val = max(divisor, int(val + divisor / 2) // divisor * divisor)
+ return new_val if new_val >= round_up_bias * val else new_val + divisor
+
+
+def _get_depths(alpha):
+    """ Scales tensor depths as in reference MobileNet code, prefers rounding up
+ rather than down. """
+ depths = [32, 16, 24, 40, 80, 96, 192, 320]
+ return [_round_to_multiple_of(depth * alpha, 8) for depth in depths]
+
+
+class MNASNet(torch.nn.Module):
+ """ MNASNet, as described in https://arxiv.org/pdf/1807.11626.pdf. This
+ implements the B1 variant of the model.
+ >>> model = MNASNet(1000, 1.0)
+ >>> x = torch.rand(1, 3, 224, 224)
+ >>> y = model(x)
+ >>> y.dim()
+ 1
+ >>> y.nelement()
+ 1000
+ """
+ # Version 2 adds depth scaling in the initial stages of the network.
+ _version = 2
+
+ def __init__(self, alpha, num_classes=1000, dropout=0.2):
+ super(MNASNet, self).__init__()
+ assert alpha > 0.0
+ self.alpha = alpha
+ self.num_classes = num_classes
+ depths = _get_depths(alpha)
+ layers = [
+ # First layer: regular conv.
+ nn.Conv2d(3, depths[0], 3, padding=1, stride=2, bias=False),
+ nn.BatchNorm2d(depths[0], momentum=_BN_MOMENTUM),
+ nn.ReLU(inplace=True),
+ # Depthwise separable, no skip.
+ nn.Conv2d(depths[0], depths[0], 3, padding=1, stride=1,
+ groups=depths[0], bias=False),
+ nn.BatchNorm2d(depths[0], momentum=_BN_MOMENTUM),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(depths[0], depths[1], 1, padding=0, stride=1, bias=False),
+ nn.BatchNorm2d(depths[1], momentum=_BN_MOMENTUM),
+ # MNASNet blocks: stacks of inverted residuals.
+ _stack(depths[1], depths[2], 3, 2, 3, 3, _BN_MOMENTUM),
+ _stack(depths[2], depths[3], 5, 2, 3, 3, _BN_MOMENTUM),
+ _stack(depths[3], depths[4], 5, 2, 6, 3, _BN_MOMENTUM),
+ _stack(depths[4], depths[5], 3, 1, 6, 2, _BN_MOMENTUM),
+ _stack(depths[5], depths[6], 5, 2, 6, 4, _BN_MOMENTUM),
+ _stack(depths[6], depths[7], 3, 1, 6, 1, _BN_MOMENTUM),
+ # Final mapping to classifier input.
+ nn.Conv2d(depths[7], 1280, 1, padding=0, stride=1, bias=False),
+ nn.BatchNorm2d(1280, momentum=_BN_MOMENTUM),
+ nn.ReLU(inplace=True),
+ ]
+ self.layers = nn.Sequential(*layers)
+ self.classifier = nn.Sequential(nn.Dropout(p=dropout, inplace=True),
+ nn.Linear(1280, num_classes))
+ self._initialize_weights()
+
+ def forward(self, x):
+ x = self.layers(x)
+ # Equivalent to global avgpool and removing H and W dimensions.
+ x = x.mean([2, 3])
+ return self.classifier(x)
+
+ def _initialize_weights(self):
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ nn.init.kaiming_normal_(m.weight, mode="fan_out",
+ nonlinearity="relu")
+ if m.bias is not None:
+ nn.init.zeros_(m.bias)
+ elif isinstance(m, nn.BatchNorm2d):
+ nn.init.ones_(m.weight)
+ nn.init.zeros_(m.bias)
+ elif isinstance(m, nn.Linear):
+ nn.init.kaiming_uniform_(m.weight, mode="fan_out",
+ nonlinearity="sigmoid")
+ nn.init.zeros_(m.bias)
+
+ def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
+ missing_keys, unexpected_keys, error_msgs):
+ version = local_metadata.get("version", None)
+ assert version in [1, 2]
+
+ if version == 1 and not self.alpha == 1.0:
+ # In the initial version of the model (v1), stem was fixed-size.
+ # All other layer configurations were the same. This will patch
+ # the model so that it's identical to v1. Model with alpha 1.0 is
+ # unaffected.
+ depths = _get_depths(self.alpha)
+ v1_stem = [
+ nn.Conv2d(3, 32, 3, padding=1, stride=2, bias=False),
+ nn.BatchNorm2d(32, momentum=_BN_MOMENTUM),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(32, 32, 3, padding=1, stride=1, groups=32,
+ bias=False),
+ nn.BatchNorm2d(32, momentum=_BN_MOMENTUM),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(32, 16, 1, padding=0, stride=1, bias=False),
+ nn.BatchNorm2d(16, momentum=_BN_MOMENTUM),
+ _stack(16, depths[2], 3, 2, 3, 3, _BN_MOMENTUM),
+ ]
+ for idx, layer in enumerate(v1_stem):
+ self.layers[idx] = layer
+
+ # The model is now identical to v1, and must be saved as such.
+ self._version = 1
+ warnings.warn(
+ "A new version of MNASNet model has been implemented. "
+ "Your checkpoint was saved using the previous version. "
+ "This checkpoint will load and work as before, but "
+ "you may want to upgrade by training a newer model or "
+ "transfer learning from an updated ImageNet checkpoint.",
+ UserWarning)
+
+ super(MNASNet, self)._load_from_state_dict(
+ state_dict, prefix, local_metadata, strict, missing_keys,
+ unexpected_keys, error_msgs)
+
+
+def _load_pretrained(model_name, model, progress):
+ if model_name not in _MODEL_URLS or _MODEL_URLS[model_name] is None:
+ raise ValueError(
+ "No checkpoint is available for model type {}".format(model_name))
+ checkpoint_url = _MODEL_URLS[model_name]
+ model.load_state_dict(
+ load_state_dict_from_url(checkpoint_url, progress=progress))
+
+
+def mnasnet0_5(pretrained=False, progress=True, **kwargs):
+ """MNASNet with depth multiplier of 0.5 from
+ `"MnasNet: Platform-Aware Neural Architecture Search for Mobile"
+    <https://arxiv.org/pdf/1807.11626.pdf>`_.
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ model = MNASNet(0.5, **kwargs)
+ if pretrained:
+ _load_pretrained("mnasnet0_5", model, progress)
+ return model
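+
+
+# Usage sketch (illustrative only); the depth-multiplier variants below share the same interface:
+#
+#   model = mnasnet0_5(pretrained=False, num_classes=1000)
+#   model.eval()
+#   logits = model(torch.rand(1, 3, 224, 224))   # shape [1, 1000]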
+
+
+def mnasnet0_75(pretrained=False, progress=True, **kwargs):
+ """MNASNet with depth multiplier of 0.75 from
+ `"MnasNet: Platform-Aware Neural Architecture Search for Mobile"
+    <https://arxiv.org/pdf/1807.11626.pdf>`_.
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ model = MNASNet(0.75, **kwargs)
+ if pretrained:
+ _load_pretrained("mnasnet0_75", model, progress)
+ return model
+
+
+def mnasnet1_0(pretrained=False, progress=True, **kwargs):
+ """MNASNet with depth multiplier of 1.0 from
+ `"MnasNet: Platform-Aware Neural Architecture Search for Mobile"
+    <https://arxiv.org/pdf/1807.11626.pdf>`_.
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ model = MNASNet(1.0, **kwargs)
+ if pretrained:
+ _load_pretrained("mnasnet1_0", model, progress)
+ return model
+
+
+def mnasnet1_3(pretrained=False, progress=True, **kwargs):
+ """MNASNet with depth multiplier of 1.3 from
+ `"MnasNet: Platform-Aware Neural Architecture Search for Mobile"
+    <https://arxiv.org/pdf/1807.11626.pdf>`_.
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ model = MNASNet(1.3, **kwargs)
+ if pretrained:
+ _load_pretrained("mnasnet1_3", model, progress)
+ return model
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet.py
new file mode 100644
index 0000000000..4108305d3f
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet.py
@@ -0,0 +1,4 @@
+from .mobilenetv2 import MobileNetV2, mobilenet_v2, __all__ as mv2_all
+from .mobilenetv3 import MobileNetV3, mobilenet_v3_large, mobilenet_v3_small, __all__ as mv3_all
+
+__all__ = mv2_all + mv3_all
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet_origin.py
new file mode 100644
index 0000000000..e4c3069a60
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet_origin.py
@@ -0,0 +1,177 @@
+from torch import nn
+from .utils import load_state_dict_from_url
+
+
+__all__ = ['MobileNetV2', 'mobilenet_v2']
+
+
+model_urls = {
+ 'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth',
+}
+
+
+def _make_divisible(v, divisor, min_value=None):
+ """
+ This function is taken from the original tf repo.
+ It ensures that all layers have a channel number that is divisible by 8
+ It can be seen here:
+ https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
+ :param v:
+ :param divisor:
+ :param min_value:
+ :return:
+ """
+ if min_value is None:
+ min_value = divisor
+ new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
+ # Make sure that round down does not go down by more than 10%.
+ if new_v < 0.9 * v:
+ new_v += divisor
+ return new_v
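+
+
+# Worked example (illustrative): _make_divisible(90, 8) rounds 90 to the nearest multiple of 8,
+# giving 88, and keeps it because 88 >= 0.9 * 90; _make_divisible(10, 8) would round down to 8,
+# losing more than 10%, so it is bumped up to 16.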
+
+
+class ConvBNReLU(nn.Sequential):
+ def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
+ padding = (kernel_size - 1) // 2
+ super(ConvBNReLU, self).__init__(
+ nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
+ nn.BatchNorm2d(out_planes),
+ nn.ReLU6(inplace=True)
+ )
+
+
+class InvertedResidual(nn.Module):
+ def __init__(self, inp, oup, stride, expand_ratio):
+ super(InvertedResidual, self).__init__()
+ self.stride = stride
+ assert stride in [1, 2]
+
+ hidden_dim = int(round(inp * expand_ratio))
+ self.use_res_connect = self.stride == 1 and inp == oup
+
+ layers = []
+ if expand_ratio != 1:
+ # pw
+ layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
+ layers.extend([
+ # dw
+ ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
+ # pw-linear
+ nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
+ nn.BatchNorm2d(oup),
+ ])
+ self.conv = nn.Sequential(*layers)
+
+ def forward(self, x):
+ if self.use_res_connect:
+ return x + self.conv(x)
+ else:
+ return self.conv(x)
+
+
+class MobileNetV2(nn.Module):
+ def __init__(self,
+ num_classes=1000,
+ width_mult=1.0,
+ inverted_residual_setting=None,
+ round_nearest=8,
+ block=None):
+ """
+ MobileNet V2 main class
+
+ Args:
+ num_classes (int): Number of classes
+ width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount
+ inverted_residual_setting: Network structure
+ round_nearest (int): Round the number of channels in each layer to be a multiple of this number
+ Set to 1 to turn off rounding
+ block: Module specifying inverted residual building block for mobilenet
+
+ """
+ super(MobileNetV2, self).__init__()
+
+ if block is None:
+ block = InvertedResidual
+ input_channel = 32
+ last_channel = 1280
+
+ if inverted_residual_setting is None:
+ inverted_residual_setting = [
+ # t, c, n, s
+ [1, 16, 1, 1],
+ [6, 24, 2, 2],
+ [6, 32, 3, 2],
+ [6, 64, 4, 2],
+ [6, 96, 3, 1],
+ [6, 160, 3, 2],
+ [6, 320, 1, 1],
+ ]
+
+ # only check the first element, assuming user knows t,c,n,s are required
+ if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
+ raise ValueError("inverted_residual_setting should be non-empty "
+ "or a 4-element list, got {}".format(inverted_residual_setting))
+
+ # building first layer
+ input_channel = _make_divisible(input_channel * width_mult, round_nearest)
+ self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
+ features = [ConvBNReLU(3, input_channel, stride=2)]
+ # building inverted residual blocks
+ for t, c, n, s in inverted_residual_setting:
+ output_channel = _make_divisible(c * width_mult, round_nearest)
+ for i in range(n):
+ stride = s if i == 0 else 1
+ features.append(block(input_channel, output_channel, stride, expand_ratio=t))
+ input_channel = output_channel
+ # building last several layers
+ features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1))
+ # make it nn.Sequential
+ self.features = nn.Sequential(*features)
+
+ # building classifier
+ self.classifier = nn.Sequential(
+ nn.Dropout(0.2),
+ nn.Linear(self.last_channel, num_classes),
+ )
+
+ # weight initialization
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ nn.init.kaiming_normal_(m.weight, mode='fan_out')
+ if m.bias is not None:
+ nn.init.zeros_(m.bias)
+ elif isinstance(m, nn.BatchNorm2d):
+ nn.init.ones_(m.weight)
+ nn.init.zeros_(m.bias)
+ elif isinstance(m, nn.Linear):
+ nn.init.normal_(m.weight, 0, 0.01)
+ nn.init.zeros_(m.bias)
+
+ def _forward_impl(self, x):
+ # This exists since TorchScript doesn't support inheritance, so the superclass method
+ # (this one) needs to have a name other than `forward` that can be accessed in a subclass
+ x = self.features(x)
+ # Cannot use "squeeze" as batch-size can be 1 => must use reshape with x.shape[0]
+ x = nn.functional.adaptive_avg_pool2d(x, 1).reshape(x.shape[0], -1)
+ x = self.classifier(x)
+ return x
+
+ def forward(self, x):
+ return self._forward_impl(x)
+
+
+def mobilenet_v2(pretrained=False, progress=True, **kwargs):
+ """
+ Constructs a MobileNetV2 architecture from
+    `"MobileNetV2: Inverted Residuals and Linear Bottlenecks" <https://arxiv.org/abs/1801.04381>`_.
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ model = MobileNetV2(**kwargs)
+ if pretrained:
+ state_dict = load_state_dict_from_url(model_urls['mobilenet_v2'],
+ progress=progress)
+ model.load_state_dict(state_dict)
+ return model
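+
+
+# A hedged usage sketch (illustration only):
+#
+#   import torch
+#   model = mobilenet_v2(pretrained=False, width_mult=1.0)
+#   model.eval()
+#   logits = model(torch.rand(1, 3, 224, 224))   # shape [1, 1000]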
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv2.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv2.py
new file mode 100644
index 0000000000..1a470953df
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv2.py
@@ -0,0 +1,211 @@
+import warnings
+from typing import Callable, Any, Optional, List
+
+import torch
+from torch import Tensor
+from torch import nn
+
+from .._internally_replaced_utils import load_state_dict_from_url
+from ..ops.misc import ConvNormActivation
+from ..utils import _log_api_usage_once
+from ._utils import _make_divisible
+
+
+__all__ = ["MobileNetV2", "mobilenet_v2"]
+
+
+model_urls = {
+ "mobilenet_v2": "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth",
+}
+
+
+# necessary for backwards compatibility
+class _DeprecatedConvBNAct(ConvNormActivation):
+ def __init__(self, *args, **kwargs):
+ warnings.warn(
+ "The ConvBNReLU/ConvBNActivation classes are deprecated and will be removed in future versions. "
+ "Use torchvision.ops.misc.ConvNormActivation instead.",
+ FutureWarning,
+ )
+ if kwargs.get("norm_layer", None) is None:
+ kwargs["norm_layer"] = nn.BatchNorm2d
+ if kwargs.get("activation_layer", None) is None:
+ kwargs["activation_layer"] = nn.ReLU6
+ super().__init__(*args, **kwargs)
+
+
+ConvBNReLU = _DeprecatedConvBNAct
+ConvBNActivation = _DeprecatedConvBNAct
+
+
+class InvertedResidual(nn.Module):
+ def __init__(
+ self, inp: int, oup: int, stride: int, expand_ratio: int, norm_layer: Optional[Callable[..., nn.Module]] = None
+ ) -> None:
+ super().__init__()
+ self.stride = stride
+ assert stride in [1, 2]
+
+ if norm_layer is None:
+ norm_layer = nn.BatchNorm2d
+
+ hidden_dim = int(round(inp * expand_ratio))
+ self.use_res_connect = self.stride == 1 and inp == oup
+
+ layers: List[nn.Module] = []
+ if expand_ratio != 1:
+ # pw
+ layers.append(
+ ConvNormActivation(inp, hidden_dim, kernel_size=1, norm_layer=norm_layer, activation_layer=nn.ReLU6)
+ )
+ layers.extend(
+ [
+ # dw
+ ConvNormActivation(
+ hidden_dim,
+ hidden_dim,
+ stride=stride,
+ groups=hidden_dim,
+ norm_layer=norm_layer,
+ activation_layer=nn.ReLU6,
+ ),
+ # pw-linear
+ nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
+ norm_layer(oup),
+ ]
+ )
+ self.conv = nn.Sequential(*layers)
+ self.out_channels = oup
+ self._is_cn = stride > 1
+
+ def forward(self, x: Tensor) -> Tensor:
+ if self.use_res_connect:
+ return x + self.conv(x)
+ else:
+ return self.conv(x)
+
+
+class MobileNetV2(nn.Module):
+ def __init__(
+ self,
+ num_classes: int = 1000,
+ width_mult: float = 1.0,
+ inverted_residual_setting: Optional[List[List[int]]] = None,
+ round_nearest: int = 8,
+ block: Optional[Callable[..., nn.Module]] = None,
+ norm_layer: Optional[Callable[..., nn.Module]] = None,
+ dropout: float = 0.2,
+ ) -> None:
+ """
+ MobileNet V2 main class
+
+ Args:
+ num_classes (int): Number of classes
+ width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount
+ inverted_residual_setting: Network structure
+ round_nearest (int): Round the number of channels in each layer to be a multiple of this number
+ Set to 1 to turn off rounding
+ block: Module specifying inverted residual building block for mobilenet
+ norm_layer: Module specifying the normalization layer to use
+            dropout (float): The dropout probability
+
+ """
+ super().__init__()
+ _log_api_usage_once(self)
+
+ if block is None:
+ block = InvertedResidual
+
+ if norm_layer is None:
+ norm_layer = nn.BatchNorm2d
+
+ input_channel = 32
+ last_channel = 1280
+
+ if inverted_residual_setting is None:
+ inverted_residual_setting = [
+ # t, c, n, s
+ [1, 16, 1, 1],
+ [6, 24, 2, 2],
+ [6, 32, 3, 2],
+ [6, 64, 4, 2],
+ [6, 96, 3, 1],
+ [6, 160, 3, 2],
+ [6, 320, 1, 1],
+ ]
+
+ # only check the first element, assuming user knows t,c,n,s are required
+ if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
+ raise ValueError(
+ f"inverted_residual_setting should be non-empty or a 4-element list, got {inverted_residual_setting}"
+ )
+
+ # building first layer
+ input_channel = _make_divisible(input_channel * width_mult, round_nearest)
+ self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
+ features: List[nn.Module] = [
+ ConvNormActivation(3, input_channel, stride=2, norm_layer=norm_layer, activation_layer=nn.ReLU6)
+ ]
+ # building inverted residual blocks
+ for t, c, n, s in inverted_residual_setting:
+ output_channel = _make_divisible(c * width_mult, round_nearest)
+ for i in range(n):
+ stride = s if i == 0 else 1
+ features.append(block(input_channel, output_channel, stride, expand_ratio=t, norm_layer=norm_layer))
+ input_channel = output_channel
+ # building last several layers
+ features.append(
+ ConvNormActivation(
+ input_channel, self.last_channel, kernel_size=1, norm_layer=norm_layer, activation_layer=nn.ReLU6
+ )
+ )
+ # make it nn.Sequential
+ self.features = nn.Sequential(*features)
+
+ # building classifier
+ self.classifier = nn.Sequential(
+ nn.Dropout(p=dropout),
+ nn.Linear(self.last_channel, num_classes),
+ )
+
+ # weight initialization
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ nn.init.kaiming_normal_(m.weight, mode="fan_out")
+ if m.bias is not None:
+ nn.init.zeros_(m.bias)
+ elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
+ nn.init.ones_(m.weight)
+ nn.init.zeros_(m.bias)
+ elif isinstance(m, nn.Linear):
+ nn.init.normal_(m.weight, 0, 0.01)
+ nn.init.zeros_(m.bias)
+
+ def _forward_impl(self, x: Tensor) -> Tensor:
+ # This exists since TorchScript doesn't support inheritance, so the superclass method
+ # (this one) needs to have a name other than `forward` that can be accessed in a subclass
+ x = self.features(x)
+ # Cannot use "squeeze" as batch-size can be 1
+ x = nn.functional.adaptive_avg_pool2d(x, (1, 1))
+ x = torch.flatten(x, 1)
+ x = self.classifier(x)
+ return x
+
+ def forward(self, x: Tensor) -> Tensor:
+ return self._forward_impl(x)
+
+
+def mobilenet_v2(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> MobileNetV2:
+ """
+ Constructs a MobileNetV2 architecture from
+ `"MobileNetV2: Inverted Residuals and Linear Bottlenecks" `_.
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ model = MobileNetV2(**kwargs)
+ if pretrained:
+ state_dict = load_state_dict_from_url(model_urls["mobilenet_v2"], progress=progress)
+ model.load_state_dict(state_dict)
+ return model
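
To make the width_mult/round_nearest arithmetic above concrete, a small worked example follows; `_make_divisible` is reproduced from the usual torchvision helper so the snippet is self-contained (treat it as an illustration, not the vendored implementation):

    def _make_divisible(v, divisor, min_value=None):
        if min_value is None:
            min_value = divisor
        new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
        # Make sure rounding down does not shrink the channel count by more than 10%.
        if new_v < 0.9 * v:
            new_v += divisor
        return new_v

    # width_mult scales every `c` in the (t, c, n, s) table, then rounds to a multiple of 8:
    print(_make_divisible(32 * 0.75, 8))              # first layer: 24
    print(_make_divisible(320 * 0.75, 8))             # last bottleneck: 240
    print(_make_divisible(1280 * max(1.0, 0.75), 8))  # head keeps 1280 for width_mult < 1
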
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv3.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv3.py
new file mode 100644
index 0000000000..e6a2bbbfbe
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv3.py
@@ -0,0 +1,333 @@
+import warnings
+from functools import partial
+from typing import Any, Callable, List, Optional, Sequence
+
+import torch
+from torch import nn, Tensor
+
+from .._internally_replaced_utils import load_state_dict_from_url
+from ..ops.misc import ConvNormActivation, SqueezeExcitation as SElayer
+from ..utils import _log_api_usage_once
+from ._utils import _make_divisible
+
+
+__all__ = ["MobileNetV3", "mobilenet_v3_large", "mobilenet_v3_small"]
+
+
+model_urls = {
+ "mobilenet_v3_large": "https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth",
+ "mobilenet_v3_small": "https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth",
+}
+
+
+class SqueezeExcitation(SElayer):
+ """DEPRECATED"""
+
+ def __init__(self, input_channels: int, squeeze_factor: int = 4):
+ squeeze_channels = _make_divisible(input_channels // squeeze_factor, 8)
+ # super().__init__(input_channels, squeeze_channels, scale_activation=nn.Hardsigmoid)
+ super().__init__(input_channels, squeeze_channels, scale_activation=nn.Sigmoid)
+
+ self.relu = self.activation
+ delattr(self, "activation")
+ warnings.warn(
+ "This SqueezeExcitation class is deprecated and will be removed in future versions. "
+ "Use torchvision.ops.misc.SqueezeExcitation instead.",
+ FutureWarning,
+ )
+
+
+class InvertedResidualConfig:
+ # Stores information listed at Tables 1 and 2 of the MobileNetV3 paper
+ def __init__(
+ self,
+ input_channels: int,
+ kernel: int,
+ expanded_channels: int,
+ out_channels: int,
+ use_se: bool,
+ activation: str,
+ stride: int,
+ dilation: int,
+ width_mult: float,
+ ):
+ self.input_channels = self.adjust_channels(input_channels, width_mult)
+ self.kernel = kernel
+ self.expanded_channels = self.adjust_channels(expanded_channels, width_mult)
+ self.out_channels = self.adjust_channels(out_channels, width_mult)
+ self.use_se = use_se
+ self.use_hs = activation == "HS"
+ self.stride = stride
+ self.dilation = dilation
+
+ @staticmethod
+ def adjust_channels(channels: int, width_mult: float):
+ return _make_divisible(channels * width_mult, 8)
+
+
+class InvertedResidual(nn.Module):
+ # Implemented as described at section 5 of MobileNetV3 paper
+ def __init__(
+ self,
+ cnf: InvertedResidualConfig,
+ norm_layer: Callable[..., nn.Module],
+ se_layer: Callable[..., nn.Module] = partial(SElayer, scale_activation=nn.Sigmoid),
+ ):
+ super().__init__()
+ if not (1 <= cnf.stride <= 2):
+ raise ValueError("illegal stride value")
+
+ self.use_res_connect = cnf.stride == 1 and cnf.input_channels == cnf.out_channels
+
+ layers: List[nn.Module] = []
+ # activation_layer = nn.Hardswish if cnf.use_hs else nn.ReLU
+ activation_layer = nn.ReLU6
+
+
+ # expand
+ if cnf.expanded_channels != cnf.input_channels:
+ layers.append(
+ ConvNormActivation(
+ cnf.input_channels,
+ cnf.expanded_channels,
+ kernel_size=1,
+ norm_layer=norm_layer,
+ activation_layer=activation_layer,
+ )
+ )
+
+ # depthwise
+ stride = 1 if cnf.dilation > 1 else cnf.stride
+ layers.append(
+ ConvNormActivation(
+ cnf.expanded_channels,
+ cnf.expanded_channels,
+ kernel_size=cnf.kernel,
+ stride=stride,
+ dilation=cnf.dilation,
+ groups=cnf.expanded_channels,
+ norm_layer=norm_layer,
+ activation_layer=activation_layer,
+ )
+ )
+ if cnf.use_se:
+ squeeze_channels = _make_divisible(cnf.expanded_channels // 4, 8)
+ layers.append(se_layer(cnf.expanded_channels, squeeze_channels))
+
+ # project
+ layers.append(
+ ConvNormActivation(
+ cnf.expanded_channels, cnf.out_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=None
+ )
+ )
+
+ self.block = nn.Sequential(*layers)
+ self.out_channels = cnf.out_channels
+ self._is_cn = cnf.stride > 1
+
+ def forward(self, input: Tensor) -> Tensor:
+ result = self.block(input)
+ if self.use_res_connect:
+ result += input
+ return result
+
+
+class MobileNetV3(nn.Module):
+ def __init__(
+ self,
+ inverted_residual_setting: List[InvertedResidualConfig],
+ last_channel: int,
+ num_classes: int = 1000,
+ block: Optional[Callable[..., nn.Module]] = None,
+ norm_layer: Optional[Callable[..., nn.Module]] = None,
+ dropout: float = 0.2,
+ **kwargs: Any,
+ ) -> None:
+ """
+ MobileNet V3 main class
+
+ Args:
+ inverted_residual_setting (List[InvertedResidualConfig]): Network structure
+ last_channel (int): The number of channels on the penultimate layer
+ num_classes (int): Number of classes
+ block (Optional[Callable[..., nn.Module]]): Module specifying inverted residual building block for mobilenet
+ norm_layer (Optional[Callable[..., nn.Module]]): Module specifying the normalization layer to use
+            dropout (float): The dropout probability
+ """
+ super().__init__()
+ _log_api_usage_once(self)
+
+ if not inverted_residual_setting:
+ raise ValueError("The inverted_residual_setting should not be empty")
+ elif not (
+ isinstance(inverted_residual_setting, Sequence)
+ and all([isinstance(s, InvertedResidualConfig) for s in inverted_residual_setting])
+ ):
+ raise TypeError("The inverted_residual_setting should be List[InvertedResidualConfig]")
+
+ if block is None:
+ block = InvertedResidual
+
+ if norm_layer is None:
+ norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.01)
+
+ layers: List[nn.Module] = []
+
+ # building first layer
+ firstconv_output_channels = inverted_residual_setting[0].input_channels
+ layers.append(
+ ConvNormActivation(
+ 3,
+ firstconv_output_channels,
+ kernel_size=3,
+ stride=2,
+ norm_layer=norm_layer,
+ activation_layer=nn.ReLU6, #nn.Hardswish,
+ )
+ )
+
+ # building inverted residual blocks
+ for cnf in inverted_residual_setting:
+ layers.append(block(cnf, norm_layer))
+
+ # building last several layers
+ lastconv_input_channels = inverted_residual_setting[-1].out_channels
+ lastconv_output_channels = 6 * lastconv_input_channels
+ layers.append(
+ ConvNormActivation(
+ lastconv_input_channels,
+ lastconv_output_channels,
+ kernel_size=1,
+ norm_layer=norm_layer,
+ activation_layer=nn.ReLU6, #nn.Hardswish,
+ )
+ )
+
+ self.features = nn.Sequential(*layers)
+ self.avgpool = nn.AdaptiveAvgPool2d(1)
+ self.classifier = nn.Sequential(
+ nn.Linear(lastconv_output_channels, last_channel),
+ # nn.Hardswish(inplace=True),
+ nn.ReLU6(inplace=True),
+ nn.Dropout(p=dropout, inplace=True),
+ nn.Linear(last_channel, num_classes),
+ )
+
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ nn.init.kaiming_normal_(m.weight, mode="fan_out")
+ if m.bias is not None:
+ nn.init.zeros_(m.bias)
+ elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
+ nn.init.ones_(m.weight)
+ nn.init.zeros_(m.bias)
+ elif isinstance(m, nn.Linear):
+ nn.init.normal_(m.weight, 0, 0.01)
+ nn.init.zeros_(m.bias)
+
+ def _forward_impl(self, x: Tensor) -> Tensor:
+ x = self.features(x)
+
+ x = self.avgpool(x)
+ x = torch.flatten(x, 1)
+
+ x = self.classifier(x)
+
+ return x
+
+ def forward(self, x: Tensor) -> Tensor:
+ return self._forward_impl(x)
+
+
+def _mobilenet_v3_conf(
+ arch: str, width_mult: float = 1.0, reduced_tail: bool = False, dilated: bool = False, **kwargs: Any
+):
+ reduce_divider = 2 if reduced_tail else 1
+ dilation = 2 if dilated else 1
+
+ bneck_conf = partial(InvertedResidualConfig, width_mult=width_mult)
+ adjust_channels = partial(InvertedResidualConfig.adjust_channels, width_mult=width_mult)
+
+ if arch == "mobilenet_v3_large":
+ inverted_residual_setting = [
+ bneck_conf(16, 3, 16, 16, False, "RE", 1, 1),
+ bneck_conf(16, 3, 64, 24, False, "RE", 2, 1), # C1
+ bneck_conf(24, 3, 72, 24, False, "RE", 1, 1),
+ bneck_conf(24, 5, 72, 40, True, "RE", 2, 1), # C2
+ bneck_conf(40, 5, 120, 40, True, "RE", 1, 1),
+ bneck_conf(40, 5, 120, 40, True, "RE", 1, 1),
+ bneck_conf(40, 3, 240, 80, False, "HS", 2, 1), # C3
+ bneck_conf(80, 3, 200, 80, False, "HS", 1, 1),
+ bneck_conf(80, 3, 184, 80, False, "HS", 1, 1),
+ bneck_conf(80, 3, 184, 80, False, "HS", 1, 1),
+ bneck_conf(80, 3, 480, 112, True, "HS", 1, 1),
+ bneck_conf(112, 3, 672, 112, True, "HS", 1, 1),
+ bneck_conf(112, 5, 672, 160 // reduce_divider, True, "HS", 2, dilation), # C4
+ bneck_conf(160 // reduce_divider, 5, 960 // reduce_divider, 160 // reduce_divider, True, "HS", 1, dilation),
+ bneck_conf(160 // reduce_divider, 5, 960 // reduce_divider, 160 // reduce_divider, True, "HS", 1, dilation),
+ ]
+ last_channel = adjust_channels(1280 // reduce_divider) # C5
+ elif arch == "mobilenet_v3_small":
+ inverted_residual_setting = [
+ bneck_conf(16, 3, 16, 16, True, "RE", 2, 1), # C1
+ bneck_conf(16, 3, 72, 24, False, "RE", 2, 1), # C2
+ bneck_conf(24, 3, 88, 24, False, "RE", 1, 1),
+ bneck_conf(24, 5, 96, 40, True, "HS", 2, 1), # C3
+ bneck_conf(40, 5, 240, 40, True, "HS", 1, 1),
+ bneck_conf(40, 5, 240, 40, True, "HS", 1, 1),
+ bneck_conf(40, 5, 120, 48, True, "HS", 1, 1),
+ bneck_conf(48, 5, 144, 48, True, "HS", 1, 1),
+ bneck_conf(48, 5, 288, 96 // reduce_divider, True, "HS", 2, dilation), # C4
+ bneck_conf(96 // reduce_divider, 5, 576 // reduce_divider, 96 // reduce_divider, True, "HS", 1, dilation),
+ bneck_conf(96 // reduce_divider, 5, 576 // reduce_divider, 96 // reduce_divider, True, "HS", 1, dilation),
+ ]
+ last_channel = adjust_channels(1024 // reduce_divider) # C5
+ else:
+ raise ValueError(f"Unsupported model type {arch}")
+
+ return inverted_residual_setting, last_channel
+
+
+def _mobilenet_v3(
+ arch: str,
+ inverted_residual_setting: List[InvertedResidualConfig],
+ last_channel: int,
+ pretrained: bool,
+ progress: bool,
+ **kwargs: Any,
+):
+ model = MobileNetV3(inverted_residual_setting, last_channel, **kwargs)
+ if pretrained:
+ if model_urls.get(arch, None) is None:
+ raise ValueError(f"No checkpoint is available for model type {arch}")
+ state_dict = load_state_dict_from_url(model_urls[arch], progress=progress)
+ model.load_state_dict(state_dict)
+ return model
+
+
+def mobilenet_v3_large(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> MobileNetV3:
+ """
+ Constructs a large MobileNetV3 architecture from
+ `"Searching for MobileNetV3" `_.
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ arch = "mobilenet_v3_large"
+ inverted_residual_setting, last_channel = _mobilenet_v3_conf(arch, **kwargs)
+ return _mobilenet_v3(arch, inverted_residual_setting, last_channel, pretrained, progress, **kwargs)
+
+
+def mobilenet_v3_small(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> MobileNetV3:
+ """
+ Constructs a small MobileNetV3 architecture from
+ `"Searching for MobileNetV3" `_.
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ arch = "mobilenet_v3_small"
+ inverted_residual_setting, last_channel = _mobilenet_v3_conf(arch, **kwargs)
+ return _mobilenet_v3(arch, inverted_residual_setting, last_channel, pretrained, progress, **kwargs)
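
A short sketch of how the configuration helper and the main class above fit together; the module import path is an assumption, and reduced_tail=True is the variant a detection backbone can request to halve the channels of the last blocks:

    import torch
    from torchvision.models.mobilenetv3 import MobileNetV3, _mobilenet_v3_conf  # vendored module assumed

    setting, last_channel = _mobilenet_v3_conf("mobilenet_v3_large", width_mult=1.0, reduced_tail=True)
    print(len(setting), last_channel)  # 15 bneck configs, 640 head channels (1280 // 2)
    print(setting[-1].out_channels)    # 80 (160 // reduce_divider, already a multiple of 8)

    model = MobileNetV3(setting, last_channel, num_classes=1000)
    model.eval()
    with torch.no_grad():
        print(model(torch.randn(1, 3, 224, 224)).shape)  # torch.Size([1, 1000])
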
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/__init__.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/__init__.py
new file mode 100644
index 0000000000..deae997a21
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/__init__.py
@@ -0,0 +1,5 @@
+from .mobilenet import *
+from .resnet import *
+from .googlenet import *
+from .inception import *
+from .shufflenetv2 import *
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/googlenet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/googlenet.py
new file mode 100644
index 0000000000..d01534bc70
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/googlenet.py
@@ -0,0 +1,166 @@
+import warnings
+import torch
+import torch.nn as nn
+from torch.nn import functional as F
+from torch.jit.annotations import Optional
+
+from torchvision.models.utils import load_state_dict_from_url
+from torchvision.models.googlenet import (
+ GoogLeNetOutputs, BasicConv2d, Inception, InceptionAux, GoogLeNet, model_urls)
+
+from .utils import _replace_relu, quantize_model
+
+
+__all__ = ['QuantizableGoogLeNet', 'googlenet']
+
+quant_model_urls = {
+ # fp32 GoogLeNet ported from TensorFlow, with weights quantized in PyTorch
+ 'googlenet_fbgemm': 'https://download.pytorch.org/models/quantized/googlenet_fbgemm-c00238cf.pth',
+}
+
+
+def googlenet(pretrained=False, progress=True, quantize=False, **kwargs):
+ r"""GoogLeNet (Inception v1) model architecture from
+ `"Going Deeper with Convolutions" `_.
+
+ Note that quantize = True returns a quantized model with 8 bit
+ weights. Quantized models only support inference and run on CPUs.
+ GPU inference is not yet supported
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ aux_logits (bool): If True, adds two auxiliary branches that can improve training.
+ Default: *False* when pretrained is True otherwise *True*
+ transform_input (bool): If True, preprocesses the input according to the method with which it
+ was trained on ImageNet. Default: *False*
+ """
+ if pretrained:
+ if 'transform_input' not in kwargs:
+ kwargs['transform_input'] = True
+ if 'aux_logits' not in kwargs:
+ kwargs['aux_logits'] = False
+ if kwargs['aux_logits']:
+ warnings.warn('auxiliary heads in the pretrained googlenet model are NOT pretrained, '
+ 'so make sure to train them')
+ original_aux_logits = kwargs['aux_logits']
+ kwargs['aux_logits'] = True
+ kwargs['init_weights'] = False
+
+ model = QuantizableGoogLeNet(**kwargs)
+ _replace_relu(model)
+
+ if quantize:
+ # TODO use pretrained as a string to specify the backend
+ backend = 'fbgemm'
+ quantize_model(model, backend)
+ else:
+ assert pretrained in [True, False]
+
+ if pretrained:
+ if quantize:
+ model_url = quant_model_urls['googlenet' + '_' + backend]
+ else:
+ model_url = model_urls['googlenet']
+
+ state_dict = load_state_dict_from_url(model_url,
+ progress=progress)
+
+ model.load_state_dict(state_dict)
+
+ if not original_aux_logits:
+ model.aux_logits = False
+ model.aux1 = None
+ model.aux2 = None
+ return model
+
+
+class QuantizableBasicConv2d(BasicConv2d):
+
+ def __init__(self, *args, **kwargs):
+ super(QuantizableBasicConv2d, self).__init__(*args, **kwargs)
+ self.relu = nn.ReLU()
+
+ def forward(self, x):
+ x = self.conv(x)
+ x = self.bn(x)
+ x = self.relu(x)
+ return x
+
+ def fuse_model(self):
+ torch.quantization.fuse_modules(self, ["conv", "bn", "relu"], inplace=True)
+
+
+class QuantizableInception(Inception):
+
+ def __init__(self, *args, **kwargs):
+ super(QuantizableInception, self).__init__(
+ conv_block=QuantizableBasicConv2d, *args, **kwargs)
+ self.cat = nn.quantized.FloatFunctional()
+
+ def forward(self, x):
+ outputs = self._forward(x)
+ return self.cat.cat(outputs, 1)
+
+
+class QuantizableInceptionAux(InceptionAux):
+
+ def __init__(self, *args, **kwargs):
+ super(QuantizableInceptionAux, self).__init__(
+ conv_block=QuantizableBasicConv2d, *args, **kwargs)
+ self.relu = nn.ReLU()
+ self.dropout = nn.Dropout(0.7)
+
+ def forward(self, x):
+ # aux1: N x 512 x 14 x 14, aux2: N x 528 x 14 x 14
+ x = F.adaptive_avg_pool2d(x, (4, 4))
+ # aux1: N x 512 x 4 x 4, aux2: N x 528 x 4 x 4
+ x = self.conv(x)
+ # N x 128 x 4 x 4
+ x = torch.flatten(x, 1)
+ # N x 2048
+ x = self.relu(self.fc1(x))
+ # N x 1024
+ x = self.dropout(x)
+ # N x 1024
+ x = self.fc2(x)
+ # N x 1000 (num_classes)
+
+ return x
+
+
+class QuantizableGoogLeNet(GoogLeNet):
+
+ def __init__(self, *args, **kwargs):
+ super(QuantizableGoogLeNet, self).__init__(
+ blocks=[QuantizableBasicConv2d, QuantizableInception, QuantizableInceptionAux],
+ *args,
+ **kwargs
+ )
+ self.quant = torch.quantization.QuantStub()
+ self.dequant = torch.quantization.DeQuantStub()
+
+ def forward(self, x):
+ x = self._transform_input(x)
+ x = self.quant(x)
+ x, aux1, aux2 = self._forward(x)
+ x = self.dequant(x)
+ aux_defined = self.training and self.aux_logits
+ if torch.jit.is_scripting():
+ if not aux_defined:
+ warnings.warn("Scripted QuantizableGoogleNet always returns GoogleNetOutputs Tuple")
+ return GoogLeNetOutputs(x, aux2, aux1)
+ else:
+ return self.eager_outputs(x, aux2, aux1)
+
+ def fuse_model(self):
+ r"""Fuse conv/bn/relu modules in googlenet model
+
+ Fuse conv+bn+relu/ conv+relu/conv+bn modules to prepare for quantization.
+ Model is modified in place. Note that this operation does not change numerics
+ and the model after modification is in floating point
+ """
+
+ for m in self.modules():
+ if type(m) == QuantizableBasicConv2d:
+ m.fuse_model()
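
A usage sketch for the factory above (pretrained=False, so nothing is downloaded); quantize=True runs the fbgemm post-training quantization path, so the resulting model is inference-only and CPU-only as the docstring notes. The import path is an assumption:

    import torch
    from torchvision.models.quantization import googlenet  # vendored package assumed

    model = googlenet(pretrained=False, quantize=True)  # fused, calibrated on a dummy input, converted to int8
    model.eval()
    with torch.no_grad():
        out = model(torch.randn(1, 3, 224, 224))  # plain Tensor in eval mode (no aux outputs)
    print(out.shape)  # torch.Size([1, 1000])
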
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/inception.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/inception.py
new file mode 100644
index 0000000000..f452de0281
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/inception.py
@@ -0,0 +1,222 @@
+import warnings
+from collections import namedtuple
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torchvision.models import inception as inception_module
+from torchvision.models.inception import InceptionOutputs
+from torch.jit.annotations import Optional
+from torchvision.models.utils import load_state_dict_from_url
+from .utils import _replace_relu, quantize_model
+
+
+__all__ = [
+ "QuantizableInception3",
+ "inception_v3",
+]
+
+
+quant_model_urls = {
+ # fp32 weights ported from TensorFlow, quantized in PyTorch
+ "inception_v3_google_fbgemm":
+ "https://download.pytorch.org/models/quantized/inception_v3_google_fbgemm-71447a44.pth"
+}
+
+
+def inception_v3(pretrained=False, progress=True, quantize=False, **kwargs):
+ r"""Inception v3 model architecture from
+ `"Rethinking the Inception Architecture for Computer Vision" `_.
+
+ .. note::
+ **Important**: In contrast to the other models the inception_v3 expects tensors with a size of
+ N x 3 x 299 x 299, so ensure your images are sized accordingly.
+
+ Note that quantize = True returns a quantized model with 8 bit
+ weights. Quantized models only support inference and run on CPUs.
+ GPU inference is not yet supported
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ aux_logits (bool): If True, add an auxiliary branch that can improve training.
+ Default: *True*
+ transform_input (bool): If True, preprocesses the input according to the method with which it
+ was trained on ImageNet. Default: *False*
+ """
+ if pretrained:
+ if "transform_input" not in kwargs:
+ kwargs["transform_input"] = True
+ if "aux_logits" in kwargs:
+ original_aux_logits = kwargs["aux_logits"]
+ kwargs["aux_logits"] = True
+ else:
+ original_aux_logits = False
+
+ model = QuantizableInception3(**kwargs)
+ _replace_relu(model)
+
+ if quantize:
+ # TODO use pretrained as a string to specify the backend
+ backend = 'fbgemm'
+ quantize_model(model, backend)
+ else:
+ assert pretrained in [True, False]
+
+ if pretrained:
+ if quantize:
+ if not original_aux_logits:
+ model.aux_logits = False
+ del model.AuxLogits
+ model_url = quant_model_urls['inception_v3_google' + '_' + backend]
+ else:
+ model_url = inception_module.model_urls['inception_v3_google']
+
+ state_dict = load_state_dict_from_url(model_url,
+ progress=progress)
+
+ model.load_state_dict(state_dict)
+
+ if not quantize:
+ if not original_aux_logits:
+ model.aux_logits = False
+ del model.AuxLogits
+ return model
+
+
+class QuantizableBasicConv2d(inception_module.BasicConv2d):
+ def __init__(self, *args, **kwargs):
+ super(QuantizableBasicConv2d, self).__init__(*args, **kwargs)
+ self.relu = nn.ReLU()
+
+ def forward(self, x):
+ x = self.conv(x)
+ x = self.bn(x)
+ x = self.relu(x)
+ return x
+
+ def fuse_model(self):
+ torch.quantization.fuse_modules(self, ["conv", "bn", "relu"], inplace=True)
+
+
+class QuantizableInceptionA(inception_module.InceptionA):
+ def __init__(self, *args, **kwargs):
+ super(QuantizableInceptionA, self).__init__(conv_block=QuantizableBasicConv2d, *args, **kwargs)
+ self.myop = nn.quantized.FloatFunctional()
+
+ def forward(self, x):
+ outputs = self._forward(x)
+ return self.myop.cat(outputs, 1)
+
+
+class QuantizableInceptionB(inception_module.InceptionB):
+ def __init__(self, *args, **kwargs):
+ super(QuantizableInceptionB, self).__init__(conv_block=QuantizableBasicConv2d, *args, **kwargs)
+ self.myop = nn.quantized.FloatFunctional()
+
+ def forward(self, x):
+ outputs = self._forward(x)
+ return self.myop.cat(outputs, 1)
+
+
+class QuantizableInceptionC(inception_module.InceptionC):
+ def __init__(self, *args, **kwargs):
+ super(QuantizableInceptionC, self).__init__(conv_block=QuantizableBasicConv2d, *args, **kwargs)
+ self.myop = nn.quantized.FloatFunctional()
+
+ def forward(self, x):
+ outputs = self._forward(x)
+ return self.myop.cat(outputs, 1)
+
+
+class QuantizableInceptionD(inception_module.InceptionD):
+ def __init__(self, *args, **kwargs):
+ super(QuantizableInceptionD, self).__init__(conv_block=QuantizableBasicConv2d, *args, **kwargs)
+ self.myop = nn.quantized.FloatFunctional()
+
+ def forward(self, x):
+ outputs = self._forward(x)
+ return self.myop.cat(outputs, 1)
+
+
+class QuantizableInceptionE(inception_module.InceptionE):
+ def __init__(self, *args, **kwargs):
+ super(QuantizableInceptionE, self).__init__(conv_block=QuantizableBasicConv2d, *args, **kwargs)
+ self.myop1 = nn.quantized.FloatFunctional()
+ self.myop2 = nn.quantized.FloatFunctional()
+ self.myop3 = nn.quantized.FloatFunctional()
+
+ def _forward(self, x):
+ branch1x1 = self.branch1x1(x)
+
+ branch3x3 = self.branch3x3_1(x)
+ branch3x3 = [self.branch3x3_2a(branch3x3), self.branch3x3_2b(branch3x3)]
+ branch3x3 = self.myop1.cat(branch3x3, 1)
+
+ branch3x3dbl = self.branch3x3dbl_1(x)
+ branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
+ branch3x3dbl = [
+ self.branch3x3dbl_3a(branch3x3dbl),
+ self.branch3x3dbl_3b(branch3x3dbl),
+ ]
+ branch3x3dbl = self.myop2.cat(branch3x3dbl, 1)
+
+ branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1)
+ branch_pool = self.branch_pool(branch_pool)
+
+ outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool]
+ return outputs
+
+ def forward(self, x):
+ outputs = self._forward(x)
+ return self.myop3.cat(outputs, 1)
+
+
+class QuantizableInceptionAux(inception_module.InceptionAux):
+ def __init__(self, *args, **kwargs):
+ super(QuantizableInceptionAux, self).__init__(conv_block=QuantizableBasicConv2d, *args, **kwargs)
+
+
+class QuantizableInception3(inception_module.Inception3):
+ def __init__(self, num_classes=1000, aux_logits=True, transform_input=False):
+ super(QuantizableInception3, self).__init__(
+ num_classes=num_classes,
+ aux_logits=aux_logits,
+ transform_input=transform_input,
+ inception_blocks=[
+ QuantizableBasicConv2d,
+ QuantizableInceptionA,
+ QuantizableInceptionB,
+ QuantizableInceptionC,
+ QuantizableInceptionD,
+ QuantizableInceptionE,
+ QuantizableInceptionAux
+ ]
+ )
+ self.quant = torch.quantization.QuantStub()
+ self.dequant = torch.quantization.DeQuantStub()
+
+ def forward(self, x):
+ x = self._transform_input(x)
+ x = self.quant(x)
+ x, aux = self._forward(x)
+ x = self.dequant(x)
+ aux_defined = self.training and self.aux_logits
+ if torch.jit.is_scripting():
+ if not aux_defined:
+ warnings.warn("Scripted QuantizableInception3 always returns QuantizableInception3 Tuple")
+ return InceptionOutputs(x, aux)
+ else:
+ return self.eager_outputs(x, aux)
+
+ def fuse_model(self):
+ r"""Fuse conv/bn/relu modules in inception model
+
+ Fuse conv+bn+relu/ conv+relu/conv+bn modules to prepare for quantization.
+ Model is modified in place. Note that this operation does not change numerics
+ and the model after modification is in floating point
+ """
+
+ for m in self.modules():
+ if type(m) == QuantizableBasicConv2d:
+ m.fuse_model()
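
A sketch emphasising the input-size note in the docstring above: inception_v3 expects N x 3 x 299 x 299 tensors, unlike the 224 x 224 default of the other classifiers here. The import path is an assumption and no weights are downloaded:

    import torch
    from torchvision.models.quantization import inception_v3  # vendored package assumed

    model = inception_v3(pretrained=False, quantize=False, aux_logits=False)  # float QuantizableInception3
    model.eval()
    with torch.no_grad():
        print(model(torch.randn(2, 3, 299, 299)).shape)  # torch.Size([2, 1000])
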
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenet.py
new file mode 100644
index 0000000000..8f2c42db64
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenet.py
@@ -0,0 +1,4 @@
+from .mobilenetv2 import QuantizableMobileNetV2, mobilenet_v2, __all__ as mv2_all
+from .mobilenetv3 import QuantizableMobileNetV3, mobilenet_v3_large, __all__ as mv3_all
+
+__all__ = mv2_all + mv3_all
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv2.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv2.py
new file mode 100644
index 0000000000..faa63e73be
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv2.py
@@ -0,0 +1,102 @@
+from typing import Any
+
+from torch import Tensor
+from torch import nn
+from torch.quantization import QuantStub, DeQuantStub, fuse_modules
+from torchvision.models.mobilenetv2 import InvertedResidual, MobileNetV2, model_urls
+
+from ..._internally_replaced_utils import load_state_dict_from_url
+from ...ops.misc import ConvNormActivation
+from .utils import _replace_relu, quantize_model
+
+
+__all__ = ["QuantizableMobileNetV2", "mobilenet_v2"]
+
+quant_model_urls = {
+ "mobilenet_v2_qnnpack": "https://download.pytorch.org/models/quantized/mobilenet_v2_qnnpack_37f702c5.pth"
+}
+
+
+class QuantizableInvertedResidual(InvertedResidual):
+ def __init__(self, *args: Any, **kwargs: Any) -> None:
+ super().__init__(*args, **kwargs)
+ self.skip_add = nn.quantized.FloatFunctional()
+
+ def forward(self, x: Tensor) -> Tensor:
+ if self.use_res_connect:
+ return self.skip_add.add(x, self.conv(x))
+ else:
+ return self.conv(x)
+
+ def fuse_model(self) -> None:
+ for idx in range(len(self.conv)):
+ if type(self.conv[idx]) is nn.Conv2d:
+ fuse_modules(self.conv, [str(idx), str(idx + 1)], inplace=True)
+
+
+class QuantizableMobileNetV2(MobileNetV2):
+ def __init__(self, *args: Any, **kwargs: Any) -> None:
+ """
+ MobileNet V2 main class
+
+ Args:
+ Inherits args from floating point MobileNetV2
+ """
+ super().__init__(*args, **kwargs)
+ self.quant = QuantStub()
+ self.dequant = DeQuantStub()
+
+ def forward(self, x: Tensor) -> Tensor:
+ x = self.quant(x)
+ x = self._forward_impl(x)
+ x = self.dequant(x)
+ return x
+
+ def fuse_model(self) -> None:
+ for m in self.modules():
+ if type(m) is ConvNormActivation:
+ fuse_modules(m, ["0", "1", "2"], inplace=True)
+ if type(m) is QuantizableInvertedResidual:
+ m.fuse_model()
+
+
+def mobilenet_v2(
+ pretrained: bool = False,
+ progress: bool = True,
+ quantize: bool = False,
+ **kwargs: Any,
+) -> QuantizableMobileNetV2:
+ """
+ Constructs a MobileNetV2 architecture from
+ `"MobileNetV2: Inverted Residuals and Linear Bottlenecks"
+    <https://arxiv.org/abs/1801.04381>`_.
+
+ Note that quantize = True returns a quantized model with 8 bit
+ weights. Quantized models only support inference and run on CPUs.
+ GPU inference is not yet supported
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet.
+ progress (bool): If True, displays a progress bar of the download to stderr
+ quantize(bool): If True, returns a quantized model, else returns a float model
+ """
+ model = QuantizableMobileNetV2(block=QuantizableInvertedResidual, **kwargs)
+ _replace_relu(model)
+
+ if quantize:
+ # TODO use pretrained as a string to specify the backend
+ backend = "qnnpack"
+ quantize_model(model, backend)
+ else:
+ assert pretrained in [True, False]
+
+ if pretrained:
+ if quantize:
+ model_url = quant_model_urls["mobilenet_v2_" + backend]
+ else:
+ model_url = model_urls["mobilenet_v2"]
+
+ state_dict = load_state_dict_from_url(model_url, progress=progress)
+
+ model.load_state_dict(state_dict)
+ return model
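
A minimal sketch of the float-to-fused path the factory above relies on: fuse_model() folds each Conv/BatchNorm/ReLU triple of ConvNormActivation into a single fused module, which prepare/convert would later replace with int8 kernels. Import path assumed, no downloads:

    import torch
    from torchvision.models.quantization import mobilenet_v2  # vendored package assumed

    model = mobilenet_v2(pretrained=False, quantize=False)  # QuantizableMobileNetV2, still floating point
    model.eval()
    model.fuse_model()  # Conv+BN+ReLU -> fused ConvReLU2d, Conv+BN -> fused Conv2d
    with torch.no_grad():
        print(model(torch.randn(1, 3, 224, 224)).shape)  # torch.Size([1, 1000])
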
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv3.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv3.py
new file mode 100644
index 0000000000..948b72ead7
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv3.py
@@ -0,0 +1,171 @@
+from typing import Any, List, Optional
+
+import torch
+from torch import nn, Tensor
+from torch.quantization import QuantStub, DeQuantStub, fuse_modules
+
+from ..._internally_replaced_utils import load_state_dict_from_url
+from ...ops.misc import ConvNormActivation, SqueezeExcitation
+from ..mobilenetv3 import InvertedResidual, InvertedResidualConfig, MobileNetV3, model_urls, _mobilenet_v3_conf
+from .utils import _replace_relu
+
+
+__all__ = ["QuantizableMobileNetV3", "mobilenet_v3_large"]
+
+quant_model_urls = {
+ "mobilenet_v3_large_qnnpack": "https://download.pytorch.org/models/quantized/mobilenet_v3_large_qnnpack-5bcacf28.pth",
+}
+
+
+class QuantizableSqueezeExcitation(SqueezeExcitation):
+ _version = 2
+
+ def __init__(self, *args: Any, **kwargs: Any) -> None:
+ kwargs["scale_activation"] = nn.Hardsigmoid
+ super().__init__(*args, **kwargs)
+ self.skip_mul = nn.quantized.FloatFunctional()
+
+ def forward(self, input: Tensor) -> Tensor:
+ return self.skip_mul.mul(self._scale(input), input)
+
+ def fuse_model(self) -> None:
+ fuse_modules(self, ["fc1", "activation"], inplace=True)
+
+ def _load_from_state_dict(
+ self,
+ state_dict,
+ prefix,
+ local_metadata,
+ strict,
+ missing_keys,
+ unexpected_keys,
+ error_msgs,
+ ):
+ version = local_metadata.get("version", None)
+
+ if version is None or version < 2:
+ default_state_dict = {
+ "scale_activation.activation_post_process.scale": torch.tensor([1.0]),
+ "scale_activation.activation_post_process.zero_point": torch.tensor([0], dtype=torch.int32),
+ "scale_activation.activation_post_process.fake_quant_enabled": torch.tensor([1]),
+ "scale_activation.activation_post_process.observer_enabled": torch.tensor([1]),
+ }
+ for k, v in default_state_dict.items():
+ full_key = prefix + k
+ if full_key not in state_dict:
+ state_dict[full_key] = v
+
+ super()._load_from_state_dict(
+ state_dict,
+ prefix,
+ local_metadata,
+ strict,
+ missing_keys,
+ unexpected_keys,
+ error_msgs,
+ )
+
+
+class QuantizableInvertedResidual(InvertedResidual):
+ # TODO https://github.com/pytorch/vision/pull/4232#pullrequestreview-730461659
+ def __init__(self, *args: Any, **kwargs: Any) -> None:
+ super().__init__(se_layer=QuantizableSqueezeExcitation, *args, **kwargs) # type: ignore[misc]
+ self.skip_add = nn.quantized.FloatFunctional()
+
+ def forward(self, x: Tensor) -> Tensor:
+ if self.use_res_connect:
+ return self.skip_add.add(x, self.block(x))
+ else:
+ return self.block(x)
+
+
+class QuantizableMobileNetV3(MobileNetV3):
+ def __init__(self, *args: Any, **kwargs: Any) -> None:
+ """
+ MobileNet V3 main class
+
+ Args:
+ Inherits args from floating point MobileNetV3
+ """
+ super().__init__(*args, **kwargs)
+ self.quant = QuantStub()
+ self.dequant = DeQuantStub()
+
+ def forward(self, x: Tensor) -> Tensor:
+ x = self.quant(x)
+ x = self._forward_impl(x)
+ x = self.dequant(x)
+ return x
+
+ def fuse_model(self) -> None:
+ for m in self.modules():
+ if type(m) is ConvNormActivation:
+ modules_to_fuse = ["0", "1"]
+ if len(m) == 3 and type(m[2]) is nn.ReLU:
+ modules_to_fuse.append("2")
+ fuse_modules(m, modules_to_fuse, inplace=True)
+ elif type(m) is QuantizableSqueezeExcitation:
+ m.fuse_model()
+
+
+def _load_weights(arch: str, model: QuantizableMobileNetV3, model_url: Optional[str], progress: bool) -> None:
+ if model_url is None:
+ raise ValueError(f"No checkpoint is available for {arch}")
+ state_dict = load_state_dict_from_url(model_url, progress=progress)
+ model.load_state_dict(state_dict)
+
+
+def _mobilenet_v3_model(
+ arch: str,
+ inverted_residual_setting: List[InvertedResidualConfig],
+ last_channel: int,
+ pretrained: bool,
+ progress: bool,
+ quantize: bool,
+ **kwargs: Any,
+) -> QuantizableMobileNetV3:
+
+ model = QuantizableMobileNetV3(inverted_residual_setting, last_channel, block=QuantizableInvertedResidual, **kwargs)
+ _replace_relu(model)
+
+ if quantize:
+ backend = "qnnpack"
+
+ model.fuse_model()
+ model.qconfig = torch.quantization.get_default_qat_qconfig(backend)
+ torch.quantization.prepare_qat(model, inplace=True)
+
+ if pretrained:
+ _load_weights(arch, model, quant_model_urls.get(arch + "_" + backend, None), progress)
+
+ torch.quantization.convert(model, inplace=True)
+ model.eval()
+ else:
+ if pretrained:
+ _load_weights(arch, model, model_urls.get(arch, None), progress)
+
+ return model
+
+
+def mobilenet_v3_large(
+ pretrained: bool = False,
+ progress: bool = True,
+ quantize: bool = False,
+ **kwargs: Any,
+) -> QuantizableMobileNetV3:
+ """
+ Constructs a MobileNetV3 Large architecture from
+ `"Searching for MobileNetV3" `_.
+
+ Note that quantize = True returns a quantized model with 8 bit
+ weights. Quantized models only support inference and run on CPUs.
+ GPU inference is not yet supported
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet.
+ progress (bool): If True, displays a progress bar of the download to stderr
+ quantize (bool): If True, returns a quantized model, else returns a float model
+ """
+ arch = "mobilenet_v3_large"
+ inverted_residual_setting, last_channel = _mobilenet_v3_conf(arch, **kwargs)
+ return _mobilenet_v3_model(arch, inverted_residual_setting, last_channel, pretrained, progress, quantize, **kwargs)
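
The quantize=True branch of _mobilenet_v3_model above is a quantization-aware-training recipe; below is a sketch of the same steps applied to the float model (import path assumed, fine-tuning elided):

    import torch
    from torchvision.models.quantization import mobilenet_v3_large  # vendored package assumed

    model = mobilenet_v3_large(pretrained=False, quantize=False)  # float QuantizableMobileNetV3
    model.fuse_model()
    model.qconfig = torch.quantization.get_default_qat_qconfig("qnnpack")
    torch.quantization.prepare_qat(model, inplace=True)
    # ... fine-tune here with fake-quant observers enabled ...
    torch.quantization.convert(model, inplace=True)
    model.eval()  # the converted model now carries int8 weights and is inference-only on CPU
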
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/resnet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/resnet.py
new file mode 100644
index 0000000000..5fd3c03929
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/resnet.py
@@ -0,0 +1,174 @@
+import torch
+from torchvision.models.resnet import Bottleneck, BasicBlock, ResNet, model_urls
+import torch.nn as nn
+from torchvision.models.utils import load_state_dict_from_url
+from torch.quantization import QuantStub, DeQuantStub, fuse_modules
+from torch._jit_internal import Optional
+from .utils import _replace_relu, quantize_model
+
+__all__ = ['QuantizableResNet', 'resnet18', 'resnet50',
+ 'resnext101_32x8d']
+
+
+quant_model_urls = {
+ 'resnet18_fbgemm':
+ 'https://download.pytorch.org/models/quantized/resnet18_fbgemm_16fa66dd.pth',
+ 'resnet50_fbgemm':
+ 'https://download.pytorch.org/models/quantized/resnet50_fbgemm_bf931d71.pth',
+ 'resnext101_32x8d_fbgemm':
+ 'https://download.pytorch.org/models/quantized/resnext101_32x8_fbgemm_09835ccf.pth',
+}
+
+
+class QuantizableBasicBlock(BasicBlock):
+ def __init__(self, *args, **kwargs):
+ super(QuantizableBasicBlock, self).__init__(*args, **kwargs)
+ self.add_relu = torch.nn.quantized.FloatFunctional()
+
+ def forward(self, x):
+ identity = x
+
+ out = self.conv1(x)
+ out = self.bn1(out)
+ out = self.relu(out)
+
+ out = self.conv2(out)
+ out = self.bn2(out)
+
+ if self.downsample is not None:
+ identity = self.downsample(x)
+
+ out = self.add_relu.add_relu(out, identity)
+
+ return out
+
+ def fuse_model(self):
+ torch.quantization.fuse_modules(self, [['conv1', 'bn1', 'relu'],
+ ['conv2', 'bn2']], inplace=True)
+ if self.downsample:
+ torch.quantization.fuse_modules(self.downsample, ['0', '1'], inplace=True)
+
+
+class QuantizableBottleneck(Bottleneck):
+ def __init__(self, *args, **kwargs):
+ super(QuantizableBottleneck, self).__init__(*args, **kwargs)
+ self.skip_add_relu = nn.quantized.FloatFunctional()
+ self.relu1 = nn.ReLU(inplace=False)
+ self.relu2 = nn.ReLU(inplace=False)
+
+ def forward(self, x):
+ identity = x
+ out = self.conv1(x)
+ out = self.bn1(out)
+ out = self.relu1(out)
+ out = self.conv2(out)
+ out = self.bn2(out)
+ out = self.relu2(out)
+
+ out = self.conv3(out)
+ out = self.bn3(out)
+
+ if self.downsample is not None:
+ identity = self.downsample(x)
+ out = self.skip_add_relu.add_relu(out, identity)
+
+ return out
+
+ def fuse_model(self):
+ fuse_modules(self, [['conv1', 'bn1', 'relu1'],
+ ['conv2', 'bn2', 'relu2'],
+ ['conv3', 'bn3']], inplace=True)
+ if self.downsample:
+ torch.quantization.fuse_modules(self.downsample, ['0', '1'], inplace=True)
+
+
+class QuantizableResNet(ResNet):
+
+ def __init__(self, *args, **kwargs):
+ super(QuantizableResNet, self).__init__(*args, **kwargs)
+
+ self.quant = torch.quantization.QuantStub()
+ self.dequant = torch.quantization.DeQuantStub()
+
+ def forward(self, x):
+ x = self.quant(x)
+ # Ensure scriptability
+ # super(QuantizableResNet,self).forward(x)
+ # is not scriptable
+ x = self._forward_impl(x)
+ x = self.dequant(x)
+ return x
+
+ def fuse_model(self):
+ r"""Fuse conv/bn/relu modules in resnet models
+
+ Fuse conv+bn+relu/ Conv+relu/conv+Bn modules to prepare for quantization.
+ Model is modified in place. Note that this operation does not change numerics
+ and the model after modification is in floating point
+ """
+
+ fuse_modules(self, ['conv1', 'bn1', 'relu'], inplace=True)
+ for m in self.modules():
+ if type(m) == QuantizableBottleneck or type(m) == QuantizableBasicBlock:
+ m.fuse_model()
+
+
+def _resnet(arch, block, layers, pretrained, progress, quantize, **kwargs):
+ model = QuantizableResNet(block, layers, **kwargs)
+ _replace_relu(model)
+ if quantize:
+ # TODO use pretrained as a string to specify the backend
+ backend = 'fbgemm'
+ quantize_model(model, backend)
+ else:
+ assert pretrained in [True, False]
+
+ if pretrained:
+ if quantize:
+ model_url = quant_model_urls[arch + '_' + backend]
+ else:
+ model_url = model_urls[arch]
+
+ state_dict = load_state_dict_from_url(model_url,
+ progress=progress)
+
+ model.load_state_dict(state_dict)
+ return model
+
+
+def resnet18(pretrained=False, progress=True, quantize=False, **kwargs):
+ r"""ResNet-18 model from
+ `"Deep Residual Learning for Image Recognition" `_
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ return _resnet('resnet18', QuantizableBasicBlock, [2, 2, 2, 2], pretrained, progress,
+ quantize, **kwargs)
+
+
+def resnet50(pretrained=False, progress=True, quantize=False, **kwargs):
+ r"""ResNet-50 model from
+ `"Deep Residual Learning for Image Recognition" `_
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ return _resnet('resnet50', QuantizableBottleneck, [3, 4, 6, 3], pretrained, progress,
+ quantize, **kwargs)
+
+
+def resnext101_32x8d(pretrained=False, progress=True, quantize=False, **kwargs):
+ r"""ResNeXt-101 32x8d model from
+ `"Aggregated Residual Transformation for Deep Neural Networks" `_
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ kwargs['groups'] = 32
+ kwargs['width_per_group'] = 8
+ return _resnet('resnext101_32x8d', QuantizableBottleneck, [3, 4, 23, 3],
+ pretrained, progress, quantize, **kwargs)
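
The add_relu/skip_add_relu calls above exist because quantized tensors cannot go through the plain `+` operator: the residual addition has to be a module so that convert can swap in an int8 kernel. A self-contained sketch of the pattern:

    import torch
    from torch import nn

    class TinyResidual(nn.Module):
        def __init__(self, channels=8):
            super().__init__()
            self.conv = nn.Conv2d(channels, channels, 3, padding=1, bias=False)
            self.bn = nn.BatchNorm2d(channels)
            self.add_relu = nn.quantized.FloatFunctional()  # same pattern as QuantizableBasicBlock above

        def forward(self, x):
            # relu(conv_branch + identity), expressed as a quantization-friendly module call
            return self.add_relu.add_relu(self.bn(self.conv(x)), x)

    block = TinyResidual().eval()
    print(block(torch.randn(1, 8, 16, 16)).shape)  # torch.Size([1, 8, 16, 16])
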
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/shufflenetv2.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/shufflenetv2.py
new file mode 100644
index 0000000000..a2030ca5ec
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/shufflenetv2.py
@@ -0,0 +1,154 @@
+import torch
+import torch.nn as nn
+from torchvision.models.utils import load_state_dict_from_url
+import torchvision.models.shufflenetv2
+import sys
+from .utils import _replace_relu, quantize_model
+
+shufflenetv2 = sys.modules['torchvision.models.shufflenetv2']
+
+__all__ = [
+ 'QuantizableShuffleNetV2', 'shufflenet_v2_x0_5', 'shufflenet_v2_x1_0',
+ 'shufflenet_v2_x1_5', 'shufflenet_v2_x2_0'
+]
+
+quant_model_urls = {
+ 'shufflenetv2_x0.5_fbgemm': None,
+ 'shufflenetv2_x1.0_fbgemm':
+ 'https://download.pytorch.org/models/quantized/shufflenetv2_x1_fbgemm-db332c57.pth',
+ 'shufflenetv2_x1.5_fbgemm': None,
+ 'shufflenetv2_x2.0_fbgemm': None,
+}
+
+
+class QuantizableInvertedResidual(shufflenetv2.InvertedResidual):
+ def __init__(self, *args, **kwargs):
+ super(QuantizableInvertedResidual, self).__init__(*args, **kwargs)
+ self.cat = nn.quantized.FloatFunctional()
+
+ def forward(self, x):
+ if self.stride == 1:
+ x1, x2 = x.chunk(2, dim=1)
+ out = self.cat.cat((x1, self.branch2(x2)), dim=1)
+ else:
+ out = self.cat.cat((self.branch1(x), self.branch2(x)), dim=1)
+
+ out = shufflenetv2.channel_shuffle(out, 2)
+
+ return out
+
+
+class QuantizableShuffleNetV2(shufflenetv2.ShuffleNetV2):
+ def __init__(self, *args, **kwargs):
+ super(QuantizableShuffleNetV2, self).__init__(*args, inverted_residual=QuantizableInvertedResidual, **kwargs)
+ self.quant = torch.quantization.QuantStub()
+ self.dequant = torch.quantization.DeQuantStub()
+
+ def forward(self, x):
+ x = self.quant(x)
+ x = self._forward_impl(x)
+ x = self.dequant(x)
+ return x
+
+ def fuse_model(self):
+ r"""Fuse conv/bn/relu modules in shufflenetv2 model
+
+ Fuse conv+bn+relu/ conv+relu/conv+bn modules to prepare for quantization.
+ Model is modified in place. Note that this operation does not change numerics
+ and the model after modification is in floating point
+ """
+
+ for name, m in self._modules.items():
+ if name in ["conv1", "conv5"]:
+ torch.quantization.fuse_modules(m, [["0", "1", "2"]], inplace=True)
+ for m in self.modules():
+ if type(m) == QuantizableInvertedResidual:
+ if len(m.branch1._modules.items()) > 0:
+ torch.quantization.fuse_modules(
+ m.branch1, [["0", "1"], ["2", "3", "4"]], inplace=True
+ )
+ torch.quantization.fuse_modules(
+ m.branch2,
+ [["0", "1", "2"], ["3", "4"], ["5", "6", "7"]],
+ inplace=True,
+ )
+
+
+def _shufflenetv2(arch, pretrained, progress, quantize, *args, **kwargs):
+ model = QuantizableShuffleNetV2(*args, **kwargs)
+ _replace_relu(model)
+
+ if quantize:
+ # TODO use pretrained as a string to specify the backend
+ backend = 'fbgemm'
+ quantize_model(model, backend)
+ else:
+ assert pretrained in [True, False]
+
+ if pretrained:
+ if quantize:
+ model_url = quant_model_urls[arch + '_' + backend]
+ else:
+ model_url = shufflenetv2.model_urls[arch]
+
+ state_dict = load_state_dict_from_url(model_url,
+ progress=progress)
+
+ model.load_state_dict(state_dict)
+ return model
+
+
+def shufflenet_v2_x0_5(pretrained=False, progress=True, quantize=False, **kwargs):
+ """
+ Constructs a ShuffleNetV2 with 0.5x output channels, as described in
+ `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design"
+ `_.
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ return _shufflenetv2('shufflenetv2_x0.5', pretrained, progress, quantize,
+ [4, 8, 4], [24, 48, 96, 192, 1024], **kwargs)
+
+
+def shufflenet_v2_x1_0(pretrained=False, progress=True, quantize=False, **kwargs):
+ """
+ Constructs a ShuffleNetV2 with 1.0x output channels, as described in
+ `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design"
+    <https://arxiv.org/abs/1807.11164>`_.
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ return _shufflenetv2('shufflenetv2_x1.0', pretrained, progress, quantize,
+ [4, 8, 4], [24, 116, 232, 464, 1024], **kwargs)
+
+
+def shufflenet_v2_x1_5(pretrained=False, progress=True, quantize=False, **kwargs):
+ """
+ Constructs a ShuffleNetV2 with 1.5x output channels, as described in
+ `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design"
+    <https://arxiv.org/abs/1807.11164>`_.
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ return _shufflenetv2('shufflenetv2_x1.5', pretrained, progress, quantize,
+ [4, 8, 4], [24, 176, 352, 704, 1024], **kwargs)
+
+
+def shufflenet_v2_x2_0(pretrained=False, progress=True, quantize=False, **kwargs):
+ """
+ Constructs a ShuffleNetV2 with 2.0x output channels, as described in
+ `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design"
+    <https://arxiv.org/abs/1807.11164>`_.
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ return _shufflenetv2('shufflenetv2_x2.0', pretrained, progress, quantize,
+ [4, 8, 4], [24, 244, 488, 976, 2048], **kwargs)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/utils.py
new file mode 100644
index 0000000000..bf23c9a933
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/utils.py
@@ -0,0 +1,40 @@
+import torch
+from torch import nn
+
+
+def _replace_relu(module):
+ reassign = {}
+ for name, mod in module.named_children():
+ _replace_relu(mod)
+ # Checking for explicit type instead of instance
+ # as we only want to replace modules of the exact type
+ # not inherited classes
+ if type(mod) == nn.ReLU or type(mod) == nn.ReLU6:
+ reassign[name] = nn.ReLU(inplace=False)
+
+ for key, value in reassign.items():
+ module._modules[key] = value
+
+
+def quantize_model(model, backend):
+ _dummy_input_data = torch.rand(1, 3, 299, 299)
+ if backend not in torch.backends.quantized.supported_engines:
+ raise RuntimeError("Quantized backend not supported ")
+ torch.backends.quantized.engine = backend
+ model.eval()
+ # Make sure that weight qconfig matches that of the serialized models
+ if backend == 'fbgemm':
+ model.qconfig = torch.quantization.QConfig(
+ activation=torch.quantization.default_observer,
+ weight=torch.quantization.default_per_channel_weight_observer)
+ elif backend == 'qnnpack':
+ model.qconfig = torch.quantization.QConfig(
+ activation=torch.quantization.default_observer,
+ weight=torch.quantization.default_weight_observer)
+
+ model.fuse_model()
+ torch.quantization.prepare(model, inplace=True)
+ model(_dummy_input_data)
+ torch.quantization.convert(model, inplace=True)
+
+ return
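
The same post-training static quantization recipe as quantize_model above, spelled out on a toy module (the stock get_default_qconfig is used here for brevity; the helper above builds its own QConfig so that weights match the serialized checkpoints):

    import torch
    from torch import nn

    model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.ReLU())
    model.eval()

    backend = "fbgemm"  # x86; use "qnnpack" on ARM
    torch.backends.quantized.engine = backend
    torch.quantization.fuse_modules(model, [["0", "1", "2"]], inplace=True)  # Conv+BN+ReLU -> ConvReLU2d
    model.qconfig = torch.quantization.get_default_qconfig(backend)
    torch.quantization.prepare(model, inplace=True)   # insert observers
    model(torch.randn(4, 3, 32, 32))                  # calibration pass (stands in for real data)
    torch.quantization.convert(model, inplace=True)   # swap in int8 modules
    print(type(model[0]).__name__)                    # ConvReLU2d (quantized, fused)
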
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/resnet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/resnet.py
new file mode 100644
index 0000000000..797f459f5c
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/resnet.py
@@ -0,0 +1,353 @@
+import torch
+import torch.nn as nn
+from .utils import load_state_dict_from_url
+
+
+__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
+ 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d',
+ 'wide_resnet50_2', 'wide_resnet101_2']
+
+
+model_urls = {
+ 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
+ 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
+ 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
+ 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
+ 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
+ 'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',
+ 'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',
+ 'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth',
+ 'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth',
+}
+
+
+def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
+ """3x3 convolution with padding"""
+ return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
+ padding=dilation, groups=groups, bias=False, dilation=dilation)
+
+
+def conv1x1(in_planes, out_planes, stride=1):
+ """1x1 convolution"""
+ return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
+
+
+class BasicBlock(nn.Module):
+ expansion = 1
+
+ def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
+ base_width=64, dilation=1, norm_layer=None):
+ super(BasicBlock, self).__init__()
+ if norm_layer is None:
+ norm_layer = nn.BatchNorm2d
+ if groups != 1 or base_width != 64:
+ raise ValueError('BasicBlock only supports groups=1 and base_width=64')
+ if dilation > 1:
+ raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
+ # Both self.conv1 and self.downsample layers downsample the input when stride != 1
+ self.conv1 = conv3x3(inplanes, planes, stride)
+ self.bn1 = norm_layer(planes)
+ self.relu = nn.ReLU(inplace=True)
+ self.conv2 = conv3x3(planes, planes)
+ self.bn2 = norm_layer(planes)
+ self.downsample = downsample
+ self.stride = stride
+
+ def forward(self, x):
+ identity = x
+
+ out = self.conv1(x)
+ out = self.bn1(out)
+ out = self.relu(out)
+
+ out = self.conv2(out)
+ out = self.bn2(out)
+
+ if self.downsample is not None:
+ identity = self.downsample(x)
+
+ out += identity
+ out = self.relu(out)
+
+ return out
+
+
+class Bottleneck(nn.Module):
+ # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
+ # while original implementation places the stride at the first 1x1 convolution(self.conv1)
+ # according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385.
+ # This variant is also known as ResNet V1.5 and improves accuracy according to
+ # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
+
+ expansion = 4
+
+ def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
+ base_width=64, dilation=1, norm_layer=None):
+ super(Bottleneck, self).__init__()
+ if norm_layer is None:
+ norm_layer = nn.BatchNorm2d
+ width = int(planes * (base_width / 64.)) * groups
+ # Both self.conv2 and self.downsample layers downsample the input when stride != 1
+ self.conv1 = conv1x1(inplanes, width)
+ self.bn1 = norm_layer(width)
+ self.conv2 = conv3x3(width, width, stride, groups, dilation)
+ self.bn2 = norm_layer(width)
+ self.conv3 = conv1x1(width, planes * self.expansion)
+ self.bn3 = norm_layer(planes * self.expansion)
+ self.relu = nn.ReLU(inplace=True)
+ self.downsample = downsample
+ self.stride = stride
+
+ def forward(self, x):
+ identity = x
+
+ out = self.conv1(x)
+ out = self.bn1(out)
+ out = self.relu(out)
+
+ out = self.conv2(out)
+ out = self.bn2(out)
+ out = self.relu(out)
+
+ out = self.conv3(out)
+ out = self.bn3(out)
+
+ if self.downsample is not None:
+ identity = self.downsample(x)
+
+ out += identity
+ out = self.relu(out)
+
+ return out
+
+
+class ResNet(nn.Module):
+
+ def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
+ groups=1, width_per_group=64, replace_stride_with_dilation=None,
+ norm_layer=None):
+ super(ResNet, self).__init__()
+ if norm_layer is None:
+ norm_layer = nn.BatchNorm2d
+ self._norm_layer = norm_layer
+
+ self.inplanes = 64
+ self.dilation = 1
+ if replace_stride_with_dilation is None:
+ # each element in the tuple indicates if we should replace
+ # the 2x2 stride with a dilated convolution instead
+ replace_stride_with_dilation = [False, False, False]
+ if len(replace_stride_with_dilation) != 3:
+ raise ValueError("replace_stride_with_dilation should be None "
+ "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
+ self.groups = groups
+ self.base_width = width_per_group
+ self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
+ bias=False)
+ self.bn1 = norm_layer(self.inplanes)
+ self.relu = nn.ReLU(inplace=True)
+ self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+ self.layer1 = self._make_layer(block, 64, layers[0])
+ self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
+ dilate=replace_stride_with_dilation[0])
+ self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
+ dilate=replace_stride_with_dilation[1])
+ self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
+ dilate=replace_stride_with_dilation[2])
+ self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
+ self.fc = nn.Linear(512 * block.expansion, num_classes)
+
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
+ elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
+ nn.init.constant_(m.weight, 1)
+ nn.init.constant_(m.bias, 0)
+
+ # Zero-initialize the last BN in each residual branch,
+ # so that the residual branch starts with zeros, and each residual block behaves like an identity.
+ # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
+ if zero_init_residual:
+ for m in self.modules():
+ if isinstance(m, Bottleneck):
+ nn.init.constant_(m.bn3.weight, 0)
+ elif isinstance(m, BasicBlock):
+ nn.init.constant_(m.bn2.weight, 0)
+
+ def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
+ norm_layer = self._norm_layer
+ downsample = None
+ previous_dilation = self.dilation
+ if dilate:
+ self.dilation *= stride
+ stride = 1
+ if stride != 1 or self.inplanes != planes * block.expansion:
+ downsample = nn.Sequential(
+ conv1x1(self.inplanes, planes * block.expansion, stride),
+ norm_layer(planes * block.expansion),
+ )
+
+ layers = []
+ layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
+ self.base_width, previous_dilation, norm_layer))
+ self.inplanes = planes * block.expansion
+ for _ in range(1, blocks):
+ layers.append(block(self.inplanes, planes, groups=self.groups,
+ base_width=self.base_width, dilation=self.dilation,
+ norm_layer=norm_layer))
+
+ return nn.Sequential(*layers)
+
+ def _forward_impl(self, x):
+ # See note [TorchScript super()]
+ x = self.conv1(x)
+ x = self.bn1(x)
+ x = self.relu(x)
+ x = self.maxpool(x)
+
+ x = self.layer1(x)
+ x = self.layer2(x)
+ x = self.layer3(x)
+ x = self.layer4(x)
+
+ x = self.avgpool(x)
+ x = torch.flatten(x, 1)
+ x = self.fc(x)
+
+ return x
+
+ def forward(self, x):
+ return self._forward_impl(x)
+
+
+def _resnet(arch, block, layers, pretrained, progress, **kwargs):
+ model = ResNet(block, layers, **kwargs)
+ if pretrained:
+ state_dict = load_state_dict_from_url(model_urls[arch],
+ progress=progress)
+ model.load_state_dict(state_dict)
+ return model
+
+
+def resnet18(pretrained=False, progress=True, **kwargs):
+ r"""ResNet-18 model from
+ `"Deep Residual Learning for Image Recognition" <https://arxiv.org/abs/1512.03385>`_
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress,
+ **kwargs)
+
+
+def resnet34(pretrained=False, progress=True, **kwargs):
+ r"""ResNet-34 model from
+ `"Deep Residual Learning for Image Recognition" <https://arxiv.org/abs/1512.03385>`_
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress,
+ **kwargs)
+
+
+def resnet50(pretrained=False, progress=True, **kwargs):
+ r"""ResNet-50 model from
+ `"Deep Residual Learning for Image Recognition" <https://arxiv.org/abs/1512.03385>`_
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress,
+ **kwargs)
+
+
+def resnet101(pretrained=False, progress=True, **kwargs):
+ r"""ResNet-101 model from
+ `"Deep Residual Learning for Image Recognition" <https://arxiv.org/abs/1512.03385>`_
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress,
+ **kwargs)
+
+
+def resnet152(pretrained=False, progress=True, **kwargs):
+ r"""ResNet-152 model from
+ `"Deep Residual Learning for Image Recognition" <https://arxiv.org/abs/1512.03385>`_
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress,
+ **kwargs)
+
+
+def resnext50_32x4d(pretrained=False, progress=True, **kwargs):
+ r"""ResNeXt-50 32x4d model from
+ `"Aggregated Residual Transformations for Deep Neural Networks" <https://arxiv.org/abs/1611.05431>`_
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ kwargs['groups'] = 32
+ kwargs['width_per_group'] = 4
+ return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3],
+ pretrained, progress, **kwargs)
+
+
+def resnext101_32x8d(pretrained=False, progress=True, **kwargs):
+ r"""ResNeXt-101 32x8d model from
+ `"Aggregated Residual Transformations for Deep Neural Networks" <https://arxiv.org/abs/1611.05431>`_
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ kwargs['groups'] = 32
+ kwargs['width_per_group'] = 8
+ return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3],
+ pretrained, progress, **kwargs)
+
+
+def wide_resnet50_2(pretrained=False, progress=True, **kwargs):
+ r"""Wide ResNet-50-2 model from
+ `"Wide Residual Networks" <https://arxiv.org/abs/1605.07146>`_
+
+ The model is the same as ResNet except that the number of bottleneck channels
+ is twice as large in every block. The number of channels in the outer 1x1
+ convolutions stays the same, e.g. the last block in ResNet-50 has 2048-512-2048
+ channels, whereas in Wide ResNet-50-2 it has 2048-1024-2048.
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ kwargs['width_per_group'] = 64 * 2
+ return _resnet('wide_resnet50_2', Bottleneck, [3, 4, 6, 3],
+ pretrained, progress, **kwargs)
+
+
+def wide_resnet101_2(pretrained=False, progress=True, **kwargs):
+ r"""Wide ResNet-101-2 model from
+ `"Wide Residual Networks" <https://arxiv.org/abs/1605.07146>`_
+
+ The model is the same as ResNet except that the number of bottleneck channels
+ is twice as large in every block. The number of channels in the outer 1x1
+ convolutions stays the same, e.g. the last block in ResNet-50 has 2048-512-2048
+ channels, whereas in Wide ResNet-50-2 it has 2048-1024-2048.
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ kwargs['width_per_group'] = 64 * 2
+ return _resnet('wide_resnet101_2', Bottleneck, [3, 4, 23, 3],
+ pretrained, progress, **kwargs)
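+
+
+# Usage sketch (editor's illustration, not part of the upstream file): build one of the
+# constructors above with pretrained=False (no download) and check the classifier output.
+if __name__ == "__main__":
+    model = resnet18(pretrained=False)
+    x = torch.rand(1, 3, 224, 224)
+    print(model(x).shape)  # expected: torch.Size([1, 1000])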
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/__init__.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/__init__.py
new file mode 100644
index 0000000000..43c80c355a
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/__init__.py
@@ -0,0 +1,3 @@
+from .segmentation import *
+from .fcn import *
+from .deeplabv3 import *
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/_utils.py
new file mode 100644
index 0000000000..c5a7ae99e4
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/_utils.py
@@ -0,0 +1,34 @@
+from collections import OrderedDict
+
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+
+class _SimpleSegmentationModel(nn.Module):
+ __constants__ = ['aux_classifier']
+
+ def __init__(self, backbone, classifier, aux_classifier=None):
+ super(_SimpleSegmentationModel, self).__init__()
+ self.backbone = backbone
+ self.classifier = classifier
+ self.aux_classifier = aux_classifier
+
+ def forward(self, x):
+ input_shape = x.shape[-2:]
+ # contract: features is a dict of tensors
+ features = self.backbone(x)
+
+ result = OrderedDict()
+ x = features["out"]
+ x = self.classifier(x)
+ x = F.interpolate(x, size=input_shape, mode='bilinear', align_corners=False)
+ result["out"] = x
+
+ if self.aux_classifier is not None:
+ x = features["aux"]
+ x = self.aux_classifier(x)
+ x = F.interpolate(x, size=input_shape, mode='bilinear', align_corners=False)
+ result["aux"] = x
+
+ return result
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/deeplabv3.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/deeplabv3.py
new file mode 100644
index 0000000000..ae652cd7d2
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/deeplabv3.py
@@ -0,0 +1,94 @@
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from ._utils import _SimpleSegmentationModel
+
+
+__all__ = ["DeepLabV3"]
+
+
+class DeepLabV3(_SimpleSegmentationModel):
+ """
+ Implements DeepLabV3 model from
+ `"Rethinking Atrous Convolution for Semantic Image Segmentation"
+ <https://arxiv.org/abs/1706.05587>`_.
+
+ Arguments:
+ backbone (nn.Module): the network used to compute the features for the model.
+ The backbone should return an OrderedDict[Tensor], with the key being
+ "out" for the last feature map used, and "aux" if an auxiliary classifier
+ is used.
+ classifier (nn.Module): module that takes the "out" element returned from
+ the backbone and returns a dense prediction.
+ aux_classifier (nn.Module, optional): auxiliary classifier used during training
+ """
+ pass
+
+
+class DeepLabHead(nn.Sequential):
+ def __init__(self, in_channels, num_classes):
+ super(DeepLabHead, self).__init__(
+ ASPP(in_channels, [12, 24, 36]),
+ nn.Conv2d(256, 256, 3, padding=1, bias=False),
+ nn.BatchNorm2d(256),
+ nn.ReLU(),
+ nn.Conv2d(256, num_classes, 1)
+ )
+
+
+class ASPPConv(nn.Sequential):
+ def __init__(self, in_channels, out_channels, dilation):
+ modules = [
+ nn.Conv2d(in_channels, out_channels, 3, padding=dilation, dilation=dilation, bias=False),
+ nn.BatchNorm2d(out_channels),
+ nn.ReLU()
+ ]
+ super(ASPPConv, self).__init__(*modules)
+
+
+class ASPPPooling(nn.Sequential):
+ def __init__(self, in_channels, out_channels):
+ super(ASPPPooling, self).__init__(
+ nn.AdaptiveAvgPool2d(1),
+ nn.Conv2d(in_channels, out_channels, 1, bias=False),
+ nn.BatchNorm2d(out_channels),
+ nn.ReLU())
+
+ def forward(self, x):
+ size = x.shape[-2:]
+ for mod in self:
+ x = mod(x)
+ return F.interpolate(x, size=size, mode='bilinear', align_corners=False)
+
+
+class ASPP(nn.Module):
+ def __init__(self, in_channels, atrous_rates):
+ super(ASPP, self).__init__()
+ out_channels = 256
+ modules = []
+ modules.append(nn.Sequential(
+ nn.Conv2d(in_channels, out_channels, 1, bias=False),
+ nn.BatchNorm2d(out_channels),
+ nn.ReLU()))
+
+ rate1, rate2, rate3 = tuple(atrous_rates)
+ modules.append(ASPPConv(in_channels, out_channels, rate1))
+ modules.append(ASPPConv(in_channels, out_channels, rate2))
+ modules.append(ASPPConv(in_channels, out_channels, rate3))
+ modules.append(ASPPPooling(in_channels, out_channels))
+
+ self.convs = nn.ModuleList(modules)
+
+ self.project = nn.Sequential(
+ nn.Conv2d(5 * out_channels, out_channels, 1, bias=False),
+ nn.BatchNorm2d(out_channels),
+ nn.ReLU(),
+ nn.Dropout(0.5))
+
+ def forward(self, x):
+ res = []
+ for conv in self.convs:
+ res.append(conv(x))
+ res = torch.cat(res, dim=1)
+ return self.project(res)
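+
+
+# Shape sketch (editor's illustration): ASPP keeps the spatial size of its input and
+# projects the five concatenated 256-channel branches back to 256 channels, so a
+# 2048-channel backbone feature map comes out as [N, 256, H, W].
+if __name__ == "__main__":
+    aspp = ASPP(2048, [12, 24, 36])
+    feat = torch.rand(2, 2048, 20, 20)  # batch of 2 so BatchNorm can run in train mode
+    print(aspp(feat).shape)  # expected: torch.Size([2, 256, 20, 20])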
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/fcn.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/fcn.py
new file mode 100644
index 0000000000..4d7701cc4e
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/fcn.py
@@ -0,0 +1,36 @@
+from torch import nn
+
+from ._utils import _SimpleSegmentationModel
+
+
+__all__ = ["FCN"]
+
+
+class FCN(_SimpleSegmentationModel):
+ """
+ Implements a Fully-Convolutional Network for semantic segmentation.
+
+ Arguments:
+ backbone (nn.Module): the network used to compute the features for the model.
+ The backbone should return an OrderedDict[Tensor], with the key being
+ "out" for the last feature map used, and "aux" if an auxiliary classifier
+ is used.
+ classifier (nn.Module): module that takes the "out" element returned from
+ the backbone and returns a dense prediction.
+ aux_classifier (nn.Module, optional): auxiliary classifier used during training
+ """
+ pass
+
+
+class FCNHead(nn.Sequential):
+ def __init__(self, in_channels, channels):
+ inter_channels = in_channels // 4
+ layers = [
+ nn.Conv2d(in_channels, inter_channels, 3, padding=1, bias=False),
+ nn.BatchNorm2d(inter_channels),
+ nn.ReLU(),
+ nn.Dropout(0.1),
+ nn.Conv2d(inter_channels, channels, 1)
+ ]
+
+ super(FCNHead, self).__init__(*layers)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/segmentation.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/segmentation.py
new file mode 100644
index 0000000000..15df4d8ae3
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/segmentation.py
@@ -0,0 +1,106 @@
+from .._utils import IntermediateLayerGetter
+from ..utils import load_state_dict_from_url
+from .. import resnet
+from .deeplabv3 import DeepLabHead, DeepLabV3
+from .fcn import FCN, FCNHead
+
+
+__all__ = ['fcn_resnet50', 'fcn_resnet101', 'deeplabv3_resnet50', 'deeplabv3_resnet101']
+
+
+model_urls = {
+ 'fcn_resnet50_coco': None,
+ 'fcn_resnet101_coco': 'https://download.pytorch.org/models/fcn_resnet101_coco-7ecb50ca.pth',
+ 'deeplabv3_resnet50_coco': None,
+ 'deeplabv3_resnet101_coco': 'https://download.pytorch.org/models/deeplabv3_resnet101_coco-586e9e4e.pth',
+}
+
+
+def _segm_resnet(name, backbone_name, num_classes, aux, pretrained_backbone=True):
+ backbone = resnet.__dict__[backbone_name](
+ pretrained=pretrained_backbone,
+ replace_stride_with_dilation=[False, True, True])
+
+ return_layers = {'layer4': 'out'}
+ if aux:
+ return_layers['layer3'] = 'aux'
+ backbone = IntermediateLayerGetter(backbone, return_layers=return_layers)
+
+ aux_classifier = None
+ if aux:
+ inplanes = 1024
+ aux_classifier = FCNHead(inplanes, num_classes)
+
+ model_map = {
+ 'deeplabv3': (DeepLabHead, DeepLabV3),
+ 'fcn': (FCNHead, FCN),
+ }
+ inplanes = 2048
+ classifier = model_map[name][0](inplanes, num_classes)
+ base_model = model_map[name][1]
+
+ model = base_model(backbone, classifier, aux_classifier)
+ return model
+
+
+def _load_model(arch_type, backbone, pretrained, progress, num_classes, aux_loss, **kwargs):
+ if pretrained:
+ aux_loss = True
+ model = _segm_resnet(arch_type, backbone, num_classes, aux_loss, **kwargs)
+ if pretrained:
+ arch = arch_type + '_' + backbone + '_coco'
+ model_url = model_urls[arch]
+ if model_url is None:
+ raise NotImplementedError('pretrained {} is not supported as of now'.format(arch))
+ else:
+ state_dict = load_state_dict_from_url(model_url, progress=progress)
+ model.load_state_dict(state_dict)
+ return model
+
+
+def fcn_resnet50(pretrained=False, progress=True,
+ num_classes=21, aux_loss=None, **kwargs):
+ """Constructs a Fully-Convolutional Network model with a ResNet-50 backbone.
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on COCO train2017 which
+ contains the same classes as Pascal VOC
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ return _load_model('fcn', 'resnet50', pretrained, progress, num_classes, aux_loss, **kwargs)
+
+
+def fcn_resnet101(pretrained=False, progress=True,
+ num_classes=21, aux_loss=None, **kwargs):
+ """Constructs a Fully-Convolutional Network model with a ResNet-101 backbone.
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on COCO train2017 which
+ contains the same classes as Pascal VOC
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ return _load_model('fcn', 'resnet101', pretrained, progress, num_classes, aux_loss, **kwargs)
+
+
+def deeplabv3_resnet50(pretrained=False, progress=True,
+ num_classes=21, aux_loss=None, **kwargs):
+ """Constructs a DeepLabV3 model with a ResNet-50 backbone.
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on COCO train2017 which
+ contains the same classes as Pascal VOC
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ return _load_model('deeplabv3', 'resnet50', pretrained, progress, num_classes, aux_loss, **kwargs)
+
+
+def deeplabv3_resnet101(pretrained=False, progress=True,
+ num_classes=21, aux_loss=None, **kwargs):
+ """Constructs a DeepLabV3 model with a ResNet-101 backbone.
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on COCO train2017 which
+ contains the same classes as Pascal VOC
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ return _load_model('deeplabv3', 'resnet101', pretrained, progress, num_classes, aux_loss, **kwargs)
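+
+
+# Usage sketch (editor's illustration; pretrained=False and pretrained_backbone=False so
+# nothing is downloaded): the builders return models whose forward pass yields an
+# OrderedDict with an "out" entry (plus "aux" when aux_loss is enabled), each upsampled
+# to the input resolution.
+if __name__ == "__main__":
+    import torch
+    m = deeplabv3_resnet50(pretrained=False, num_classes=21, aux_loss=False,
+                           pretrained_backbone=False)
+    m.eval()  # eval mode: the ASPP pooling branch's BatchNorm cannot train on a 1x1 map
+    out = m(torch.rand(1, 3, 320, 320))
+    print(out["out"].shape)  # expected: torch.Size([1, 21, 320, 320])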
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/shufflenetv2.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/shufflenetv2.py
new file mode 100644
index 0000000000..14f9521886
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/shufflenetv2.py
@@ -0,0 +1,208 @@
+import torch
+import torch.nn as nn
+from .utils import load_state_dict_from_url
+
+
+__all__ = [
+ 'ShuffleNetV2', 'shufflenet_v2_x0_5', 'shufflenet_v2_x1_0',
+ 'shufflenet_v2_x1_5', 'shufflenet_v2_x2_0'
+]
+
+model_urls = {
+ 'shufflenetv2_x0.5': 'https://download.pytorch.org/models/shufflenetv2_x0.5-f707e7126e.pth',
+ 'shufflenetv2_x1.0': 'https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth',
+ 'shufflenetv2_x1.5': None,
+ 'shufflenetv2_x2.0': None,
+}
+
+
+def channel_shuffle(x, groups):
+ # type: (torch.Tensor, int) -> torch.Tensor
+ batchsize, num_channels, height, width = x.data.size()
+ channels_per_group = num_channels // groups
+
+ # reshape
+ x = x.view(batchsize, groups,
+ channels_per_group, height, width)
+
+ x = torch.transpose(x, 1, 2).contiguous()
+
+ # flatten
+ x = x.view(batchsize, -1, height, width)
+
+ return x
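+
+# Worked example (editor's illustration): with 4 channels and groups=2, channel_shuffle
+# reorders channels [0, 1, 2, 3] into [0, 2, 1, 3], interleaving the two groups so that
+# information mixes across the split/concat branches used by InvertedResidual below.
+# >>> x = torch.arange(4.).view(1, 4, 1, 1)
+# >>> channel_shuffle(x, 2).flatten().tolist()
+# [0.0, 2.0, 1.0, 3.0]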
+
+
+class InvertedResidual(nn.Module):
+ def __init__(self, inp, oup, stride):
+ super(InvertedResidual, self).__init__()
+
+ if not (1 <= stride <= 3):
+ raise ValueError('illegal stride value')
+ self.stride = stride
+
+ branch_features = oup // 2
+ assert (self.stride != 1) or (inp == branch_features << 1)
+
+ if self.stride > 1:
+ self.branch1 = nn.Sequential(
+ self.depthwise_conv(inp, inp, kernel_size=3, stride=self.stride, padding=1),
+ nn.BatchNorm2d(inp),
+ nn.Conv2d(inp, branch_features, kernel_size=1, stride=1, padding=0, bias=False),
+ nn.BatchNorm2d(branch_features),
+ nn.ReLU(inplace=True),
+ )
+ else:
+ self.branch1 = nn.Sequential()
+
+ self.branch2 = nn.Sequential(
+ nn.Conv2d(inp if (self.stride > 1) else branch_features,
+ branch_features, kernel_size=1, stride=1, padding=0, bias=False),
+ nn.BatchNorm2d(branch_features),
+ nn.ReLU(inplace=True),
+ self.depthwise_conv(branch_features, branch_features, kernel_size=3, stride=self.stride, padding=1),
+ nn.BatchNorm2d(branch_features),
+ nn.Conv2d(branch_features, branch_features, kernel_size=1, stride=1, padding=0, bias=False),
+ nn.BatchNorm2d(branch_features),
+ nn.ReLU(inplace=True),
+ )
+
+ @staticmethod
+ def depthwise_conv(i, o, kernel_size, stride=1, padding=0, bias=False):
+ return nn.Conv2d(i, o, kernel_size, stride, padding, bias=bias, groups=i)
+
+ def forward(self, x):
+ if self.stride == 1:
+ x1, x2 = x.chunk(2, dim=1)
+ out = torch.cat((x1, self.branch2(x2)), dim=1)
+ else:
+ out = torch.cat((self.branch1(x), self.branch2(x)), dim=1)
+
+ out = channel_shuffle(out, 2)
+
+ return out
+
+
+class ShuffleNetV2(nn.Module):
+ def __init__(self, stages_repeats, stages_out_channels, num_classes=1000, inverted_residual=InvertedResidual):
+ super(ShuffleNetV2, self).__init__()
+
+ if len(stages_repeats) != 3:
+ raise ValueError('expected stages_repeats as list of 3 positive ints')
+ if len(stages_out_channels) != 5:
+ raise ValueError('expected stages_out_channels as list of 5 positive ints')
+ self._stage_out_channels = stages_out_channels
+
+ input_channels = 3
+ output_channels = self._stage_out_channels[0]
+ self.conv1 = nn.Sequential(
+ nn.Conv2d(input_channels, output_channels, 3, 2, 1, bias=False),
+ nn.BatchNorm2d(output_channels),
+ nn.ReLU(inplace=True),
+ )
+ input_channels = output_channels
+
+ self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+
+ stage_names = ['stage{}'.format(i) for i in [2, 3, 4]]
+ for name, repeats, output_channels in zip(
+ stage_names, stages_repeats, self._stage_out_channels[1:]):
+ seq = [inverted_residual(input_channels, output_channels, 2)]
+ for i in range(repeats - 1):
+ seq.append(inverted_residual(output_channels, output_channels, 1))
+ setattr(self, name, nn.Sequential(*seq))
+ input_channels = output_channels
+
+ output_channels = self._stage_out_channels[-1]
+ self.conv5 = nn.Sequential(
+ nn.Conv2d(input_channels, output_channels, 1, 1, 0, bias=False),
+ nn.BatchNorm2d(output_channels),
+ nn.ReLU(inplace=True),
+ )
+
+ self.fc = nn.Linear(output_channels, num_classes)
+
+ def _forward_impl(self, x):
+ # See note [TorchScript super()]
+ x = self.conv1(x)
+ x = self.maxpool(x)
+ x = self.stage2(x)
+ x = self.stage3(x)
+ x = self.stage4(x)
+ x = self.conv5(x)
+ x = x.mean([2, 3]) # globalpool
+ x = self.fc(x)
+ return x
+
+ def forward(self, x):
+ return self._forward_impl(x)
+
+
+def _shufflenetv2(arch, pretrained, progress, *args, **kwargs):
+ model = ShuffleNetV2(*args, **kwargs)
+
+ if pretrained:
+ model_url = model_urls[arch]
+ if model_url is None:
+ raise NotImplementedError('pretrained {} is not supported as of now'.format(arch))
+ else:
+ state_dict = load_state_dict_from_url(model_url, progress=progress)
+ model.load_state_dict(state_dict)
+
+ return model
+
+
+def shufflenet_v2_x0_5(pretrained=False, progress=True, **kwargs):
+ """
+ Constructs a ShuffleNetV2 with 0.5x output channels, as described in
+ `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design"
+ <https://arxiv.org/abs/1807.11164>`_.
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ return _shufflenetv2('shufflenetv2_x0.5', pretrained, progress,
+ [4, 8, 4], [24, 48, 96, 192, 1024], **kwargs)
+
+
+def shufflenet_v2_x1_0(pretrained=False, progress=True, **kwargs):
+ """
+ Constructs a ShuffleNetV2 with 1.0x output channels, as described in
+ `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design"
+ <https://arxiv.org/abs/1807.11164>`_.
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ return _shufflenetv2('shufflenetv2_x1.0', pretrained, progress,
+ [4, 8, 4], [24, 116, 232, 464, 1024], **kwargs)
+
+
+def shufflenet_v2_x1_5(pretrained=False, progress=True, **kwargs):
+ """
+ Constructs a ShuffleNetV2 with 1.5x output channels, as described in
+ `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design"
+ <https://arxiv.org/abs/1807.11164>`_.
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ return _shufflenetv2('shufflenetv2_x1.5', pretrained, progress,
+ [4, 8, 4], [24, 176, 352, 704, 1024], **kwargs)
+
+
+def shufflenet_v2_x2_0(pretrained=False, progress=True, **kwargs):
+ """
+ Constructs a ShuffleNetV2 with 2.0x output channels, as described in
+ `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design"
+ <https://arxiv.org/abs/1807.11164>`_.
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ return _shufflenetv2('shufflenetv2_x2.0', pretrained, progress,
+ [4, 8, 4], [24, 244, 488, 976, 2048], **kwargs)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/squeezenet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/squeezenet.py
new file mode 100644
index 0000000000..964f3ec66d
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/squeezenet.py
@@ -0,0 +1,137 @@
+import torch
+import torch.nn as nn
+import torch.nn.init as init
+from .utils import load_state_dict_from_url
+
+__all__ = ['SqueezeNet', 'squeezenet1_0', 'squeezenet1_1']
+
+model_urls = {
+ 'squeezenet1_0': 'https://download.pytorch.org/models/squeezenet1_0-a815701f.pth',
+ 'squeezenet1_1': 'https://download.pytorch.org/models/squeezenet1_1-f364aa15.pth',
+}
+
+
+class Fire(nn.Module):
+
+ def __init__(self, inplanes, squeeze_planes,
+ expand1x1_planes, expand3x3_planes):
+ super(Fire, self).__init__()
+ self.inplanes = inplanes
+ self.squeeze = nn.Conv2d(inplanes, squeeze_planes, kernel_size=1)
+ self.squeeze_activation = nn.ReLU(inplace=True)
+ self.expand1x1 = nn.Conv2d(squeeze_planes, expand1x1_planes,
+ kernel_size=1)
+ self.expand1x1_activation = nn.ReLU(inplace=True)
+ self.expand3x3 = nn.Conv2d(squeeze_planes, expand3x3_planes,
+ kernel_size=3, padding=1)
+ self.expand3x3_activation = nn.ReLU(inplace=True)
+
+ def forward(self, x):
+ x = self.squeeze_activation(self.squeeze(x))
+ return torch.cat([
+ self.expand1x1_activation(self.expand1x1(x)),
+ self.expand3x3_activation(self.expand3x3(x))
+ ], 1)
+
+
+class SqueezeNet(nn.Module):
+
+ def __init__(self, version='1_0', num_classes=1000):
+ super(SqueezeNet, self).__init__()
+ self.num_classes = num_classes
+ if version == '1_0':
+ self.features = nn.Sequential(
+ nn.Conv2d(3, 96, kernel_size=7, stride=2),
+ nn.ReLU(inplace=True),
+ nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
+ Fire(96, 16, 64, 64),
+ Fire(128, 16, 64, 64),
+ Fire(128, 32, 128, 128),
+ nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
+ Fire(256, 32, 128, 128),
+ Fire(256, 48, 192, 192),
+ Fire(384, 48, 192, 192),
+ Fire(384, 64, 256, 256),
+ nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
+ Fire(512, 64, 256, 256),
+ )
+ elif version == '1_1':
+ self.features = nn.Sequential(
+ nn.Conv2d(3, 64, kernel_size=3, stride=2),
+ nn.ReLU(inplace=True),
+ nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
+ Fire(64, 16, 64, 64),
+ Fire(128, 16, 64, 64),
+ nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
+ Fire(128, 32, 128, 128),
+ Fire(256, 32, 128, 128),
+ nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
+ Fire(256, 48, 192, 192),
+ Fire(384, 48, 192, 192),
+ Fire(384, 64, 256, 256),
+ Fire(512, 64, 256, 256),
+ )
+ else:
+ # FIXME: Is this needed? SqueezeNet should only be called from the
+ # FIXME: squeezenet1_x() functions
+ # FIXME: This checking is not done for the other models
+ raise ValueError("Unsupported SqueezeNet version {version}: "
+ "1_0 or 1_1 expected".format(version=version))
+
+ # Final convolution is initialized differently from the rest
+ final_conv = nn.Conv2d(512, self.num_classes, kernel_size=1)
+ self.classifier = nn.Sequential(
+ nn.Dropout(p=0.5),
+ final_conv,
+ nn.ReLU(inplace=True),
+ nn.AdaptiveAvgPool2d((1, 1))
+ )
+
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ if m is final_conv:
+ init.normal_(m.weight, mean=0.0, std=0.01)
+ else:
+ init.kaiming_uniform_(m.weight)
+ if m.bias is not None:
+ init.constant_(m.bias, 0)
+
+ def forward(self, x):
+ x = self.features(x)
+ x = self.classifier(x)
+ return torch.flatten(x, 1)
+
+
+def _squeezenet(version, pretrained, progress, **kwargs):
+ model = SqueezeNet(version, **kwargs)
+ if pretrained:
+ arch = 'squeezenet' + version
+ state_dict = load_state_dict_from_url(model_urls[arch],
+ progress=progress)
+ model.load_state_dict(state_dict)
+ return model
+
+
+def squeezenet1_0(pretrained=False, progress=True, **kwargs):
+ r"""SqueezeNet model architecture from the `"SqueezeNet: AlexNet-level
+ accuracy with 50x fewer parameters and <0.5MB model size"
+ <https://arxiv.org/abs/1602.07360>`_ paper.
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ return _squeezenet('1_0', pretrained, progress, **kwargs)
+
+
+def squeezenet1_1(pretrained=False, progress=True, **kwargs):
+ r"""SqueezeNet 1.1 model from the `official SqueezeNet repo
+ <https://github.com/forresti/SqueezeNet>`_.
+ SqueezeNet 1.1 has 2.4x less computation and slightly fewer parameters
+ than SqueezeNet 1.0, without sacrificing accuracy.
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ return _squeezenet('1_1', pretrained, progress, **kwargs)
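+
+
+# Shape sketch (editor's illustration): a Fire module squeezes to `squeeze_planes`
+# channels, then expands through parallel 1x1 and 3x3 convolutions whose outputs are
+# concatenated, so Fire(96, 16, 64, 64) maps [N, 96, H, W] to [N, 64 + 64, H, W].
+if __name__ == "__main__":
+    fire = Fire(96, 16, 64, 64)
+    print(fire(torch.rand(1, 96, 54, 54)).shape)  # expected: torch.Size([1, 128, 54, 54])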
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/utils.py
new file mode 100644
index 0000000000..638ef07cd8
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/utils.py
@@ -0,0 +1,4 @@
+try:
+ from torch.hub import load_state_dict_from_url
+except ImportError:
+ from torch.utils.model_zoo import load_url as load_state_dict_from_url
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/vgg.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/vgg.py
new file mode 100644
index 0000000000..dba534f651
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/vgg.py
@@ -0,0 +1,183 @@
+import torch
+import torch.nn as nn
+from .utils import load_state_dict_from_url
+
+
+__all__ = [
+ 'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn',
+ 'vgg19_bn', 'vgg19',
+]
+
+
+model_urls = {
+ 'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
+ 'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
+ 'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
+ 'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth',
+ 'vgg11_bn': 'https://download.pytorch.org/models/vgg11_bn-6002323d.pth',
+ 'vgg13_bn': 'https://download.pytorch.org/models/vgg13_bn-abd245e5.pth',
+ 'vgg16_bn': 'https://download.pytorch.org/models/vgg16_bn-6c64b313.pth',
+ 'vgg19_bn': 'https://download.pytorch.org/models/vgg19_bn-c79401a0.pth',
+}
+
+
+class VGG(nn.Module):
+
+ def __init__(self, features, num_classes=1000, init_weights=True):
+ super(VGG, self).__init__()
+ self.features = features
+ self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
+ self.classifier = nn.Sequential(
+ nn.Linear(512 * 7 * 7, 4096),
+ nn.ReLU(True),
+ nn.Dropout(),
+ nn.Linear(4096, 4096),
+ nn.ReLU(True),
+ nn.Dropout(),
+ nn.Linear(4096, num_classes),
+ )
+ if init_weights:
+ self._initialize_weights()
+
+ def forward(self, x):
+ x = self.features(x)
+ x = self.avgpool(x)
+ x = torch.flatten(x, 1)
+ x = self.classifier(x)
+ return x
+
+ def _initialize_weights(self):
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
+ if m.bias is not None:
+ nn.init.constant_(m.bias, 0)
+ elif isinstance(m, nn.BatchNorm2d):
+ nn.init.constant_(m.weight, 1)
+ nn.init.constant_(m.bias, 0)
+ elif isinstance(m, nn.Linear):
+ nn.init.normal_(m.weight, 0, 0.01)
+ nn.init.constant_(m.bias, 0)
+
+
+def make_layers(cfg, batch_norm=False):
+ layers = []
+ in_channels = 3
+ for v in cfg:
+ if v == 'M':
+ layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
+ else:
+ conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
+ if batch_norm:
+ layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
+ else:
+ layers += [conv2d, nn.ReLU(inplace=True)]
+ in_channels = v
+ return nn.Sequential(*layers)
+
+
+cfgs = {
+ 'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
+ 'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
+ 'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
+ 'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
+}
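+
+
+# Reading the configs (editor's note): integers are 3x3 conv output widths and 'M' is a
+# 2x2 max-pool, so cfg 'A' contributes 8 conv layers which, with the 3 fully connected
+# layers in VGG.classifier, gives the 11 weight layers of VGG-11; 'B', 'D' and 'E'
+# correspond to VGG-13, VGG-16 and VGG-19 in the same way.
+# >>> sum(1 for v in cfgs['A'] if v != 'M')
+# 8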
+
+
+def _vgg(arch, cfg, batch_norm, pretrained, progress, **kwargs):
+ if pretrained:
+ kwargs['init_weights'] = False
+ model = VGG(make_layers(cfgs[cfg], batch_norm=batch_norm), **kwargs)
+ if pretrained:
+ state_dict = load_state_dict_from_url(model_urls[arch],
+ progress=progress)
+ model.load_state_dict(state_dict)
+ return model
+
+
+def vgg11(pretrained=False, progress=True, **kwargs):
+ r"""VGG 11-layer model (configuration "A") from
+ `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/abs/1409.1556>`_
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ return _vgg('vgg11', 'A', False, pretrained, progress, **kwargs)
+
+
+def vgg11_bn(pretrained=False, progress=True, **kwargs):
+ r"""VGG 11-layer model (configuration "A") with batch normalization
+ `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/abs/1409.1556>`_
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ return _vgg('vgg11_bn', 'A', True, pretrained, progress, **kwargs)
+
+
+def vgg13(pretrained=False, progress=True, **kwargs):
+ r"""VGG 13-layer model (configuration "B")
+ `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/abs/1409.1556>`_
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ return _vgg('vgg13', 'B', False, pretrained, progress, **kwargs)
+
+
+def vgg13_bn(pretrained=False, progress=True, **kwargs):
+ r"""VGG 13-layer model (configuration "B") with batch normalization
+ `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/abs/1409.1556>`_
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ return _vgg('vgg13_bn', 'B', True, pretrained, progress, **kwargs)
+
+
+def vgg16(pretrained=False, progress=True, **kwargs):
+ r"""VGG 16-layer model (configuration "D")
+ `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/abs/1409.1556>`_
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ return _vgg('vgg16', 'D', False, pretrained, progress, **kwargs)
+
+
+def vgg16_bn(pretrained=False, progress=True, **kwargs):
+ r"""VGG 16-layer model (configuration "D") with batch normalization
+ `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/abs/1409.1556>`_
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ return _vgg('vgg16_bn', 'D', True, pretrained, progress, **kwargs)
+
+
+def vgg19(pretrained=False, progress=True, **kwargs):
+ r"""VGG 19-layer model (configuration "E")
+ `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/abs/1409.1556>`_
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ return _vgg('vgg19', 'E', False, pretrained, progress, **kwargs)
+
+
+def vgg19_bn(pretrained=False, progress=True, **kwargs):
+ r"""VGG 19-layer model (configuration 'E') with batch normalization
+ `"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/abs/1409.1556>`_
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
+ progress (bool): If True, displays a progress bar of the download to stderr
+ """
+ return _vgg('vgg19_bn', 'E', True, pretrained, progress, **kwargs)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/video/__init__.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/video/__init__.py
new file mode 100644
index 0000000000..b792ca6ecf
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/video/__init__.py
@@ -0,0 +1 @@
+from .resnet import *
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/video/resnet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/video/resnet.py
new file mode 100644
index 0000000000..a9e59a149c
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/video/resnet.py
@@ -0,0 +1,341 @@
+import torch
+import torch.nn as nn
+
+from ..utils import load_state_dict_from_url
+
+
+__all__ = ['r3d_18', 'mc3_18', 'r2plus1d_18']
+
+model_urls = {
+ 'r3d_18': 'https://download.pytorch.org/models/r3d_18-b3b3357e.pth',
+ 'mc3_18': 'https://download.pytorch.org/models/mc3_18-a90a0ba3.pth',
+ 'r2plus1d_18': 'https://download.pytorch.org/models/r2plus1d_18-91a641e6.pth',
+}
+
+
+class Conv3DSimple(nn.Conv3d):
+ def __init__(self,
+ in_planes,
+ out_planes,
+ midplanes=None,
+ stride=1,
+ padding=1):
+
+ super(Conv3DSimple, self).__init__(
+ in_channels=in_planes,
+ out_channels=out_planes,
+ kernel_size=(3, 3, 3),
+ stride=stride,
+ padding=padding,
+ bias=False)
+
+ @staticmethod
+ def get_downsample_stride(stride):
+ return (stride, stride, stride)
+
+
+class Conv2Plus1D(nn.Sequential):
+
+ def __init__(self,
+ in_planes,
+ out_planes,
+ midplanes,
+ stride=1,
+ padding=1):
+ super(Conv2Plus1D, self).__init__(
+ nn.Conv3d(in_planes, midplanes, kernel_size=(1, 3, 3),
+ stride=(1, stride, stride), padding=(0, padding, padding),
+ bias=False),
+ nn.BatchNorm3d(midplanes),
+ nn.ReLU(inplace=True),
+ nn.Conv3d(midplanes, out_planes, kernel_size=(3, 1, 1),
+ stride=(stride, 1, 1), padding=(padding, 0, 0),
+ bias=False))
+
+ @staticmethod
+ def get_downsample_stride(stride):
+ return (stride, stride, stride)
+
+
+class Conv3DNoTemporal(nn.Conv3d):
+
+ def __init__(self,
+ in_planes,
+ out_planes,
+ midplanes=None,
+ stride=1,
+ padding=1):
+
+ super(Conv3DNoTemporal, self).__init__(
+ in_channels=in_planes,
+ out_channels=out_planes,
+ kernel_size=(1, 3, 3),
+ stride=(1, stride, stride),
+ padding=(0, padding, padding),
+ bias=False)
+
+ @staticmethod
+ def get_downsample_stride(stride):
+ return (1, stride, stride)
+
+
+class BasicBlock(nn.Module):
+
+ expansion = 1
+
+ def __init__(self, inplanes, planes, conv_builder, stride=1, downsample=None):
+ midplanes = (inplanes * planes * 3 * 3 * 3) // (inplanes * 3 * 3 + 3 * planes)
+
+ super(BasicBlock, self).__init__()
+ self.conv1 = nn.Sequential(
+ conv_builder(inplanes, planes, midplanes, stride),
+ nn.BatchNorm3d(planes),
+ nn.ReLU(inplace=True)
+ )
+ self.conv2 = nn.Sequential(
+ conv_builder(planes, planes, midplanes),
+ nn.BatchNorm3d(planes)
+ )
+ self.relu = nn.ReLU(inplace=True)
+ self.downsample = downsample
+ self.stride = stride
+
+ def forward(self, x):
+ residual = x
+
+ out = self.conv1(x)
+ out = self.conv2(out)
+ if self.downsample is not None:
+ residual = self.downsample(x)
+
+ out += residual
+ out = self.relu(out)
+
+ return out
+
+
+class Bottleneck(nn.Module):
+ expansion = 4
+
+ def __init__(self, inplanes, planes, conv_builder, stride=1, downsample=None):
+
+ super(Bottleneck, self).__init__()
+ midplanes = (inplanes * planes * 3 * 3 * 3) // (inplanes * 3 * 3 + 3 * planes)
+
+ # 1x1x1
+ self.conv1 = nn.Sequential(
+ nn.Conv3d(inplanes, planes, kernel_size=1, bias=False),
+ nn.BatchNorm3d(planes),
+ nn.ReLU(inplace=True)
+ )
+ # Second kernel
+ self.conv2 = nn.Sequential(
+ conv_builder(planes, planes, midplanes, stride),
+ nn.BatchNorm3d(planes),
+ nn.ReLU(inplace=True)
+ )
+
+ # 1x1x1
+ self.conv3 = nn.Sequential(
+ nn.Conv3d(planes, planes * self.expansion, kernel_size=1, bias=False),
+ nn.BatchNorm3d(planes * self.expansion)
+ )
+ self.relu = nn.ReLU(inplace=True)
+ self.downsample = downsample
+ self.stride = stride
+
+ def forward(self, x):
+ residual = x
+
+ out = self.conv1(x)
+ out = self.conv2(out)
+ out = self.conv3(out)
+
+ if self.downsample is not None:
+ residual = self.downsample(x)
+
+ out += residual
+ out = self.relu(out)
+
+ return out
+
+
+class BasicStem(nn.Sequential):
+ """The default conv-batchnorm-relu stem
+ """
+ def __init__(self):
+ super(BasicStem, self).__init__(
+ nn.Conv3d(3, 64, kernel_size=(3, 7, 7), stride=(1, 2, 2),
+ padding=(1, 3, 3), bias=False),
+ nn.BatchNorm3d(64),
+ nn.ReLU(inplace=True))
+
+
+class R2Plus1dStem(nn.Sequential):
+ """R(2+1)D stem is different from the default one as it uses a separated 3D convolution
+ """
+ def __init__(self):
+ super(R2Plus1dStem, self).__init__(
+ nn.Conv3d(3, 45, kernel_size=(1, 7, 7),
+ stride=(1, 2, 2), padding=(0, 3, 3),
+ bias=False),
+ nn.BatchNorm3d(45),
+ nn.ReLU(inplace=True),
+ nn.Conv3d(45, 64, kernel_size=(3, 1, 1),
+ stride=(1, 1, 1), padding=(1, 0, 0),
+ bias=False),
+ nn.BatchNorm3d(64),
+ nn.ReLU(inplace=True))
+
+
+class VideoResNet(nn.Module):
+
+ def __init__(self, block, conv_makers, layers,
+ stem, num_classes=400,
+ zero_init_residual=False):
+ """Generic resnet video generator.
+
+ Args:
+ block (nn.Module): resnet building block
+ conv_makers (list(functions)): generator function for each layer
+ layers (List[int]): number of blocks per layer
+ stem (nn.Module, optional): Resnet stem, if None, defaults to conv-bn-relu. Defaults to None.
+ num_classes (int, optional): Dimension of the final FC layer. Defaults to 400.
+ zero_init_residual (bool, optional): Zero init bottleneck residual BN. Defaults to False.
+ """
+ super(VideoResNet, self).__init__()
+ self.inplanes = 64
+
+ self.stem = stem()
+
+ self.layer1 = self._make_layer(block, conv_makers[0], 64, layers[0], stride=1)
+ self.layer2 = self._make_layer(block, conv_makers[1], 128, layers[1], stride=2)
+ self.layer3 = self._make_layer(block, conv_makers[2], 256, layers[2], stride=2)
+ self.layer4 = self._make_layer(block, conv_makers[3], 512, layers[3], stride=2)
+
+ self.avgpool = nn.AdaptiveAvgPool3d((1, 1, 1))
+ self.fc = nn.Linear(512 * block.expansion, num_classes)
+
+ # init weights
+ self._initialize_weights()
+
+ if zero_init_residual:
+ for m in self.modules():
+ if isinstance(m, Bottleneck):
+ nn.init.constant_(m.bn3.weight, 0)
+
+ def forward(self, x):
+ x = self.stem(x)
+
+ x = self.layer1(x)
+ x = self.layer2(x)
+ x = self.layer3(x)
+ x = self.layer4(x)
+
+ x = self.avgpool(x)
+ # Flatten the layer to fc
+ x = x.flatten(1)
+ x = self.fc(x)
+
+ return x
+
+ def _make_layer(self, block, conv_builder, planes, blocks, stride=1):
+ downsample = None
+
+ if stride != 1 or self.inplanes != planes * block.expansion:
+ ds_stride = conv_builder.get_downsample_stride(stride)
+ downsample = nn.Sequential(
+ nn.Conv3d(self.inplanes, planes * block.expansion,
+ kernel_size=1, stride=ds_stride, bias=False),
+ nn.BatchNorm3d(planes * block.expansion)
+ )
+ layers = []
+ layers.append(block(self.inplanes, planes, conv_builder, stride, downsample))
+
+ self.inplanes = planes * block.expansion
+ for i in range(1, blocks):
+ layers.append(block(self.inplanes, planes, conv_builder))
+
+ return nn.Sequential(*layers)
+
+ def _initialize_weights(self):
+ for m in self.modules():
+ if isinstance(m, nn.Conv3d):
+ nn.init.kaiming_normal_(m.weight, mode='fan_out',
+ nonlinearity='relu')
+ if m.bias is not None:
+ nn.init.constant_(m.bias, 0)
+ elif isinstance(m, nn.BatchNorm3d):
+ nn.init.constant_(m.weight, 1)
+ nn.init.constant_(m.bias, 0)
+ elif isinstance(m, nn.Linear):
+ nn.init.normal_(m.weight, 0, 0.01)
+ nn.init.constant_(m.bias, 0)
+
+
+def _video_resnet(arch, pretrained=False, progress=True, **kwargs):
+ model = VideoResNet(**kwargs)
+
+ if pretrained:
+ state_dict = load_state_dict_from_url(model_urls[arch],
+ progress=progress)
+ model.load_state_dict(state_dict)
+ return model
+
+
+def r3d_18(pretrained=False, progress=True, **kwargs):
+ """Construct 18 layer Resnet3D model as in
+ https://arxiv.org/abs/1711.11248
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on Kinetics-400
+ progress (bool): If True, displays a progress bar of the download to stderr
+
+ Returns:
+ nn.Module: R3D-18 network
+ """
+
+ return _video_resnet('r3d_18',
+ pretrained, progress,
+ block=BasicBlock,
+ conv_makers=[Conv3DSimple] * 4,
+ layers=[2, 2, 2, 2],
+ stem=BasicStem, **kwargs)
+
+
+def mc3_18(pretrained=False, progress=True, **kwargs):
+ """Constructor for 18 layer Mixed Convolution network as in
+ https://arxiv.org/abs/1711.11248
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on Kinetics-400
+ progress (bool): If True, displays a progress bar of the download to stderr
+
+ Returns:
+ nn.Module: MC3 Network definition
+ """
+ return _video_resnet('mc3_18',
+ pretrained, progress,
+ block=BasicBlock,
+ conv_makers=[Conv3DSimple] + [Conv3DNoTemporal] * 3,
+ layers=[2, 2, 2, 2],
+ stem=BasicStem, **kwargs)
+
+
+def r2plus1d_18(pretrained=False, progress=True, **kwargs):
+ """Constructor for the 18 layer deep R(2+1)D network as in
+ https://arxiv.org/abs/1711.11248
+
+ Args:
+ pretrained (bool): If True, returns a model pre-trained on Kinetics-400
+ progress (bool): If True, displays a progress bar of the download to stderr
+
+ Returns:
+ nn.Module: R(2+1)D-18 network
+ """
+ return _video_resnet('r2plus1d_18',
+ pretrained, progress,
+ block=BasicBlock,
+ conv_makers=[Conv2Plus1D] * 4,
+ layers=[2, 2, 2, 2],
+ stem=R2Plus1dStem, **kwargs)
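+
+
+# Usage sketch (editor's illustration; pretrained=False avoids any download): the video
+# models take 5-D clips shaped [batch, channels, frames, height, width] and return one
+# logit vector per clip (400 Kinetics classes by default).
+if __name__ == "__main__":
+    net = r3d_18(pretrained=False)
+    clip = torch.rand(1, 3, 16, 112, 112)
+    print(net(clip).shape)  # expected: torch.Size([1, 400])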
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/__init__.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/__init__.py
new file mode 100644
index 0000000000..0ff2b0be2c
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/__init__.py
@@ -0,0 +1,20 @@
+from .boxes import nms, box_iou
+from .new_empty_tensor import _new_empty_tensor
+from .deform_conv import deform_conv2d, DeformConv2d
+from .roi_align import roi_align, RoIAlign
+from .roi_pool import roi_pool, RoIPool
+from .ps_roi_align import ps_roi_align, PSRoIAlign
+from .ps_roi_pool import ps_roi_pool, PSRoIPool
+from .poolers import MultiScaleRoIAlign
+from .feature_pyramid_network import FeaturePyramidNetwork
+
+from ._register_onnx_ops import _register_custom_op
+
+_register_custom_op()
+
+
+__all__ = [
+ 'deform_conv2d', 'DeformConv2d', 'nms', 'roi_align', 'RoIAlign', 'roi_pool',
+ 'RoIPool', '_new_empty_tensor', 'ps_roi_align', 'PSRoIAlign', 'ps_roi_pool',
+ 'PSRoIPool', 'MultiScaleRoIAlign', 'FeaturePyramidNetwork'
+]
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_register_onnx_ops.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_register_onnx_ops.py
new file mode 100644
index 0000000000..d9d9c5c094
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_register_onnx_ops.py
@@ -0,0 +1,51 @@
+import sys
+import torch
+
+_onnx_opset_version = 11
+
+
+def _register_custom_op():
+ from torch.onnx.symbolic_helper import parse_args, scalar_type_to_onnx, scalar_type_to_pytorch_type, \
+ cast_pytorch_to_onnx
+ from torch.onnx.symbolic_opset9 import select, unsqueeze, squeeze, _cast_Long, reshape
+
+ @parse_args('v', 'v', 'f')
+ def symbolic_multi_label_nms(g, boxes, scores, iou_threshold):
+ boxes = unsqueeze(g, boxes, 0)
+ scores = unsqueeze(g, unsqueeze(g, scores, 0), 0)
+ max_output_per_class = g.op('Constant', value_t=torch.tensor([sys.maxsize], dtype=torch.long))
+ iou_threshold = g.op('Constant', value_t=torch.tensor([iou_threshold], dtype=torch.float))
+ nms_out = g.op('NonMaxSuppression', boxes, scores, max_output_per_class, iou_threshold)
+ return squeeze(g, select(g, nms_out, 1, g.op('Constant', value_t=torch.tensor([2], dtype=torch.long))), 1)
+
+ @parse_args('v', 'v', 'f', 'i', 'i', 'i', 'i')
+ def roi_align(g, input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio, aligned):
+ if(aligned):
+ raise RuntimeError('Unsupported: ONNX export of roi_align with aligned')
+ batch_indices = _cast_Long(g, squeeze(g, select(g, rois, 1, g.op('Constant',
+ value_t=torch.tensor([0], dtype=torch.long))), 1), False)
+ rois = select(g, rois, 1, g.op('Constant', value_t=torch.tensor([1, 2, 3, 4], dtype=torch.long)))
+ return g.op('RoiAlign', input, rois, batch_indices, spatial_scale_f=spatial_scale,
+ output_height_i=pooled_height, output_width_i=pooled_width, sampling_ratio_i=sampling_ratio)
+
+ @parse_args('v', 'v', 'f', 'i', 'i')
+ def roi_pool(g, input, rois, spatial_scale, pooled_height, pooled_width):
+ roi_pool = g.op('MaxRoiPool', input, rois,
+ pooled_shape_i=(pooled_height, pooled_width), spatial_scale_f=spatial_scale)
+ return roi_pool, None
+
+ @parse_args('v', 'is')
+ def new_empty_tensor_op(g, input, shape):
+ dtype = input.type().scalarType()
+ if dtype is None:
+ dtype = 'Float'
+ dtype = scalar_type_to_onnx.index(cast_pytorch_to_onnx[dtype])
+ shape = g.op("Constant", value_t=torch.tensor(shape))
+ return g.op("ConstantOfShape", shape,
+ value_t=torch.tensor([0], dtype=scalar_type_to_pytorch_type[dtype]))
+
+ from torch.onnx import register_custom_op_symbolic
+ register_custom_op_symbolic('torchvision::nms', symbolic_multi_label_nms, _onnx_opset_version)
+ register_custom_op_symbolic('torchvision::roi_align', roi_align, _onnx_opset_version)
+ register_custom_op_symbolic('torchvision::roi_pool', roi_pool, _onnx_opset_version)
+ register_custom_op_symbolic('torchvision::_new_empty_tensor_op', new_empty_tensor_op, _onnx_opset_version)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils.py
new file mode 100644
index 0000000000..3a07c747f5
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils.py
@@ -0,0 +1,63 @@
+from typing import List, Optional, Tuple, Union
+
+import torch
+from torch import nn, Tensor
+
+
+def _cat(tensors: List[Tensor], dim: int = 0) -> Tensor:
+ """
+ Efficient version of torch.cat that avoids a copy if there is only a single element in a list
+ """
+ # TODO add back the assert
+ # assert isinstance(tensors, (list, tuple))
+ if len(tensors) == 1:
+ return tensors[0]
+ return torch.cat(tensors, dim)
+
+
+def convert_boxes_to_roi_format(boxes: List[Tensor]) -> Tensor:
+ concat_boxes = _cat([b for b in boxes], dim=0)
+ temp = []
+ for i, b in enumerate(boxes):
+ temp.append(torch.full_like(b[:, :1], i))
+ ids = _cat(temp, dim=0)
+ rois = torch.cat([ids, concat_boxes], dim=1)
+ return rois
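+
+# Worked example (editor's illustration): the RoI ops accept either List[Tensor[L, 4]]
+# or a single Tensor[K, 5] whose first column is the image index within the batch; this
+# helper produces the latter from the former.
+# >>> b0 = torch.tensor([[0., 0., 10., 10.]])
+# >>> b1 = torch.tensor([[5., 5., 8., 9.]])
+# >>> convert_boxes_to_roi_format([b0, b1])
+# tensor([[ 0.,  0.,  0., 10., 10.],
+#         [ 1.,  5.,  5.,  8.,  9.]])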
+
+
+def check_roi_boxes_shape(boxes: Union[Tensor, List[Tensor]]):
+ if isinstance(boxes, (list, tuple)):
+ for _tensor in boxes:
+ assert (
+ _tensor.size(1) == 4
+ ), "The shape of the tensor in the boxes list is not correct as List[Tensor[L, 4]]"
+ elif isinstance(boxes, torch.Tensor):
+ assert boxes.size(1) == 5, "The boxes tensor shape is not correct as Tensor[K, 5]"
+ else:
+ assert False, "boxes is expected to be a Tensor[L, 5] or a List[Tensor[K, 4]]"
+ return
+
+
+def split_normalization_params(
+ model: nn.Module, norm_classes: Optional[List[type]] = None
+) -> Tuple[List[Tensor], List[Tensor]]:
+ # Adapted from https://github.com/facebookresearch/ClassyVision/blob/659d7f78/classy_vision/generic/util.py#L501
+ if not norm_classes:
+ norm_classes = [nn.modules.batchnorm._BatchNorm, nn.LayerNorm, nn.GroupNorm]
+
+ for t in norm_classes:
+ if not issubclass(t, nn.Module):
+ raise ValueError(f"Class {t} is not a subclass of nn.Module.")
+
+ classes = tuple(norm_classes)
+
+ norm_params = []
+ other_params = []
+ for module in model.modules():
+ if next(module.children(), None):
+ other_params.extend(p for p in module.parameters(recurse=False) if p.requires_grad)
+ elif isinstance(module, classes):
+ norm_params.extend(p for p in module.parameters() if p.requires_grad)
+ else:
+ other_params.extend(p for p in module.parameters() if p.requires_grad)
+ return norm_params, other_params
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils_origin.py
new file mode 100644
index 0000000000..714022f042
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils_origin.py
@@ -0,0 +1,38 @@
+import torch
+from torch import Tensor
+from torch.jit.annotations import List
+
+
+def _cat(tensors, dim=0):
+ # type: (List[Tensor], int) -> Tensor
+ """
+ Efficient version of torch.cat that avoids a copy if there is only a single element in a list
+ """
+ # TODO add back the assert
+ # assert isinstance(tensors, (list, tuple))
+ if len(tensors) == 1:
+ return tensors[0]
+ return torch.cat(tensors, dim)
+
+
+def convert_boxes_to_roi_format(boxes):
+ # type: (List[Tensor]) -> Tensor
+ concat_boxes = _cat([b for b in boxes], dim=0)
+ temp = []
+ for i, b in enumerate(boxes):
+ temp.append(torch.full_like(b[:, :1], i))
+ ids = _cat(temp, dim=0)
+ rois = torch.cat([ids, concat_boxes], dim=1)
+ return rois
+
+
+def check_roi_boxes_shape(boxes):
+ if isinstance(boxes, list):
+ for _tensor in boxes:
+ assert _tensor.size(1) == 4, \
+ 'The shape of the tensor in the boxes list is not correct as List[Tensor[L, 4]]'
+ elif isinstance(boxes, torch.Tensor):
+ assert boxes.size(1) == 5, 'The boxes tensor shape is not correct as Tensor[K, 5]'
+ else:
+ assert False, 'boxes is expected to be a Tensor[L, 5] or a List[Tensor[K, 4]]'
+ return
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/boxes.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/boxes.py
new file mode 100644
index 0000000000..ac0dba1fe7
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/boxes.py
@@ -0,0 +1,237 @@
+import torch
+from torch.jit.annotations import Tuple
+from torch import Tensor
+import torchvision
+import sys
+
+
+# for servers to immediately record the logs
+def flush_print(func):
+ def new_print(*args, **kwargs):
+ func(*args, **kwargs)
+ sys.stdout.flush()
+ return new_print
+print = flush_print(print)
+
+
+def nms_origin(boxes, scores, iou_threshold):
+ # type: (Tensor, Tensor, float)
+ """
+ Performs non-maximum suppression (NMS) on the boxes according
+ to their intersection-over-union (IoU).
+
+ NMS iteratively removes lower scoring boxes which have an
+ IoU greater than iou_threshold with another (higher scoring)
+ box.
+
+ Parameters
+ ----------
+ boxes : Tensor[N, 4])
+ boxes to perform NMS on. They
+ are expected to be in (x1, y1, x2, y2) format
+ scores : Tensor[N]
+ scores for each one of the boxes
+ iou_threshold : float
+ discards all overlapping
+ boxes with IoU > iou_threshold
+
+ Returns
+ -------
+ keep : Tensor
+ int64 tensor with the indices
+ of the elements that have been kept
+ by NMS, sorted in decreasing order of scores
+ """
+ keep = []  # indices (into boxes) of the detections that are finally kept
+ idxs = scores.argsort()  # indices sorted by ascending score
+ while idxs.numel() > 0:  # loop until no candidate indices remain; numel(): number of elements
+ # index of the current highest-scoring box, and its coordinates
+ max_score_index = idxs[-1]
+ max_score_box = boxes[max_score_index][None, :]  # [1, 4]
+ keep.append(max_score_index)
+ if idxs.size(0) == 1:  # only one box left
+ break
+ idxs = idxs[:-1]  # drop the top-scoring box, then compute IoU between it and the remaining boxes
+ other_boxes = boxes[idxs]  # [?, 4]
+ ious = box_iou(max_score_box, other_boxes)  # compare one box against all the rest, shape 1xM
+ idxs = idxs[ious[0] <= iou_threshold]
+
+ keep = idxs.new(keep) # Tensor
+ return keep
+ # return torch.ops.torchvision.nms(boxes, scores, iou_threshold)
+
+
+def nms(bboxes, scores, threshold=0.5):
+ x1 = bboxes[:, 0]
+ y1 = bboxes[:, 1]
+ x2 = bboxes[:, 2]
+ y2 = bboxes[:, 3]
+ areas = (x2 - x1) * (y2 - y1)
+ _, order = scores.sort(0, descending=True)
+
+ keep = []
+ while order.numel() > 0:
+ if order.numel() == 1:
+ i = order.item()
+ keep.append(i)
+ break
+ else:
+ i = order[0].item()
+ keep.append(i)
+
+ xx1 = x1[order[1:]].clamp(min=x1[i].item())
+ yy1 = y1[order[1:]].clamp(min=y1[i].item())
+ xx2 = x2[order[1:]].clamp(max=x2[i].item())
+ yy2 = y2[order[1:]].clamp(max=y2[i].item())
+ inter = (xx2 - xx1).clamp(min=0) * (yy2 - yy1).clamp(min=0)
+
+ iou = inter / (areas[i] + areas[order[1:]] - inter)
+ idx = (iou <= threshold).nonzero().squeeze()
+ if idx.numel() == 0:
+ break
+ order = order[idx + 1]
+ return torch.LongTensor(keep)
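+
+
+# Illustrative sketch: a tiny sanity check for the pure-PyTorch nms() above.
+# The name _example_nms is hypothetical (not part of the torchvision API);
+# expected indices assume boxes in (x1, y1, x2, y2) format.
+def _example_nms():
+    boxes = torch.tensor([[0., 0., 10., 10.],
+                          [1., 1., 11., 11.],     # IoU with the first box ~0.68
+                          [50., 50., 60., 60.]])
+    scores = torch.tensor([0.9, 0.8, 0.7])
+    keep = nms(boxes, scores, threshold=0.5)
+    # the second box overlaps the first above the threshold and is suppressed
+    return keep  # tensor([0, 2])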
+
+
+def batched_nms(boxes, scores, idxs, iou_threshold):
+ # type: (Tensor, Tensor, Tensor, float)
+ """
+ Performs non-maximum suppression in a batched fashion.
+
+    Each index value corresponds to a category, and NMS
+ will not be applied between elements of different categories.
+
+ Parameters
+ ----------
+ boxes : Tensor[N, 4]
+ boxes where NMS will be performed. They
+ are expected to be in (x1, y1, x2, y2) format
+ scores : Tensor[N]
+ scores for each one of the boxes
+ idxs : Tensor[N]
+ indices of the categories for each one of the boxes.
+ iou_threshold : float
+ discards all overlapping boxes
+ with IoU > iou_threshold
+
+ Returns
+ -------
+ keep : Tensor
+ int64 tensor with the indices of
+ the elements that have been kept by NMS, sorted
+ in decreasing order of scores
+ """
+ if boxes.numel() == 0:
+ return torch.empty((0,), dtype=torch.int64, device=boxes.device)
+ # strategy: in order to perform NMS independently per class.
+ # we add an offset to all the boxes. The offset is dependent
+ # only on the class idx, and is large enough so that boxes
+ # from different classes do not overlap
+ max_coordinate = boxes.max()
+ offsets = idxs.to(boxes) * (max_coordinate + 1)
+ boxes_for_nms = boxes + offsets[:, None]
+ keep = nms(boxes_for_nms, scores, iou_threshold)
+ return keep
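+
+
+# Illustrative sketch: batched_nms keeps heavily overlapping boxes when they
+# belong to different categories, because the per-class coordinate offset
+# prevents cross-class suppression. _example_batched_nms is hypothetical.
+def _example_batched_nms():
+    boxes = torch.tensor([[0., 0., 10., 10.],
+                          [1., 1., 11., 11.]])    # IoU with the first box > 0.5
+    scores = torch.tensor([0.9, 0.8])
+    idxs = torch.tensor([0, 1])                   # a different class for each box
+    keep = batched_nms(boxes, scores, idxs, iou_threshold=0.5)
+    return keep  # tensor([0, 1]): both survive because NMS runs per class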
+
+
+def remove_small_boxes(boxes, min_size):
+ # type: (Tensor, float)
+ """
+    Remove boxes which contain at least one side smaller than min_size.
+
+ Arguments:
+ boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format
+ min_size (float): minimum size
+
+ Returns:
+ keep (Tensor[K]): indices of the boxes that have both sides
+ larger than min_size
+ """
+ ws, hs = boxes[:, 2] - boxes[:, 0], boxes[:, 3] - boxes[:, 1]
+ keep = (ws >= min_size) & (hs >= min_size)
+ keep = keep.nonzero().squeeze(1)
+ return keep
+
+
+def clip_boxes_to_image(boxes, size):
+ # type: (Tensor, Tuple[int, int])
+ """
+ Clip boxes so that they lie inside an image of size `size`.
+
+ Arguments:
+ boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format
+ size (Tuple[height, width]): size of the image
+
+ Returns:
+ clipped_boxes (Tensor[N, 4])
+ """
+ dim = boxes.dim()
+ boxes_x = boxes[..., 0::2]
+ boxes_y = boxes[..., 1::2]
+ height, width = size
+
+ if torchvision._is_tracing():
+ boxes_x = torch.max(boxes_x, torch.tensor(0, dtype=boxes.dtype, device=boxes.device))
+ boxes_x = torch.min(boxes_x, torch.tensor(width, dtype=boxes.dtype, device=boxes.device))
+ boxes_y = torch.max(boxes_y, torch.tensor(0, dtype=boxes.dtype, device=boxes.device))
+ boxes_y = torch.min(boxes_y, torch.tensor(height, dtype=boxes.dtype, device=boxes.device))
+ else:
+ boxes_x = boxes_x.clamp(min=0, max=width)
+ boxes_y = boxes_y.clamp(min=0, max=height)
+
+ clipped_boxes = torch.stack((boxes_x, boxes_y), dim=dim)
+ return clipped_boxes.reshape(boxes.shape)
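+
+
+# Illustrative sketch: clip_boxes_to_image with a box that spills over the
+# image borders. _example_clip_boxes_to_image is hypothetical.
+def _example_clip_boxes_to_image():
+    boxes = torch.tensor([[-5., -5., 120., 90.]])
+    clipped = clip_boxes_to_image(boxes, size=(100, 100))  # size is (height, width)
+    return clipped  # tensor([[0., 0., 100., 90.]])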
+
+
+def box_area(boxes):
+ """
+ Computes the area of a set of bounding boxes, which are specified by its
+ (x1, y1, x2, y2) coordinates.
+
+ Arguments:
+ boxes (Tensor[N, 4]): boxes for which the area will be computed. They
+ are expected to be in (x1, y1, x2, y2) format
+
+ Returns:
+ area (Tensor[N]): area for each box
+ """
+
+ # torch.save(boxes, 'boxes.pth')
+ a = (boxes[:, 2] - boxes[:, 0])
+ b = (boxes[:, 3] - boxes[:, 1])
+ c = a * b
+ return c
+ # return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
+
+
+# implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py
+# with slight modifications
+def box_iou(boxes1, boxes2):
+ """
+ Return intersection-over-union (Jaccard index) of boxes.
+
+ Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
+
+ Arguments:
+ boxes1 (Tensor[N, 4])
+ boxes2 (Tensor[M, 4])
+
+ Returns:
+ iou (Tensor[N, M]): the NxM matrix containing the pairwise
+ IoU values for every element in boxes1 and boxes2
+ """
+ area1 = box_area(boxes1)
+ area2 = box_area(boxes2)
+
+ # lt = torch.max(boxes1[:, None, :2], boxes2[:, :2]) # [N,M,2]
+ # rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) # [N,M,2]
+ lt = torch.max(boxes1[:, None, :2], boxes2[:, :2].float()) # [N,M,2]
+ rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:].float()) # [N,M,2]
+
+ wh = (rb - lt).clamp(min=0) # [N,M,2]
+ a = wh[:, :, 0]
+ b = wh[:, :, 1]
+ inter = a * b # [N,M]
+
+ iou = inter / (area1[:, None] + area2 - inter)
+ return iou
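+
+
+# Illustrative sketch: pairwise IoU of one box against two others.
+# _example_box_iou is hypothetical; the second value is 25 / (100 + 100 - 25) ~= 0.1429.
+def _example_box_iou():
+    boxes1 = torch.tensor([[0., 0., 10., 10.]])
+    boxes2 = torch.tensor([[0., 0., 10., 10.],
+                           [5., 5., 15., 15.]])
+    return box_iou(boxes1, boxes2)  # tensor([[1.0000, 0.1429]])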
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/deform_conv.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/deform_conv.py
new file mode 100644
index 0000000000..c948b16419
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/deform_conv.py
@@ -0,0 +1,139 @@
+import math
+
+import torch
+from torch import nn, Tensor
+from torch.nn import init
+from torch.nn.parameter import Parameter
+from torch.nn.modules.utils import _pair
+from torch.jit.annotations import Optional, Tuple
+
+
+def deform_conv2d(input, offset, weight, bias=None, stride=(1, 1), padding=(0, 0), dilation=(1, 1)):
+ # type: (Tensor, Tensor, Tensor, Optional[Tensor], Tuple[int, int], Tuple[int, int], Tuple[int, int]) -> Tensor
+ """
+ Performs Deformable Convolution, described in Deformable Convolutional Networks
+
+ Arguments:
+ input (Tensor[batch_size, in_channels, in_height, in_width]): input tensor
+ offset (Tensor[batch_size, 2 * offset_groups * kernel_height * kernel_width,
+ out_height, out_width]): offsets to be applied for each position in the
+ convolution kernel.
+ weight (Tensor[out_channels, in_channels // groups, kernel_height, kernel_width]):
+ convolution weights, split into groups of size (in_channels // groups)
+ bias (Tensor[out_channels]): optional bias of shape (out_channels,). Default: None
+ stride (int or Tuple[int, int]): distance between convolution centers. Default: 1
+ padding (int or Tuple[int, int]): height/width of padding of zeroes around
+ each image. Default: 0
+ dilation (int or Tuple[int, int]): the spacing between kernel elements. Default: 1
+
+ Returns:
+ output (Tensor[batch_sz, out_channels, out_h, out_w]): result of convolution
+
+
+ Examples::
+ >>> input = torch.rand(1, 3, 10, 10)
+ >>> kh, kw = 3, 3
+ >>> weight = torch.rand(5, 3, kh, kw)
+ >>> # offset should have the same spatial size as the output
+ >>> # of the convolution. In this case, for an input of 10, stride of 1
+ >>> # and kernel size of 3, without padding, the output size is 8
+ >>> offset = torch.rand(5, 2 * kh * kw, 8, 8)
+ >>> out = deform_conv2d(input, offset, weight)
+ >>> print(out.shape)
+ >>> # returns
+ >>> torch.Size([1, 5, 8, 8])
+ """
+
+ out_channels = weight.shape[0]
+ if bias is None:
+ bias = torch.zeros(out_channels, device=input.device, dtype=input.dtype)
+
+ stride_h, stride_w = _pair(stride)
+ pad_h, pad_w = _pair(padding)
+ dil_h, dil_w = _pair(dilation)
+ weights_h, weights_w = weight.shape[-2:]
+ _, n_in_channels, in_h, in_w = input.shape
+
+ n_offset_grps = offset.shape[1] // (2 * weights_h * weights_w)
+ n_weight_grps = n_in_channels // weight.shape[1]
+
+ if n_offset_grps == 0:
+ raise RuntimeError(
+ "the shape of the offset tensor at dimension 1 is not valid. It should "
+ "be a multiple of 2 * weight.size[2] * weight.size[3].\n"
+ "Got offset.shape[1]={}, while 2 * weight.size[2] * weight.size[3]={}".format(
+ offset.shape[1], 2 * weights_h * weights_w))
+
+ return torch.ops.torchvision.deform_conv2d(
+ input,
+ weight,
+ offset,
+ bias,
+ stride_h, stride_w,
+ pad_h, pad_w,
+ dil_h, dil_w,
+ n_weight_grps,
+ n_offset_grps)
+
+
+class DeformConv2d(nn.Module):
+ """
+ See deform_conv2d
+ """
+ def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0,
+ dilation=1, groups=1, bias=True):
+ super(DeformConv2d, self).__init__()
+
+ if in_channels % groups != 0:
+ raise ValueError('in_channels must be divisible by groups')
+ if out_channels % groups != 0:
+ raise ValueError('out_channels must be divisible by groups')
+
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.kernel_size = _pair(kernel_size)
+ self.stride = _pair(stride)
+ self.padding = _pair(padding)
+ self.dilation = _pair(dilation)
+ self.groups = groups
+
+ self.weight = Parameter(torch.empty(out_channels, in_channels // groups,
+ self.kernel_size[0], self.kernel_size[1]))
+
+ if bias:
+ self.bias = Parameter(torch.empty(out_channels))
+ else:
+ self.register_parameter('bias', None)
+
+ self.reset_parameters()
+
+ def reset_parameters(self):
+ init.kaiming_uniform_(self.weight, a=math.sqrt(5))
+ if self.bias is not None:
+ fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
+ bound = 1 / math.sqrt(fan_in)
+ init.uniform_(self.bias, -bound, bound)
+
+ def forward(self, input, offset):
+ """
+ Arguments:
+ input (Tensor[batch_size, in_channels, in_height, in_width]): input tensor
+ offset (Tensor[batch_size, 2 * offset_groups * kernel_height * kernel_width,
+ out_height, out_width]): offsets to be applied for each position in the
+ convolution kernel.
+ """
+ return deform_conv2d(input, offset, self.weight, self.bias, stride=self.stride,
+ padding=self.padding, dilation=self.dilation)
+
+ def __repr__(self):
+ s = self.__class__.__name__ + '('
+ s += '{in_channels}'
+ s += ', {out_channels}'
+ s += ', kernel_size={kernel_size}'
+ s += ', stride={stride}'
+ s += ', padding={padding}' if self.padding != (0, 0) else ''
+ s += ', dilation={dilation}' if self.dilation != (1, 1) else ''
+ s += ', groups={groups}' if self.groups != 1 else ''
+ s += ', bias=False' if self.bias is None else ''
+ s += ')'
+ return s.format(**self.__dict__)
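+
+
+# Illustrative sketch: using the DeformConv2d module above.
+# _example_deform_conv2d_module is hypothetical, and running it requires the
+# compiled torchvision extension that provides torch.ops.torchvision.deform_conv2d.
+def _example_deform_conv2d_module():
+    m = DeformConv2d(in_channels=3, out_channels=5, kernel_size=3)
+    x = torch.rand(1, 3, 10, 10)
+    # the offset must match the convolution output size (8x8 for a 10x10 input,
+    # kernel 3, stride 1, no padding) and carry 2 values per kernel position
+    offset = torch.rand(1, 2 * 3 * 3, 8, 8)
+    out = m(x, offset)
+    return out.shape  # torch.Size([1, 5, 8, 8])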
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/feature_pyramid_network.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/feature_pyramid_network.py
new file mode 100644
index 0000000000..09e79cc7ef
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/feature_pyramid_network.py
@@ -0,0 +1,193 @@
+from collections import OrderedDict
+
+import torch
+import torch.nn.functional as F
+from torch import nn, Tensor
+
+from torch.jit.annotations import Tuple, List, Dict
+
+
+class FeaturePyramidNetwork(nn.Module):
+ """
+    Module that adds an FPN on top of a set of feature maps. This is based on
+ `"Feature Pyramid Network for Object Detection" `_.
+
+ The feature maps are currently supposed to be in increasing depth
+ order.
+
+ The input to the model is expected to be an OrderedDict[Tensor], containing
+ the feature maps on top of which the FPN will be added.
+
+ Arguments:
+ in_channels_list (list[int]): number of channels for each feature map that
+ is passed to the module
+ out_channels (int): number of channels of the FPN representation
+ extra_blocks (ExtraFPNBlock or None): if provided, extra operations will
+ be performed. It is expected to take the fpn features, the original
+ features and the names of the original features as input, and returns
+ a new list of feature maps and their corresponding names
+
+ Examples::
+
+ >>> m = torchvision.ops.FeaturePyramidNetwork([10, 20, 30], 5)
+ >>> # get some dummy data
+ >>> x = OrderedDict()
+ >>> x['feat0'] = torch.rand(1, 10, 64, 64)
+ >>> x['feat2'] = torch.rand(1, 20, 16, 16)
+ >>> x['feat3'] = torch.rand(1, 30, 8, 8)
+ >>> # compute the FPN on top of x
+ >>> output = m(x)
+ >>> print([(k, v.shape) for k, v in output.items()])
+ >>> # returns
+ >>> [('feat0', torch.Size([1, 5, 64, 64])),
+ >>> ('feat2', torch.Size([1, 5, 16, 16])),
+ >>> ('feat3', torch.Size([1, 5, 8, 8]))]
+
+ """
+ def __init__(self, in_channels_list, out_channels, extra_blocks=None):
+ super(FeaturePyramidNetwork, self).__init__()
+ self.inner_blocks = nn.ModuleList()
+ self.layer_blocks = nn.ModuleList()
+ for in_channels in in_channels_list:
+ if in_channels == 0:
+ raise ValueError("in_channels=0 is currently not supported")
+ inner_block_module = nn.Conv2d(in_channels, out_channels, 1)
+ layer_block_module = nn.Conv2d(out_channels, out_channels, 3, padding=1)
+ self.inner_blocks.append(inner_block_module)
+ self.layer_blocks.append(layer_block_module)
+
+ # initialize parameters now to avoid modifying the initialization of top_blocks
+ for m in self.children():
+ if isinstance(m, nn.Conv2d):
+ nn.init.kaiming_uniform_(m.weight, a=1)
+ nn.init.constant_(m.bias, 0)
+
+ if extra_blocks is not None:
+ assert isinstance(extra_blocks, ExtraFPNBlock)
+ self.extra_blocks = extra_blocks
+
+ def get_result_from_inner_blocks(self, x, idx):
+ # type: (Tensor, int)
+ """
+ This is equivalent to self.inner_blocks[idx](x),
+ but torchscript doesn't support this yet
+ """
+ num_blocks = 0
+ for m in self.inner_blocks:
+ num_blocks += 1
+ if idx < 0:
+ idx += num_blocks
+ i = 0
+ out = x
+ for module in self.inner_blocks:
+ if i == idx:
+ out = module(x)
+ i += 1
+ return out
+
+ def get_result_from_layer_blocks(self, x, idx):
+ # type: (Tensor, int)
+ """
+ This is equivalent to self.layer_blocks[idx](x),
+ but torchscript doesn't support this yet
+ """
+ num_blocks = 0
+ for m in self.layer_blocks:
+ num_blocks += 1
+ if idx < 0:
+ idx += num_blocks
+ i = 0
+ out = x
+ for module in self.layer_blocks:
+ if i == idx:
+ out = module(x)
+ i += 1
+ return out
+
+ def forward(self, x):
+ # type: (Dict[str, Tensor])
+ """
+ Computes the FPN for a set of feature maps.
+
+ Arguments:
+ x (OrderedDict[Tensor]): feature maps for each feature level.
+
+ Returns:
+ results (OrderedDict[Tensor]): feature maps after FPN layers.
+ They are ordered from highest resolution first.
+ """
+ # unpack OrderedDict into two lists for easier handling
+ names = list(x.keys())
+ x = list(x.values())
+
+ last_inner = self.get_result_from_inner_blocks(x[-1], -1)
+ results = []
+ results.append(self.get_result_from_layer_blocks(last_inner, -1))
+
+ for idx in range(len(x) - 2, -1, -1):
+ inner_lateral = self.get_result_from_inner_blocks(x[idx], idx)
+ feat_shape = inner_lateral.shape[-2:]
+ inner_top_down = F.interpolate(last_inner, size=feat_shape, mode="nearest")
+ last_inner = inner_lateral + inner_top_down
+ results.insert(0, self.get_result_from_layer_blocks(last_inner, idx))
+
+ if self.extra_blocks is not None:
+ results, names = self.extra_blocks(results, x, names)
+
+ # make it back an OrderedDict
+ out = OrderedDict([(k, v) for k, v in zip(names, results)])
+
+ return out
+
+
+class ExtraFPNBlock(nn.Module):
+ """
+ Base class for the extra block in the FPN.
+
+ Arguments:
+ results (List[Tensor]): the result of the FPN
+ x (List[Tensor]): the original feature maps
+ names (List[str]): the names for each one of the
+ original feature maps
+
+ Returns:
+ results (List[Tensor]): the extended set of results
+ of the FPN
+ names (List[str]): the extended set of names for the results
+ """
+ def forward(self, results, x, names):
+ pass
+
+
+class LastLevelMaxPool(ExtraFPNBlock):
+ """
+ Applies a max_pool2d on top of the last feature map
+ """
+ def forward(self, x, y, names):
+ # type: (List[Tensor], List[Tensor], List[str])
+ names.append("pool")
+ x.append(F.max_pool2d(x[-1], 1, 2, 0))
+ return x, names
+
+
+class LastLevelP6P7(ExtraFPNBlock):
+ """
+ This module is used in RetinaNet to generate extra layers, P6 and P7.
+ """
+ def __init__(self, in_channels, out_channels):
+ super(LastLevelP6P7, self).__init__()
+ self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1)
+ self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1)
+ for module in [self.p6, self.p7]:
+ nn.init.kaiming_uniform_(module.weight, a=1)
+ nn.init.constant_(module.bias, 0)
+ self.use_P5 = in_channels == out_channels
+
+ def forward(self, p, c, names):
+ p5, c5 = p[-1], c[-1]
+ x = p5 if self.use_P5 else c5
+ p6 = self.p6(x)
+ p7 = self.p7(F.relu(p6))
+ p.extend([p6, p7])
+ names.extend(["p6", "p7"])
+ return p, names
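+
+
+# Illustrative sketch: LastLevelP6P7 appends two extra levels, each halving the
+# spatial size of the previous one. _example_last_level_p6p7 is hypothetical.
+def _example_last_level_p6p7():
+    block = LastLevelP6P7(in_channels=256, out_channels=256)
+    p = [torch.rand(1, 256, 8, 8)]    # FPN output (P5 only, for brevity)
+    c = [torch.rand(1, 256, 8, 8)]    # matching backbone feature (C5)
+    p_out, names = block(p, c, ["p5"])
+    # spatial sizes: 8x8 (p5) -> 4x4 (p6) -> 2x2 (p7)
+    return [t.shape for t in p_out], names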
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc.py
new file mode 100644
index 0000000000..caf0d999f7
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc.py
@@ -0,0 +1,168 @@
+"""
+helper class that supports empty tensors on some nn functions.
+
+Ideally, add support directly in PyTorch to empty tensors in
+those functions.
+
+This can be removed once https://github.com/pytorch/pytorch/issues/12013
+is implemented
+"""
+
+import warnings
+from typing import Callable, List, Optional
+
+import torch
+from torch import Tensor
+
+
+class Conv2d(torch.nn.Conv2d):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ warnings.warn(
+ "torchvision.ops.misc.Conv2d is deprecated and will be "
+ "removed in future versions, use torch.nn.Conv2d instead.",
+ FutureWarning,
+ )
+
+
+class ConvTranspose2d(torch.nn.ConvTranspose2d):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ warnings.warn(
+ "torchvision.ops.misc.ConvTranspose2d is deprecated and will be "
+ "removed in future versions, use torch.nn.ConvTranspose2d instead.",
+ FutureWarning,
+ )
+
+
+class BatchNorm2d(torch.nn.BatchNorm2d):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ warnings.warn(
+ "torchvision.ops.misc.BatchNorm2d is deprecated and will be "
+ "removed in future versions, use torch.nn.BatchNorm2d instead.",
+ FutureWarning,
+ )
+
+
+interpolate = torch.nn.functional.interpolate
+
+
+# This is not in nn
+class FrozenBatchNorm2d(torch.nn.Module):
+ """
+ BatchNorm2d where the batch statistics and the affine parameters
+ are fixed
+ """
+
+ def __init__(
+ self,
+ num_features: int,
+ eps: float = 1e-5,
+ n: Optional[int] = None,
+ ):
+ # n=None for backward-compatibility
+ if n is not None:
+ warnings.warn("`n` argument is deprecated and has been renamed `num_features`", DeprecationWarning)
+ num_features = n
+ super().__init__()
+ self.eps = eps
+ self.register_buffer("weight", torch.ones(num_features))
+ self.register_buffer("bias", torch.zeros(num_features))
+ self.register_buffer("running_mean", torch.zeros(num_features))
+ self.register_buffer("running_var", torch.ones(num_features))
+
+ def _load_from_state_dict(
+ self,
+ state_dict: dict,
+ prefix: str,
+ local_metadata: dict,
+ strict: bool,
+ missing_keys: List[str],
+ unexpected_keys: List[str],
+ error_msgs: List[str],
+ ):
+ num_batches_tracked_key = prefix + "num_batches_tracked"
+ if num_batches_tracked_key in state_dict:
+ del state_dict[num_batches_tracked_key]
+
+ super()._load_from_state_dict(
+ state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs
+ )
+
+ def forward(self, x: Tensor) -> Tensor:
+ # move reshapes to the beginning
+ # to make it fuser-friendly
+ w = self.weight.reshape(1, -1, 1, 1)
+ b = self.bias.reshape(1, -1, 1, 1)
+ rv = self.running_var.reshape(1, -1, 1, 1)
+ rm = self.running_mean.reshape(1, -1, 1, 1)
+ scale = w * (rv + self.eps).rsqrt()
+ bias = b - rm * scale
+ return x * scale + bias
+
+ def __repr__(self) -> str:
+ return f"{self.__class__.__name__}({self.weight.shape[0]}, eps={self.eps})"
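+
+
+# Illustrative sketch: with its default buffers (weight=1, bias=0,
+# running_mean=0, running_var=1) FrozenBatchNorm2d is close to the identity,
+# up to the eps term in the denominator. _example_frozen_batch_norm is hypothetical.
+def _example_frozen_batch_norm():
+    fbn = FrozenBatchNorm2d(num_features=4)
+    x = torch.rand(2, 4, 8, 8)
+    out = fbn(x)
+    assert torch.allclose(out, x, atol=1e-3)
+    return out.shape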
+
+
+class ConvNormActivation(torch.nn.Sequential):
+ def __init__(
+ self,
+ in_channels: int,
+ out_channels: int,
+ kernel_size: int = 3,
+ stride: int = 1,
+ padding: Optional[int] = None,
+ groups: int = 1,
+ norm_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.BatchNorm2d,
+ activation_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.ReLU,
+ dilation: int = 1,
+ inplace: bool = True,
+ ) -> None:
+ if padding is None:
+ padding = (kernel_size - 1) // 2 * dilation
+ layers = [
+ torch.nn.Conv2d(
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride,
+ padding,
+ dilation=dilation,
+ groups=groups,
+ bias=norm_layer is None,
+ )
+ ]
+ if norm_layer is not None:
+ layers.append(norm_layer(out_channels))
+ if activation_layer is not None:
+ layers.append(activation_layer(inplace=inplace))
+ super().__init__(*layers)
+ self.out_channels = out_channels
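+
+
+# Illustrative sketch: ConvNormActivation builds a Conv2d -> BatchNorm2d -> ReLU
+# block and derives the padding as (kernel_size - 1) // 2 * dilation.
+# _example_conv_norm_activation is hypothetical.
+def _example_conv_norm_activation():
+    block = ConvNormActivation(in_channels=3, out_channels=16, kernel_size=3, stride=2)
+    x = torch.rand(1, 3, 32, 32)
+    return block(x).shape  # torch.Size([1, 16, 16, 16])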
+
+
+class SqueezeExcitation(torch.nn.Module):
+ def __init__(
+ self,
+ input_channels: int,
+ squeeze_channels: int,
+ activation: Callable[..., torch.nn.Module] = torch.nn.ReLU,
+ scale_activation: Callable[..., torch.nn.Module] = torch.nn.Sigmoid,
+ ) -> None:
+ super().__init__()
+ self.avgpool = torch.nn.AdaptiveAvgPool2d(1)
+ self.fc1 = torch.nn.Conv2d(input_channels, squeeze_channels, 1)
+ self.fc2 = torch.nn.Conv2d(squeeze_channels, input_channels, 1)
+ self.activation = activation()
+ self.scale_activation = scale_activation()
+
+ def _scale(self, input: Tensor) -> Tensor:
+ scale = self.avgpool(input)
+ scale = self.fc1(scale)
+ scale = self.activation(scale)
+ scale = self.fc2(scale)
+ return self.scale_activation(scale)
+
+ def forward(self, input: Tensor) -> Tensor:
+ scale = self._scale(input)
+ return scale * input
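+
+
+# Illustrative sketch: SqueezeExcitation rescales each channel with a gate in
+# (0, 1) computed from the global average pool, leaving the tensor shape
+# unchanged. _example_squeeze_excitation is hypothetical.
+def _example_squeeze_excitation():
+    se = SqueezeExcitation(input_channels=32, squeeze_channels=8)
+    x = torch.rand(1, 32, 16, 16)
+    out = se(x)
+    assert out.shape == x.shape
+    return out.shape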
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc_origin.py
new file mode 100644
index 0000000000..65e150700a
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc_origin.py
@@ -0,0 +1,153 @@
+from collections import OrderedDict
+from torch.jit.annotations import Optional, List
+from torch import Tensor
+
+"""
+helper class that supports empty tensors on some nn functions.
+
+Ideally, add support directly in PyTorch to empty tensors in
+those functions.
+
+This can be removed once https://github.com/pytorch/pytorch/issues/12013
+is implemented
+"""
+
+import math
+import torch
+from torchvision.ops import _new_empty_tensor
+from torch.nn import Module, Conv2d
+import torch.nn.functional as F
+
+
+class ConvTranspose2d(torch.nn.ConvTranspose2d):
+ """
+ Equivalent to nn.ConvTranspose2d, but with support for empty batch sizes.
+ This will eventually be supported natively by PyTorch, and this
+ class can go away.
+ """
+ def forward(self, x):
+ if x.numel() > 0:
+ return self.super_forward(x)
+ # get output shape
+
+ output_shape = [
+ (i - 1) * d - 2 * p + (di * (k - 1) + 1) + op
+ for i, p, di, k, d, op in zip(
+ x.shape[-2:],
+ list(self.padding),
+ list(self.dilation),
+ list(self.kernel_size),
+ list(self.stride),
+ list(self.output_padding),
+ )
+ ]
+ output_shape = [x.shape[0], self.out_channels] + output_shape
+ return _new_empty_tensor(x, output_shape)
+
+ def super_forward(self, input, output_size=None):
+ # type: (Tensor, Optional[List[int]]) -> Tensor
+ if self.padding_mode != 'zeros':
+ raise ValueError('Only `zeros` padding mode is supported for ConvTranspose2d')
+
+ output_padding = self._output_padding(input, output_size, self.stride, self.padding, self.kernel_size)
+
+ return F.conv_transpose2d(
+ input, self.weight, self.bias, self.stride, self.padding,
+ output_padding, self.groups, self.dilation)
+
+
+class BatchNorm2d(torch.nn.BatchNorm2d):
+ """
+ Equivalent to nn.BatchNorm2d, but with support for empty batch sizes.
+ This will eventually be supported natively by PyTorch, and this
+ class can go away.
+ """
+ def forward(self, x):
+ if x.numel() > 0:
+ return super(BatchNorm2d, self).forward(x)
+ # get output shape
+ output_shape = x.shape
+ return _new_empty_tensor(x, output_shape)
+
+
+def _check_size_scale_factor(dim, size, scale_factor):
+ # type: (int, Optional[List[int]], Optional[float]) -> None
+ if size is None and scale_factor is None:
+ raise ValueError("either size or scale_factor should be defined")
+ if size is not None and scale_factor is not None:
+ raise ValueError("only one of size or scale_factor should be defined")
+ if scale_factor is not None:
+ if isinstance(scale_factor, (list, tuple)):
+ if len(scale_factor) != dim:
+ raise ValueError(
+ "scale_factor shape must match input shape. "
+ "Input is {}D, scale_factor size is {}".format(dim, len(scale_factor))
+ )
+
+
+def _output_size(dim, input, size, scale_factor):
+ # type: (int, Tensor, Optional[List[int]], Optional[float]) -> List[int]
+ assert dim == 2
+ _check_size_scale_factor(dim, size, scale_factor)
+ if size is not None:
+ return size
+ # if dim is not 2 or scale_factor is iterable use _ntuple instead of concat
+ assert scale_factor is not None and isinstance(scale_factor, (int, float))
+ scale_factors = [scale_factor, scale_factor]
+ # math.floor might return float in py2.7
+ return [
+ int(math.floor(input.size(i + 2) * scale_factors[i])) for i in range(dim)
+ ]
+
+
+def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corners=None):
+ # type: (Tensor, Optional[List[int]], Optional[float], str, Optional[bool]) -> Tensor
+ """
+ Equivalent to nn.functional.interpolate, but with support for empty batch sizes.
+ This will eventually be supported natively by PyTorch, and this
+ class can go away.
+ """
+ if input.numel() > 0:
+ return torch.nn.functional.interpolate(
+ input, size, scale_factor, mode, align_corners
+ )
+
+ output_shape = _output_size(2, input, size, scale_factor)
+ output_shape = list(input.shape[:-2]) + output_shape
+ return _new_empty_tensor(input, output_shape)
+
+
+# This is not in nn
+class FrozenBatchNorm2d(torch.nn.Module):
+ """
+ BatchNorm2d where the batch statistics and the affine parameters
+ are fixed
+ """
+
+ def __init__(self, n):
+ super(FrozenBatchNorm2d, self).__init__()
+ self.register_buffer("weight", torch.ones(n))
+ self.register_buffer("bias", torch.zeros(n))
+ self.register_buffer("running_mean", torch.zeros(n))
+ self.register_buffer("running_var", torch.ones(n))
+
+ def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
+ missing_keys, unexpected_keys, error_msgs):
+ num_batches_tracked_key = prefix + 'num_batches_tracked'
+ if num_batches_tracked_key in state_dict:
+ del state_dict[num_batches_tracked_key]
+
+ super(FrozenBatchNorm2d, self)._load_from_state_dict(
+ state_dict, prefix, local_metadata, strict,
+ missing_keys, unexpected_keys, error_msgs)
+
+ def forward(self, x):
+ # move reshapes to the beginning
+ # to make it fuser-friendly
+ w = self.weight.reshape(1, -1, 1, 1)
+ b = self.bias.reshape(1, -1, 1, 1)
+ rv = self.running_var.reshape(1, -1, 1, 1)
+ rm = self.running_mean.reshape(1, -1, 1, 1)
+ scale = w * rv.rsqrt()
+ bias = b - rm * scale
+ return x * scale + bias
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/new_empty_tensor.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/new_empty_tensor.py
new file mode 100644
index 0000000000..74455a98c4
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/new_empty_tensor.py
@@ -0,0 +1,16 @@
+import torch
+from torch.jit.annotations import List
+from torch import Tensor
+
+
+def _new_empty_tensor(x, shape):
+ # type: (Tensor, List[int]) -> Tensor
+ """
+ Arguments:
+ input (Tensor): input tensor
+        shape (List[int]): the new empty tensor shape
+
+ Returns:
+ output (Tensor)
+ """
+ return torch.ops.torchvision._new_empty_tensor_op(x, shape)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/poolers.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/poolers.py
new file mode 100644
index 0000000000..b94a9eb405
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/poolers.py
@@ -0,0 +1,232 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+import torch
+import torch.nn.functional as F
+from torch import nn, Tensor
+
+from torchvision.ops import roi_align
+from torchvision.ops.boxes import box_area
+
+from torch.jit.annotations import Optional, List, Dict, Tuple
+import torchvision
+
+# copying result_idx_in_level to a specific index in result[]
+# is not supported by ONNX tracing yet.
+# _onnx_merge_levels() is an implementation supported by ONNX
+# that merges the levels to the right indices
+@torch.jit.unused
+def _onnx_merge_levels(levels, unmerged_results):
+ # type: (Tensor, List[Tensor]) -> Tensor
+ first_result = unmerged_results[0]
+ dtype, device = first_result.dtype, first_result.device
+ res = torch.zeros((levels.size(0), first_result.size(1),
+ first_result.size(2), first_result.size(3)),
+ dtype=dtype, device=device)
+ for l in range(len(unmerged_results)):
+ index = (levels == l).nonzero().view(-1, 1, 1, 1)
+ index = index.expand(index.size(0),
+ unmerged_results[l].size(1),
+ unmerged_results[l].size(2),
+ unmerged_results[l].size(3))
+ res = res.scatter(0, index, unmerged_results[l])
+ return res
+
+
+# TODO: (eellison) T54974082 https://github.com/pytorch/pytorch/issues/26744/pytorch/issues/26744
+def initLevelMapper(k_min, k_max, canonical_scale=224, canonical_level=4, eps=1e-6):
+ # type: (int, int, int, int, float)
+ return LevelMapper(k_min, k_max, canonical_scale, canonical_level, eps)
+
+
+@torch.jit.script
+class LevelMapper(object):
+ """Determine which FPN level each RoI in a set of RoIs should map to based
+ on the heuristic in the FPN paper.
+
+ Arguments:
+ k_min (int)
+ k_max (int)
+ canonical_scale (int)
+ canonical_level (int)
+ eps (float)
+ """
+
+ def __init__(self, k_min, k_max, canonical_scale=224, canonical_level=4, eps=1e-6):
+ # type: (int, int, int, int, float)
+ self.k_min = k_min
+ self.k_max = k_max
+ self.s0 = canonical_scale
+ self.lvl0 = canonical_level
+ self.eps = eps
+
+ def __call__(self, boxlists):
+ # type: (List[Tensor])
+ """
+ Arguments:
+ boxlists (list[BoxList])
+ """
+ # Compute level ids
+ s = torch.sqrt(torch.cat([box_area(boxlist) for boxlist in boxlists]))
+
+ # Eqn.(1) in FPN paper
+ target_lvls = torch.floor(self.lvl0 + torch.log2(s / self.s0) + torch.tensor(self.eps, dtype=s.dtype))
+ target_lvls = torch.clamp(target_lvls, min=self.k_min, max=self.k_max)
+ return (target_lvls.to(torch.int64) - self.k_min).to(torch.int64)
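+
+
+# Illustrative sketch: the FPN Eqn.(1) heuristic maps a canonical 224x224 box to
+# canonical_level=4 and smaller boxes to lower levels; the result is shifted by
+# k_min. _example_level_mapper is hypothetical.
+def _example_level_mapper():
+    mapper = initLevelMapper(k_min=2, k_max=5)
+    boxes = [torch.tensor([[0., 0., 224., 224.],   # maps to level 4
+                           [0., 0., 56., 56.]])]   # 4 + log2(56 / 224) = 2
+    return mapper(boxes)  # tensor([2, 0]) after subtracting k_min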
+
+
+class MultiScaleRoIAlign(nn.Module):
+ """
+ Multi-scale RoIAlign pooling, which is useful for detection with or without FPN.
+
+ It infers the scale of the pooling via the heuristics present in the FPN paper.
+
+ Arguments:
+ featmap_names (List[str]): the names of the feature maps that will be used
+ for the pooling.
+ output_size (List[Tuple[int, int]] or List[int]): output size for the pooled region
+ sampling_ratio (int): sampling ratio for ROIAlign
+
+ Examples::
+
+ >>> m = torchvision.ops.MultiScaleRoIAlign(['feat1', 'feat3'], 3, 2)
+ >>> i = OrderedDict()
+ >>> i['feat1'] = torch.rand(1, 5, 64, 64)
+ >>> i['feat2'] = torch.rand(1, 5, 32, 32) # this feature won't be used in the pooling
+ >>> i['feat3'] = torch.rand(1, 5, 16, 16)
+ >>> # create some random bounding boxes
+ >>> boxes = torch.rand(6, 4) * 256; boxes[:, 2:] += boxes[:, :2]
+ >>> # original image size, before computing the feature maps
+ >>> image_sizes = [(512, 512)]
+ >>> output = m(i, [boxes], image_sizes)
+ >>> print(output.shape)
+ >>> torch.Size([6, 5, 3, 3])
+
+ """
+
+ __annotations__ = {
+ 'scales': Optional[List[float]],
+ 'map_levels': Optional[LevelMapper]
+ }
+
+ def __init__(self, featmap_names, output_size, sampling_ratio):
+ super(MultiScaleRoIAlign, self).__init__()
+ if isinstance(output_size, int):
+ output_size = (output_size, output_size)
+ self.featmap_names = featmap_names
+ self.sampling_ratio = sampling_ratio
+ self.output_size = tuple(output_size)
+ self.scales = None
+ self.map_levels = None
+
+ def convert_to_roi_format(self, boxes):
+ # type: (List[Tensor])
+ concat_boxes = torch.cat(boxes, dim=0)
+ device, dtype = concat_boxes.device, concat_boxes.dtype
+ ids = torch.cat(
+ [
+ torch.full_like(b[:, :1], i, dtype=dtype, layout=torch.strided, device=device)
+ for i, b in enumerate(boxes)
+ ],
+ dim=0,
+ )
+ rois = torch.cat([ids, concat_boxes], dim=1)
+ return rois
+
+ def infer_scale(self, feature, original_size):
+ # type: (Tensor, List[int])
+ # assumption: the scale is of the form 2 ** (-k), with k integer
+ size = feature.shape[-2:]
+ possible_scales = torch.jit.annotate(List[float], [])
+ for s1, s2 in zip(size, original_size):
+ approx_scale = float(s1) / float(s2)
+ scale = 2 ** float(torch.tensor(approx_scale).log2().round())
+ possible_scales.append(scale)
+ assert possible_scales[0] == possible_scales[1]
+ return possible_scales[0]
+
+ def setup_scales(self, features, image_shapes):
+ # type: (List[Tensor], List[Tuple[int, int]])
+ assert len(image_shapes) != 0
+ max_x = 0
+ max_y = 0
+ for shape in image_shapes:
+ max_x = max(shape[0], max_x)
+ max_y = max(shape[1], max_y)
+ original_input_shape = (max_x, max_y)
+
+ scales = [self.infer_scale(feat, original_input_shape) for feat in features]
+ # get the levels in the feature map by leveraging the fact that the network always
+ # downsamples by a factor of 2 at each level.
+ lvl_min = -torch.log2(torch.tensor(scales[0], dtype=torch.float32)).item()
+ lvl_max = -torch.log2(torch.tensor(scales[-1], dtype=torch.float32)).item()
+ self.scales = scales
+ self.map_levels = initLevelMapper(int(lvl_min), int(lvl_max))
+
+ def forward(self, x, boxes, image_shapes):
+ # type: (Dict[str, Tensor], List[Tensor], List[Tuple[int, int]])
+ """
+ Arguments:
+ x (OrderedDict[Tensor]): feature maps for each level. They are assumed to have
+ all the same number of channels, but they can have different sizes.
+ boxes (List[Tensor[N, 4]]): boxes to be used to perform the pooling operation, in
+ (x1, y1, x2, y2) format and in the image reference size, not the feature map
+ reference.
+ image_shapes (List[Tuple[height, width]]): the sizes of each image before they
+ have been fed to a CNN to obtain feature maps. This allows us to infer the
+ scale factor for each one of the levels to be pooled.
+ Returns:
+ result (Tensor)
+ """
+ x_filtered = []
+ for k, v in x.items():
+ if k in self.featmap_names:
+ x_filtered.append(v)
+ num_levels = len(x_filtered)
+ rois = self.convert_to_roi_format(boxes)
+ if self.scales is None:
+ self.setup_scales(x_filtered, image_shapes)
+
+ scales = self.scales
+ assert scales is not None
+
+ if num_levels == 1:
+ return roi_align(
+ x_filtered[0], rois,
+ output_size=self.output_size,
+ spatial_scale=scales[0],
+ sampling_ratio=self.sampling_ratio
+ )
+
+ mapper = self.map_levels
+ assert mapper is not None
+
+ levels = mapper(boxes)
+
+ num_rois = len(rois)
+ num_channels = x_filtered[0].shape[1]
+
+ dtype, device = x_filtered[0].dtype, x_filtered[0].device
+ result = torch.zeros(
+ (num_rois, num_channels,) + self.output_size,
+ dtype=dtype,
+ device=device,
+ )
+
+ tracing_results = []
+ for level, (per_level_feature, scale) in enumerate(zip(x_filtered, scales)):
+ idx_in_level = torch.nonzero(levels == level).squeeze(1)
+ rois_per_level = rois[idx_in_level]
+
+ result_idx_in_level = roi_align(
+ per_level_feature, rois_per_level,
+ output_size=self.output_size,
+ spatial_scale=scale, sampling_ratio=self.sampling_ratio)
+
+ if torchvision._is_tracing():
+ tracing_results.append(result_idx_in_level.to(dtype))
+ else:
+ result[idx_in_level] = result_idx_in_level
+
+ if torchvision._is_tracing():
+ result = _onnx_merge_levels(levels, tracing_results)
+
+ return result
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_align.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_align.py
new file mode 100644
index 0000000000..c0c761b72c
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_align.py
@@ -0,0 +1,68 @@
+import torch
+from torch import nn, Tensor
+
+from torch.nn.modules.utils import _pair
+from torch.jit.annotations import List
+
+from ._utils import convert_boxes_to_roi_format, check_roi_boxes_shape
+
+
+def ps_roi_align(input, boxes, output_size, spatial_scale=1.0, sampling_ratio=-1):
+ # type: (Tensor, Tensor, int, float, int) -> Tensor
+ """
+ Performs Position-Sensitive Region of Interest (RoI) Align operator
+ mentioned in Light-Head R-CNN.
+
+ Arguments:
+ input (Tensor[N, C, H, W]): input tensor
+ boxes (Tensor[K, 5] or List[Tensor[L, 4]]): the box coordinates in (x1, y1, x2, y2)
+ format where the regions will be taken from. If a single Tensor is passed,
+ then the first column should contain the batch index. If a list of Tensors
+ is passed, then each Tensor will correspond to the boxes for an element i
+ in a batch
+ output_size (int or Tuple[int, int]): the size of the output after the cropping
+ is performed, as (height, width)
+ spatial_scale (float): a scaling factor that maps the input coordinates to
+ the box coordinates. Default: 1.0
+ sampling_ratio (int): number of sampling points in the interpolation grid
+ used to compute the output value of each pooled output bin. If > 0
+ then exactly sampling_ratio x sampling_ratio grid points are used.
+ If <= 0, then an adaptive number of grid points are used (computed as
+ ceil(roi_width / pooled_w), and likewise for height). Default: -1
+
+ Returns:
+ output (Tensor[K, C, output_size[0], output_size[1]])
+ """
+ check_roi_boxes_shape(boxes)
+ rois = boxes
+ output_size = _pair(output_size)
+ if not isinstance(rois, torch.Tensor):
+ rois = convert_boxes_to_roi_format(rois)
+ output, _ = torch.ops.torchvision.ps_roi_align(input, rois, spatial_scale,
+ output_size[0],
+ output_size[1],
+ sampling_ratio)
+ return output
+
+
+class PSRoIAlign(nn.Module):
+ """
+ See ps_roi_align
+ """
+ def __init__(self, output_size, spatial_scale, sampling_ratio):
+ super(PSRoIAlign, self).__init__()
+ self.output_size = output_size
+ self.spatial_scale = spatial_scale
+ self.sampling_ratio = sampling_ratio
+
+ def forward(self, input, rois):
+ return ps_roi_align(input, rois, self.output_size, self.spatial_scale,
+ self.sampling_ratio)
+
+ def __repr__(self):
+ tmpstr = self.__class__.__name__ + '('
+ tmpstr += 'output_size=' + str(self.output_size)
+ tmpstr += ', spatial_scale=' + str(self.spatial_scale)
+ tmpstr += ', sampling_ratio=' + str(self.sampling_ratio)
+ tmpstr += ')'
+ return tmpstr
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_pool.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_pool.py
new file mode 100644
index 0000000000..710f2cb019
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_pool.py
@@ -0,0 +1,59 @@
+import torch
+from torch import nn, Tensor
+
+from torch.nn.modules.utils import _pair
+from torch.jit.annotations import List
+
+from ._utils import convert_boxes_to_roi_format, check_roi_boxes_shape
+
+
+def ps_roi_pool(input, boxes, output_size, spatial_scale=1.0):
+ # type: (Tensor, Tensor, int, float) -> Tensor
+ """
+ Performs Position-Sensitive Region of Interest (RoI) Pool operator
+ described in R-FCN
+
+ Arguments:
+ input (Tensor[N, C, H, W]): input tensor
+ boxes (Tensor[K, 5] or List[Tensor[L, 4]]): the box coordinates in (x1, y1, x2, y2)
+ format where the regions will be taken from. If a single Tensor is passed,
+ then the first column should contain the batch index. If a list of Tensors
+ is passed, then each Tensor will correspond to the boxes for an element i
+ in a batch
+ output_size (int or Tuple[int, int]): the size of the output after the cropping
+ is performed, as (height, width)
+ spatial_scale (float): a scaling factor that maps the input coordinates to
+ the box coordinates. Default: 1.0
+
+ Returns:
+ output (Tensor[K, C, output_size[0], output_size[1]])
+ """
+ check_roi_boxes_shape(boxes)
+ rois = boxes
+ output_size = _pair(output_size)
+ if not isinstance(rois, torch.Tensor):
+ rois = convert_boxes_to_roi_format(rois)
+ output, _ = torch.ops.torchvision.ps_roi_pool(input, rois, spatial_scale,
+ output_size[0],
+ output_size[1])
+ return output
+
+
+class PSRoIPool(nn.Module):
+ """
+ See ps_roi_pool
+ """
+ def __init__(self, output_size, spatial_scale):
+ super(PSRoIPool, self).__init__()
+ self.output_size = output_size
+ self.spatial_scale = spatial_scale
+
+ def forward(self, input, rois):
+ return ps_roi_pool(input, rois, self.output_size, self.spatial_scale)
+
+ def __repr__(self):
+ tmpstr = self.__class__.__name__ + '('
+ tmpstr += 'output_size=' + str(self.output_size)
+ tmpstr += ', spatial_scale=' + str(self.spatial_scale)
+ tmpstr += ')'
+ return tmpstr
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_align.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_align.py
new file mode 100644
index 0000000000..14224d8a83
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_align.py
@@ -0,0 +1,69 @@
+import torch
+from torch import nn, Tensor
+
+from torch.nn.modules.utils import _pair
+from torch.jit.annotations import List, BroadcastingList2
+
+from ._utils import convert_boxes_to_roi_format, check_roi_boxes_shape
+
+
+def roi_align(input, boxes, output_size, spatial_scale=1.0, sampling_ratio=-1, aligned=False):
+ # type: (Tensor, Tensor, BroadcastingList2[int], float, int, bool) -> Tensor
+ """
+ Performs Region of Interest (RoI) Align operator described in Mask R-CNN
+
+ Arguments:
+ input (Tensor[N, C, H, W]): input tensor
+ boxes (Tensor[K, 5] or List[Tensor[L, 4]]): the box coordinates in (x1, y1, x2, y2)
+ format where the regions will be taken from. If a single Tensor is passed,
+ then the first column should contain the batch index. If a list of Tensors
+ is passed, then each Tensor will correspond to the boxes for an element i
+ in a batch
+ output_size (int or Tuple[int, int]): the size of the output after the cropping
+ is performed, as (height, width)
+ spatial_scale (float): a scaling factor that maps the input coordinates to
+ the box coordinates. Default: 1.0
+ sampling_ratio (int): number of sampling points in the interpolation grid
+ used to compute the output value of each pooled output bin. If > 0,
+ then exactly sampling_ratio x sampling_ratio grid points are used. If
+ <= 0, then an adaptive number of grid points are used (computed as
+ ceil(roi_width / pooled_w), and likewise for height). Default: -1
+        aligned (bool): If False, use the legacy implementation.
+            If True, shift the box coordinates by -0.5 pixel so that the sampling points
+            align more closely with the two neighboring pixel indices. This is the version
+            used in Detectron2.
+
+ Returns:
+ output (Tensor[K, C, output_size[0], output_size[1]])
+ """
+ check_roi_boxes_shape(boxes)
+ rois = boxes
+ output_size = _pair(output_size)
+ if not isinstance(rois, torch.Tensor):
+ rois = convert_boxes_to_roi_format(rois)
+ return torch.ops.torchvision.roi_align(input, rois, spatial_scale,
+ output_size[0], output_size[1],
+ sampling_ratio, aligned)
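+
+
+# Illustrative sketch: calling roi_align with a per-image list of boxes.
+# _example_roi_align is hypothetical, and running it requires the compiled
+# torchvision extension that provides torch.ops.torchvision.roi_align.
+def _example_roi_align():
+    feat = torch.rand(1, 8, 32, 32)
+    boxes = [torch.tensor([[0., 0., 16., 16.]])]   # boxes of image 0
+    out = roi_align(feat, boxes, output_size=(7, 7), spatial_scale=1.0, sampling_ratio=2)
+    return out.shape  # torch.Size([1, 8, 7, 7])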
+
+
+class RoIAlign(nn.Module):
+ """
+ See roi_align
+ """
+ def __init__(self, output_size, spatial_scale, sampling_ratio, aligned=False):
+ super(RoIAlign, self).__init__()
+ self.output_size = output_size
+ self.spatial_scale = spatial_scale
+ self.sampling_ratio = sampling_ratio
+ self.aligned = aligned
+
+ def forward(self, input, rois):
+ return roi_align(input, rois, self.output_size, self.spatial_scale, self.sampling_ratio, self.aligned)
+
+ def __repr__(self):
+ tmpstr = self.__class__.__name__ + '('
+ tmpstr += 'output_size=' + str(self.output_size)
+ tmpstr += ', spatial_scale=' + str(self.spatial_scale)
+ tmpstr += ', sampling_ratio=' + str(self.sampling_ratio)
+ tmpstr += ', aligned=' + str(self.aligned)
+ tmpstr += ')'
+ return tmpstr
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_pool.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_pool.py
new file mode 100644
index 0000000000..10232f16b4
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_pool.py
@@ -0,0 +1,57 @@
+import torch
+from torch import nn, Tensor
+
+from torch.nn.modules.utils import _pair
+from torch.jit.annotations import List, BroadcastingList2
+
+from ._utils import convert_boxes_to_roi_format, check_roi_boxes_shape
+
+
+def roi_pool(input, boxes, output_size, spatial_scale=1.0):
+ # type: (Tensor, Tensor, BroadcastingList2[int], float) -> Tensor
+ """
+ Performs Region of Interest (RoI) Pool operator described in Fast R-CNN
+
+ Arguments:
+ input (Tensor[N, C, H, W]): input tensor
+ boxes (Tensor[K, 5] or List[Tensor[L, 4]]): the box coordinates in (x1, y1, x2, y2)
+ format where the regions will be taken from. If a single Tensor is passed,
+ then the first column should contain the batch index. If a list of Tensors
+ is passed, then each Tensor will correspond to the boxes for an element i
+ in a batch
+ output_size (int or Tuple[int, int]): the size of the output after the cropping
+ is performed, as (height, width)
+ spatial_scale (float): a scaling factor that maps the input coordinates to
+ the box coordinates. Default: 1.0
+
+ Returns:
+ output (Tensor[K, C, output_size[0], output_size[1]])
+ """
+ check_roi_boxes_shape(boxes)
+ rois = boxes
+ output_size = _pair(output_size)
+ if not isinstance(rois, torch.Tensor):
+ rois = convert_boxes_to_roi_format(rois)
+ output, _ = torch.ops.torchvision.roi_pool(input, rois, spatial_scale,
+ output_size[0], output_size[1])
+ return output
+
+
+class RoIPool(nn.Module):
+ """
+ See roi_pool
+ """
+ def __init__(self, output_size, spatial_scale):
+ super(RoIPool, self).__init__()
+ self.output_size = output_size
+ self.spatial_scale = spatial_scale
+
+ def forward(self, input, rois):
+ return roi_pool(input, rois, self.output_size, self.spatial_scale)
+
+ def __repr__(self):
+ tmpstr = self.__class__.__name__ + '('
+ tmpstr += 'output_size=' + str(self.output_size)
+ tmpstr += ', spatial_scale=' + str(self.spatial_scale)
+ tmpstr += ')'
+ return tmpstr
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/__init__.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/__init__.py
new file mode 100644
index 0000000000..7986cdd642
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/__init__.py
@@ -0,0 +1 @@
+from .transforms import *
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_functional_video.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_functional_video.py
new file mode 100644
index 0000000000..06c3071690
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_functional_video.py
@@ -0,0 +1,101 @@
+import torch
+
+
+def _is_tensor_video_clip(clip):
+ if not torch.is_tensor(clip):
+        raise TypeError("clip should be Tensor. Got %s" % type(clip))
+
+ if not clip.ndimension() == 4:
+ raise ValueError("clip should be 4D. Got %dD" % clip.dim())
+
+ return True
+
+
+def crop(clip, i, j, h, w):
+ """
+ Args:
+ clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
+ """
+ assert len(clip.size()) == 4, "clip should be a 4D tensor"
+ return clip[..., i:i + h, j:j + w]
+
+
+def resize(clip, target_size, interpolation_mode):
+ assert len(target_size) == 2, "target size should be tuple (height, width)"
+ return torch.nn.functional.interpolate(
+ clip, size=target_size, mode=interpolation_mode
+ )
+
+
+def resized_crop(clip, i, j, h, w, size, interpolation_mode="bilinear"):
+ """
+ Do spatial cropping and resizing to the video clip
+ Args:
+ clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
+        i (int): i in (i, j), i.e. the row coordinate of the upper-left corner.
+        j (int): j in (i, j), i.e. the column coordinate of the upper-left corner.
+ h (int): Height of the cropped region.
+ w (int): Width of the cropped region.
+ size (tuple(int, int)): height and width of resized clip
+ Returns:
+ clip (torch.tensor): Resized and cropped clip. Size is (C, T, H, W)
+ """
+ assert _is_tensor_video_clip(clip), "clip should be a 4D torch.tensor"
+ clip = crop(clip, i, j, h, w)
+ clip = resize(clip, size, interpolation_mode)
+ return clip
+
+
+def center_crop(clip, crop_size):
+ assert _is_tensor_video_clip(clip), "clip should be a 4D torch.tensor"
+ h, w = clip.size(-2), clip.size(-1)
+ th, tw = crop_size
+ assert h >= th and w >= tw, "height and width must be no smaller than crop_size"
+
+ i = int(round((h - th) / 2.0))
+ j = int(round((w - tw) / 2.0))
+ return crop(clip, i, j, th, tw)
+
+
+def to_tensor(clip):
+ """
+ Convert tensor data type from uint8 to float, divide value by 255.0 and
+    permute the dimensions of the clip tensor
+ Args:
+ clip (torch.tensor, dtype=torch.uint8): Size is (T, H, W, C)
+ Return:
+ clip (torch.tensor, dtype=torch.float): Size is (C, T, H, W)
+ """
+ _is_tensor_video_clip(clip)
+ if not clip.dtype == torch.uint8:
+ raise TypeError("clip tensor should have data type uint8. Got %s" % str(clip.dtype))
+ return clip.float().permute(3, 0, 1, 2) / 255.0
+
+
+def normalize(clip, mean, std, inplace=False):
+ """
+ Args:
+ clip (torch.tensor): Video clip to be normalized. Size is (C, T, H, W)
+ mean (tuple): pixel RGB mean. Size is (3)
+ std (tuple): pixel standard deviation. Size is (3)
+ Returns:
+ normalized clip (torch.tensor): Size is (C, T, H, W)
+ """
+ assert _is_tensor_video_clip(clip), "clip should be a 4D torch.tensor"
+ if not inplace:
+ clip = clip.clone()
+ mean = torch.as_tensor(mean, dtype=clip.dtype, device=clip.device)
+ std = torch.as_tensor(std, dtype=clip.dtype, device=clip.device)
+ clip.sub_(mean[:, None, None, None]).div_(std[:, None, None, None])
+ return clip
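+
+
+# Illustrative sketch: normalizing a (C, T, H, W) clip with per-channel
+# statistics; inplace defaults to False, so the input clip is left untouched.
+# _example_normalize is hypothetical.
+def _example_normalize():
+    clip = torch.rand(3, 4, 8, 8)
+    mean = (0.45, 0.45, 0.45)
+    std = (0.225, 0.225, 0.225)
+    out = normalize(clip, mean, std)
+    return out.shape  # torch.Size([3, 4, 8, 8])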
+
+
+def hflip(clip):
+ """
+ Args:
+ clip (torch.tensor): Video clip to be normalized. Size is (C, T, H, W)
+ Returns:
+ flipped clip (torch.tensor): Size is (C, T, H, W)
+ """
+ assert _is_tensor_video_clip(clip), "clip should be a 4D torch.tensor"
+ return clip.flip((-1))
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_transforms_video.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_transforms_video.py
new file mode 100644
index 0000000000..aa1a4b0531
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_transforms_video.py
@@ -0,0 +1,173 @@
+#!/usr/bin/env python3
+
+import numbers
+import random
+
+from torchvision.transforms import (
+ RandomCrop,
+ RandomResizedCrop,
+)
+
+from . import _functional_video as F
+
+
+__all__ = [
+ "RandomCropVideo",
+ "RandomResizedCropVideo",
+ "CenterCropVideo",
+ "NormalizeVideo",
+ "ToTensorVideo",
+ "RandomHorizontalFlipVideo",
+]
+
+
+class RandomCropVideo(RandomCrop):
+ def __init__(self, size):
+ if isinstance(size, numbers.Number):
+ self.size = (int(size), int(size))
+ else:
+ self.size = size
+
+ def __call__(self, clip):
+ """
+ Args:
+ clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
+ Returns:
+ torch.tensor: randomly cropped/resized video clip.
+ size is (C, T, OH, OW)
+ """
+ i, j, h, w = self.get_params(clip, self.size)
+ return F.crop(clip, i, j, h, w)
+
+ def __repr__(self):
+ return self.__class__.__name__ + '(size={0})'.format(self.size)
+
+
+class RandomResizedCropVideo(RandomResizedCrop):
+ def __init__(
+ self,
+ size,
+ scale=(0.08, 1.0),
+ ratio=(3.0 / 4.0, 4.0 / 3.0),
+ interpolation_mode="bilinear",
+ ):
+ if isinstance(size, tuple):
+ assert len(size) == 2, "size should be tuple (height, width)"
+ self.size = size
+ else:
+ self.size = (size, size)
+
+ self.interpolation_mode = interpolation_mode
+ self.scale = scale
+ self.ratio = ratio
+
+ def __call__(self, clip):
+ """
+ Args:
+ clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
+ Returns:
+ torch.tensor: randomly cropped/resized video clip.
+ size is (C, T, H, W)
+ """
+ i, j, h, w = self.get_params(clip, self.scale, self.ratio)
+ return F.resized_crop(clip, i, j, h, w, self.size, self.interpolation_mode)
+
+ def __repr__(self):
+ return self.__class__.__name__ + \
+ '(size={0}, interpolation_mode={1}, scale={2}, ratio={3})'.format(
+ self.size, self.interpolation_mode, self.scale, self.ratio
+ )
+
+
+class CenterCropVideo(object):
+ def __init__(self, crop_size):
+ if isinstance(crop_size, numbers.Number):
+ self.crop_size = (int(crop_size), int(crop_size))
+ else:
+ self.crop_size = crop_size
+
+ def __call__(self, clip):
+ """
+ Args:
+ clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
+ Returns:
+ torch.tensor: central cropping of video clip. Size is
+ (C, T, crop_size, crop_size)
+ """
+ return F.center_crop(clip, self.crop_size)
+
+ def __repr__(self):
+ return self.__class__.__name__ + '(crop_size={0})'.format(self.crop_size)
+
+
+class NormalizeVideo(object):
+ """
+ Normalize the video clip by mean subtraction and division by standard deviation
+ Args:
+ mean (3-tuple): pixel RGB mean
+ std (3-tuple): pixel RGB standard deviation
+        inplace (boolean): whether to do the normalization in-place
+ """
+
+ def __init__(self, mean, std, inplace=False):
+ self.mean = mean
+ self.std = std
+ self.inplace = inplace
+
+ def __call__(self, clip):
+ """
+ Args:
+ clip (torch.tensor): video clip to be normalized. Size is (C, T, H, W)
+ """
+ return F.normalize(clip, self.mean, self.std, self.inplace)
+
+ def __repr__(self):
+ return self.__class__.__name__ + '(mean={0}, std={1}, inplace={2})'.format(
+ self.mean, self.std, self.inplace)
+
+
+class ToTensorVideo(object):
+ """
+ Convert tensor data type from uint8 to float, divide value by 255.0 and
+    permute the dimensions of the clip tensor
+ """
+
+ def __init__(self):
+ pass
+
+ def __call__(self, clip):
+ """
+ Args:
+ clip (torch.tensor, dtype=torch.uint8): Size is (T, H, W, C)
+ Return:
+ clip (torch.tensor, dtype=torch.float): Size is (C, T, H, W)
+ """
+ return F.to_tensor(clip)
+
+ def __repr__(self):
+ return self.__class__.__name__
+
+
+class RandomHorizontalFlipVideo(object):
+ """
+    Flip the video clip along the horizontal direction with a given probability
+ Args:
+ p (float): probability of the clip being flipped. Default value is 0.5
+ """
+
+ def __init__(self, p=0.5):
+ self.p = p
+
+ def __call__(self, clip):
+ """
+ Args:
+ clip (torch.tensor): Size is (C, T, H, W)
+ Return:
+ clip (torch.tensor): Size is (C, T, H, W)
+ """
+ if random.random() < self.p:
+ clip = F.hflip(clip)
+ return clip
+
+ def __repr__(self):
+ return self.__class__.__name__ + "(p={0})".format(self.p)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional.py
new file mode 100644
index 0000000000..bd5b170626
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional.py
@@ -0,0 +1,1392 @@
+import math
+import numbers
+import warnings
+from enum import Enum
+from typing import List, Tuple, Any, Optional
+
+import numpy as np
+import torch
+from PIL import Image
+from torch import Tensor
+
+try:
+ import accimage
+except ImportError:
+ accimage = None
+
+from . import functional_pil as F_pil
+from . import functional_tensor as F_t
+
+
+class InterpolationMode(Enum):
+ """Interpolation modes
+ Available interpolation methods are ``nearest``, ``bilinear``, ``bicubic``, ``box``, ``hamming``, and ``lanczos``.
+ """
+
+ NEAREST = "nearest"
+ BILINEAR = "bilinear"
+ BICUBIC = "bicubic"
+ # For PIL compatibility
+ BOX = "box"
+ HAMMING = "hamming"
+ LANCZOS = "lanczos"
+
+
+# TODO: Once torchscript supports Enums with staticmethod
+# this can be put into InterpolationMode as staticmethod
+def _interpolation_modes_from_int(i: int) -> InterpolationMode:
+ inverse_modes_mapping = {
+ 0: InterpolationMode.NEAREST,
+ 2: InterpolationMode.BILINEAR,
+ 3: InterpolationMode.BICUBIC,
+ 4: InterpolationMode.BOX,
+ 5: InterpolationMode.HAMMING,
+ 1: InterpolationMode.LANCZOS,
+ }
+ return inverse_modes_mapping[i]
+
+
+pil_modes_mapping = {
+ InterpolationMode.NEAREST: 0,
+ InterpolationMode.BILINEAR: 2,
+ InterpolationMode.BICUBIC: 3,
+ InterpolationMode.BOX: 4,
+ InterpolationMode.HAMMING: 5,
+ InterpolationMode.LANCZOS: 1,
+}
+
+_is_pil_image = F_pil._is_pil_image
+
+
+def get_image_size(img: Tensor) -> List[int]:
+ """Returns the size of an image as [width, height].
+
+ Args:
+ img (PIL Image or Tensor): The image to be checked.
+
+ Returns:
+ List[int]: The image size.
+ """
+ if isinstance(img, torch.Tensor):
+ return F_t.get_image_size(img)
+
+ return F_pil.get_image_size(img)
+
+
+def get_image_num_channels(img: Tensor) -> int:
+ """Returns the number of channels of an image.
+
+ Args:
+ img (PIL Image or Tensor): The image to be checked.
+
+ Returns:
+ int: The number of channels.
+ """
+ if isinstance(img, torch.Tensor):
+ return F_t.get_image_num_channels(img)
+
+ return F_pil.get_image_num_channels(img)
+
+
+@torch.jit.unused
+def _is_numpy(img: Any) -> bool:
+ return isinstance(img, np.ndarray)
+
+
+@torch.jit.unused
+def _is_numpy_image(img: Any) -> bool:
+ return img.ndim in {2, 3}
+
+
+def to_tensor(pic):
+ """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.
+ This function does not support torchscript.
+
+ See :class:`~torchvision.transforms.ToTensor` for more details.
+
+ Args:
+ pic (PIL Image or numpy.ndarray): Image to be converted to tensor.
+
+ Returns:
+ Tensor: Converted image.
+ """
+ if not (F_pil._is_pil_image(pic) or _is_numpy(pic)):
+ raise TypeError(f"pic should be PIL Image or ndarray. Got {type(pic)}")
+
+ if _is_numpy(pic) and not _is_numpy_image(pic):
+ raise ValueError(f"pic should be 2/3 dimensional. Got {pic.ndim} dimensions.")
+
+ default_float_dtype = torch.get_default_dtype()
+
+ if isinstance(pic, np.ndarray):
+ # handle numpy array
+ if pic.ndim == 2:
+ pic = pic[:, :, None]
+
+ img = torch.from_numpy(pic.transpose((2, 0, 1))).contiguous()
+ # backward compatibility
+ if isinstance(img, torch.ByteTensor):
+ return img.to(dtype=default_float_dtype).div(255)
+ else:
+ return img
+
+ if accimage is not None and isinstance(pic, accimage.Image):
+ nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.float32)
+ pic.copyto(nppic)
+ return torch.from_numpy(nppic).to(dtype=default_float_dtype)
+
+ # handle PIL Image
+ mode_to_nptype = {"I": np.int32, "I;16": np.int16, "F": np.float32}
+ img = torch.from_numpy(np.array(pic, mode_to_nptype.get(pic.mode, np.uint8), copy=True))
+
+ if pic.mode == "1":
+ img = 255 * img
+ img = img.view(pic.size[1], pic.size[0], len(pic.getbands()))
+ # put it from HWC to CHW format
+ img = img.permute((2, 0, 1)).contiguous()
+ if isinstance(img, torch.ByteTensor):
+ return img.to(dtype=default_float_dtype).div(255)
+ else:
+ return img
+
+
+def pil_to_tensor(pic):
+ """Convert a ``PIL Image`` to a tensor of the same type.
+ This function does not support torchscript.
+
+ See :class:`~torchvision.transforms.PILToTensor` for more details.
+
+ .. note::
+
+ A deep copy of the underlying array is performed.
+
+ Args:
+ pic (PIL Image): Image to be converted to tensor.
+
+ Returns:
+ Tensor: Converted image.
+ """
+ if not F_pil._is_pil_image(pic):
+ raise TypeError(f"pic should be PIL Image. Got {type(pic)}")
+
+ if accimage is not None and isinstance(pic, accimage.Image):
+ # accimage format is always uint8 internally, so always return uint8 here
+ nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.uint8)
+ pic.copyto(nppic)
+ return torch.as_tensor(nppic)
+
+ # handle PIL Image
+ img = torch.as_tensor(np.array(pic, copy=True))
+ img = img.view(pic.size[1], pic.size[0], len(pic.getbands()))
+ # put it from HWC to CHW format
+ img = img.permute((2, 0, 1))
+ return img
+
+
+def convert_image_dtype(image: torch.Tensor, dtype: torch.dtype = torch.float) -> torch.Tensor:
+ """Convert a tensor image to the given ``dtype`` and scale the values accordingly
+ This function does not support PIL Image.
+
+ Args:
+ image (torch.Tensor): Image to be converted
+ dtype (torch.dtype): Desired data type of the output
+
+ Returns:
+ Tensor: Converted image
+
+ .. note::
+
+ When converting from a smaller to a larger integer ``dtype`` the maximum values are **not** mapped exactly.
+ If converted back and forth, this mismatch has no effect.
+
+ Raises:
+ RuntimeError: When trying to cast :class:`torch.float32` to :class:`torch.int32` or :class:`torch.int64` as
+ well as for trying to cast :class:`torch.float64` to :class:`torch.int64`. These conversions might lead to
+ overflow errors since the floating point ``dtype`` cannot store consecutive integers over the whole range
+ of the integer ``dtype``.
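+
+    Example (illustrative sketch; the input values below are assumed)::
+
+        >>> img_u8 = torch.full((3, 2, 2), 255, dtype=torch.uint8)
+        >>> convert_image_dtype(img_u8, torch.float32).max()   # 255 maps to 1.0
+        tensor(1.)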
+ """
+ if not isinstance(image, torch.Tensor):
+ raise TypeError("Input img should be Tensor Image")
+
+ return F_t.convert_image_dtype(image, dtype)
+
+
+def to_pil_image(pic, mode=None):
+ """Convert a tensor or an ndarray to PIL Image. This function does not support torchscript.
+
+ See :class:`~torchvision.transforms.ToPILImage` for more details.
+
+ Args:
+ pic (Tensor or numpy.ndarray): Image to be converted to PIL Image.
+ mode (`PIL.Image mode`_): color space and pixel depth of input data (optional).
+
+ .. _PIL.Image mode: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#concept-modes
+
+ Returns:
+ PIL Image: Image converted to PIL Image.
+ """
+ if not (isinstance(pic, torch.Tensor) or isinstance(pic, np.ndarray)):
+ raise TypeError(f"pic should be Tensor or ndarray. Got {type(pic)}.")
+
+ elif isinstance(pic, torch.Tensor):
+ if pic.ndimension() not in {2, 3}:
+ raise ValueError(f"pic should be 2/3 dimensional. Got {pic.ndimension()} dimensions.")
+
+ elif pic.ndimension() == 2:
+ # if 2D image, add channel dimension (CHW)
+ pic = pic.unsqueeze(0)
+
+ # check number of channels
+ if pic.shape[-3] > 4:
+ raise ValueError(f"pic should not have > 4 channels. Got {pic.shape[-3]} channels.")
+
+ elif isinstance(pic, np.ndarray):
+ if pic.ndim not in {2, 3}:
+ raise ValueError(f"pic should be 2/3 dimensional. Got {pic.ndim} dimensions.")
+
+ elif pic.ndim == 2:
+ # if 2D image, add channel dimension (HWC)
+ pic = np.expand_dims(pic, 2)
+
+ # check number of channels
+ if pic.shape[-1] > 4:
+ raise ValueError(f"pic should not have > 4 channels. Got {pic.shape[-1]} channels.")
+
+ npimg = pic
+ if isinstance(pic, torch.Tensor):
+ if pic.is_floating_point() and mode != "F":
+ pic = pic.mul(255).byte()
+ npimg = np.transpose(pic.cpu().numpy(), (1, 2, 0))
+
+ if not isinstance(npimg, np.ndarray):
+        raise TypeError(f"Input pic must be a torch.Tensor or NumPy ndarray, not {type(npimg)}")
+
+ if npimg.shape[2] == 1:
+ expected_mode = None
+ npimg = npimg[:, :, 0]
+ if npimg.dtype == np.uint8:
+ expected_mode = "L"
+ elif npimg.dtype == np.int16:
+ expected_mode = "I;16"
+ elif npimg.dtype == np.int32:
+ expected_mode = "I"
+ elif npimg.dtype == np.float32:
+ expected_mode = "F"
+ if mode is not None and mode != expected_mode:
+            raise ValueError(f"Incorrect mode ({mode}) supplied for input type {npimg.dtype}. Should be {expected_mode}")
+ mode = expected_mode
+
+ elif npimg.shape[2] == 2:
+ permitted_2_channel_modes = ["LA"]
+ if mode is not None and mode not in permitted_2_channel_modes:
+ raise ValueError(f"Only modes {permitted_2_channel_modes} are supported for 2D inputs")
+
+ if mode is None and npimg.dtype == np.uint8:
+ mode = "LA"
+
+ elif npimg.shape[2] == 4:
+ permitted_4_channel_modes = ["RGBA", "CMYK", "RGBX"]
+ if mode is not None and mode not in permitted_4_channel_modes:
+ raise ValueError(f"Only modes {permitted_4_channel_modes} are supported for 4D inputs")
+
+ if mode is None and npimg.dtype == np.uint8:
+ mode = "RGBA"
+ else:
+ permitted_3_channel_modes = ["RGB", "YCbCr", "HSV"]
+ if mode is not None and mode not in permitted_3_channel_modes:
+ raise ValueError(f"Only modes {permitted_3_channel_modes} are supported for 3D inputs")
+ if mode is None and npimg.dtype == np.uint8:
+ mode = "RGB"
+
+ if mode is None:
+ raise TypeError(f"Input type {npimg.dtype} is not supported")
+
+ return Image.fromarray(npimg, mode=mode)
+
+
+def normalize(tensor: Tensor, mean: List[float], std: List[float], inplace: bool = False) -> Tensor:
+ """Normalize a float tensor image with mean and standard deviation.
+ This transform does not support PIL Image.
+
+ .. note::
+        This transform acts out of place by default, i.e., it does not mutate the input tensor.
+
+ See :class:`~torchvision.transforms.Normalize` for more details.
+
+ Args:
+ tensor (Tensor): Float tensor image of size (C, H, W) or (B, C, H, W) to be normalized.
+ mean (sequence): Sequence of means for each channel.
+ std (sequence): Sequence of standard deviations for each channel.
+ inplace(bool,optional): Bool to make this operation inplace.
+
+ Returns:
+ Tensor: Normalized Tensor image.
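+
+    Example (illustrative sketch; the shape and statistics below are assumed)::
+
+        >>> img = torch.rand(3, 224, 224)                 # float tensor in [0, 1]
+        >>> out = normalize(img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+        >>> out.shape                                     # per-channel (x - mean) / std
+        torch.Size([3, 224, 224])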
+ """
+ if not isinstance(tensor, torch.Tensor):
+ raise TypeError(f"Input tensor should be a torch tensor. Got {type(tensor)}.")
+
+ if not tensor.is_floating_point():
+ raise TypeError(f"Input tensor should be a float tensor. Got {tensor.dtype}.")
+
+ if tensor.ndim < 3:
+ raise ValueError(
+ f"Expected tensor to be a tensor image of size (..., C, H, W). Got tensor.size() = {tensor.size()}"
+ )
+
+ if not inplace:
+ tensor = tensor.clone()
+
+ dtype = tensor.dtype
+ mean = torch.as_tensor(mean, dtype=dtype, device=tensor.device)
+ std = torch.as_tensor(std, dtype=dtype, device=tensor.device)
+ if (std == 0).any():
+ raise ValueError(f"std evaluated to zero after conversion to {dtype}, leading to division by zero.")
+ if mean.ndim == 1:
+ mean = mean.view(-1, 1, 1)
+ if std.ndim == 1:
+ std = std.view(-1, 1, 1)
+ tensor.sub_(mean).div_(std)
+ return tensor
+
+
+def resize(
+ img: Tensor,
+ size: List[int],
+ interpolation: InterpolationMode = InterpolationMode.BILINEAR,
+ max_size: Optional[int] = None,
+ antialias: Optional[bool] = None,
+) -> Tensor:
+ r"""Resize the input image to the given size.
+ If the image is torch Tensor, it is expected
+ to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
+
+ .. warning::
+ The output image might be different depending on its type: when downsampling, the interpolation of PIL images
+ and tensors is slightly different, because PIL applies antialiasing. This may lead to significant differences
+ in the performance of a network. Therefore, it is preferable to train and serve a model with the same input
+        types. See also the ``antialias`` parameter below, which can help make the outputs of PIL images and
+        tensors closer.
+
+ Args:
+ img (PIL Image or Tensor): Image to be resized.
+ size (sequence or int): Desired output size. If size is a sequence like
+ (h, w), the output size will be matched to this. If size is an int,
+ the smaller edge of the image will be matched to this number maintaining
+            the aspect ratio, i.e., if height > width, then image will be rescaled to
+ :math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)`.
+
+ .. note::
+ In torchscript mode size as single int is not supported, use a sequence of length 1: ``[size, ]``.
+ interpolation (InterpolationMode): Desired interpolation enum defined by
+ :class:`torchvision.transforms.InterpolationMode`.
+ Default is ``InterpolationMode.BILINEAR``. If input is Tensor, only ``InterpolationMode.NEAREST``,
+ ``InterpolationMode.BILINEAR`` and ``InterpolationMode.BICUBIC`` are supported.
+ For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.
+ max_size (int, optional): The maximum allowed for the longer edge of
+ the resized image: if the longer edge of the image is greater
+ than ``max_size`` after being resized according to ``size``, then
+ the image is resized again so that the longer edge is equal to
+            ``max_size``. As a result, ``size`` might be overruled, i.e. the
+ smaller edge may be shorter than ``size``. This is only supported
+ if ``size`` is an int (or a sequence of length 1 in torchscript
+ mode).
+        antialias (bool, optional): antialias flag. If ``img`` is PIL Image, the flag is ignored and anti-alias
+            is always used. If ``img`` is Tensor, the flag is False by default and can be set to True for
+            ``InterpolationMode.BILINEAR`` mode only. This can help make the output for PIL images and tensors
+            closer.
+
+ .. warning::
+ There is no autodiff support for ``antialias=True`` option with input ``img`` as Tensor.
+
+ Returns:
+ PIL Image or Tensor: Resized image.
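+
+    Example (illustrative sketch; the input and target sizes below are assumed)::
+
+        >>> img = torch.rand(3, 480, 640)
+        >>> resize(img, [320, 320]).shape                 # explicit (h, w)
+        torch.Size([3, 320, 320])
+        >>> small = resize(img, [240])                    # match the smaller edge, keep aspect ratio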
+ """
+ # Backward compatibility with integer value
+ if isinstance(interpolation, int):
+ warnings.warn(
+ "Argument interpolation should be of type InterpolationMode instead of int. "
+ "Please, use InterpolationMode enum."
+ )
+ interpolation = _interpolation_modes_from_int(interpolation)
+
+ if not isinstance(interpolation, InterpolationMode):
+        raise TypeError("Argument interpolation should be an InterpolationMode")
+
+ if not isinstance(img, torch.Tensor):
+ if antialias is not None and not antialias:
+ warnings.warn("Anti-alias option is always applied for PIL Image input. Argument antialias is ignored.")
+ pil_interpolation = pil_modes_mapping[interpolation]
+ return F_pil.resize(img, size=size, interpolation=pil_interpolation, max_size=max_size)
+
+ return F_t.resize(img, size=size, interpolation=interpolation.value, max_size=max_size, antialias=antialias)
+
+
+def scale(*args, **kwargs):
+ warnings.warn("The use of the transforms.Scale transform is deprecated, please use transforms.Resize instead.")
+ return resize(*args, **kwargs)
+
+
+def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "constant") -> Tensor:
+ r"""Pad the given image on all sides with the given "pad" value.
+ If the image is torch Tensor, it is expected
+ to have [..., H, W] shape, where ... means at most 2 leading dimensions for mode reflect and symmetric,
+ at most 3 leading dimensions for mode edge,
+ and an arbitrary number of leading dimensions for mode constant
+
+ Args:
+ img (PIL Image or Tensor): Image to be padded.
+ padding (int or sequence): Padding on each border. If a single int is provided this
+ is used to pad all borders. If sequence of length 2 is provided this is the padding
+ on left/right and top/bottom respectively. If a sequence of length 4 is provided
+ this is the padding for the left, top, right and bottom borders respectively.
+
+ .. note::
+ In torchscript mode padding as single int is not supported, use a sequence of
+ length 1: ``[padding, ]``.
+ fill (number or str or tuple): Pixel fill value for constant fill. Default is 0.
+ If a tuple of length 3, it is used to fill R, G, B channels respectively.
+ This value is only used when the padding_mode is constant.
+ Only number is supported for torch Tensor.
+ Only int or str or tuple value is supported for PIL Image.
+ padding_mode (str): Type of padding. Should be: constant, edge, reflect or symmetric.
+ Default is constant.
+
+ - constant: pads with a constant value, this value is specified with fill
+
+ - edge: pads with the last value at the edge of the image.
+ If input a 5D torch Tensor, the last 3 dimensions will be padded instead of the last 2
+
+ - reflect: pads with reflection of image without repeating the last value on the edge.
+ For example, padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode
+ will result in [3, 2, 1, 2, 3, 4, 3, 2]
+
+ - symmetric: pads with reflection of image repeating the last value on the edge.
+ For example, padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode
+ will result in [2, 1, 1, 2, 3, 4, 4, 3]
+
+ Returns:
+ PIL Image or Tensor: Padded image.
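+
+    Example (illustrative sketch; the shapes and padding values below are assumed)::
+
+        >>> img = torch.rand(3, 32, 32)
+        >>> pad(img, [2]).shape                           # 2 px on every border
+        torch.Size([3, 36, 36])
+        >>> pad(img, [1, 2, 3, 4]).shape                  # left, top, right, bottom
+        torch.Size([3, 38, 36])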
+ """
+ if not isinstance(img, torch.Tensor):
+ return F_pil.pad(img, padding=padding, fill=fill, padding_mode=padding_mode)
+
+ return F_t.pad(img, padding=padding, fill=fill, padding_mode=padding_mode)
+
+
+def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor:
+ """Crop the given image at specified location and output size.
+ If the image is torch Tensor, it is expected
+ to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
+ If image size is smaller than output size along any edge, image is padded with 0 and then cropped.
+
+ Args:
+ img (PIL Image or Tensor): Image to be cropped. (0,0) denotes the top left corner of the image.
+ top (int): Vertical component of the top left corner of the crop box.
+ left (int): Horizontal component of the top left corner of the crop box.
+ height (int): Height of the crop box.
+ width (int): Width of the crop box.
+
+ Returns:
+ PIL Image or Tensor: Cropped image.
+ """
+
+ if not isinstance(img, torch.Tensor):
+ return F_pil.crop(img, top, left, height, width)
+
+ return F_t.crop(img, top, left, height, width)
+
+
+def center_crop(img: Tensor, output_size: List[int]) -> Tensor:
+ """Crops the given image at the center.
+ If the image is torch Tensor, it is expected
+ to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
+ If image size is smaller than output size along any edge, image is padded with 0 and then center cropped.
+
+ Args:
+ img (PIL Image or Tensor): Image to be cropped.
+ output_size (sequence or int): (height, width) of the crop box. If int or sequence with single int,
+ it is used for both directions.
+
+ Returns:
+ PIL Image or Tensor: Cropped image.
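+
+    Example (illustrative sketch; the shapes below are assumed)::
+
+        >>> img = torch.rand(3, 240, 320)
+        >>> center_crop(img, [224, 224]).shape
+        torch.Size([3, 224, 224])
+        >>> center_crop(img, [256, 384]).shape            # larger than input: zero-padded first
+        torch.Size([3, 256, 384])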
+ """
+ if isinstance(output_size, numbers.Number):
+ output_size = (int(output_size), int(output_size))
+ elif isinstance(output_size, (tuple, list)) and len(output_size) == 1:
+ output_size = (output_size[0], output_size[0])
+
+ image_width, image_height = get_image_size(img)
+ crop_height, crop_width = output_size
+
+ if crop_width > image_width or crop_height > image_height:
+ padding_ltrb = [
+ (crop_width - image_width) // 2 if crop_width > image_width else 0,
+ (crop_height - image_height) // 2 if crop_height > image_height else 0,
+ (crop_width - image_width + 1) // 2 if crop_width > image_width else 0,
+ (crop_height - image_height + 1) // 2 if crop_height > image_height else 0,
+ ]
+ img = pad(img, padding_ltrb, fill=0) # PIL uses fill value 0
+ image_width, image_height = get_image_size(img)
+ if crop_width == image_width and crop_height == image_height:
+ return img
+
+ crop_top = int(round((image_height - crop_height) / 2.0))
+ crop_left = int(round((image_width - crop_width) / 2.0))
+ return crop(img, crop_top, crop_left, crop_height, crop_width)
+
+
+def resized_crop(
+ img: Tensor,
+ top: int,
+ left: int,
+ height: int,
+ width: int,
+ size: List[int],
+ interpolation: InterpolationMode = InterpolationMode.BILINEAR,
+) -> Tensor:
+ """Crop the given image and resize it to desired size.
+ If the image is torch Tensor, it is expected
+ to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
+
+ Notably used in :class:`~torchvision.transforms.RandomResizedCrop`.
+
+ Args:
+ img (PIL Image or Tensor): Image to be cropped. (0,0) denotes the top left corner of the image.
+ top (int): Vertical component of the top left corner of the crop box.
+ left (int): Horizontal component of the top left corner of the crop box.
+ height (int): Height of the crop box.
+ width (int): Width of the crop box.
+ size (sequence or int): Desired output size. Same semantics as ``resize``.
+ interpolation (InterpolationMode): Desired interpolation enum defined by
+ :class:`torchvision.transforms.InterpolationMode`.
+ Default is ``InterpolationMode.BILINEAR``. If input is Tensor, only ``InterpolationMode.NEAREST``,
+ ``InterpolationMode.BILINEAR`` and ``InterpolationMode.BICUBIC`` are supported.
+ For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.
+
+ Returns:
+ PIL Image or Tensor: Cropped image.
+ """
+ img = crop(img, top, left, height, width)
+ img = resize(img, size, interpolation)
+ return img
+
+
+def hflip(img: Tensor) -> Tensor:
+ """Horizontally flip the given image.
+
+ Args:
+ img (PIL Image or Tensor): Image to be flipped. If img
+ is a Tensor, it is expected to be in [..., H, W] format,
+ where ... means it can have an arbitrary number of leading
+ dimensions.
+
+ Returns:
+ PIL Image or Tensor: Horizontally flipped image.
+ """
+ if not isinstance(img, torch.Tensor):
+ return F_pil.hflip(img)
+
+ return F_t.hflip(img)
+
+
+def _get_perspective_coeffs(startpoints: List[List[int]], endpoints: List[List[int]]) -> List[float]:
+ """Helper function to get the coefficients (a, b, c, d, e, f, g, h) for the perspective transforms.
+
+ In Perspective Transform each pixel (x, y) in the original image gets transformed as,
+ (x, y) -> ( (ax + by + c) / (gx + hy + 1), (dx + ey + f) / (gx + hy + 1) )
+
+ Args:
+ startpoints (list of list of ints): List containing four lists of two integers corresponding to four corners
+ ``[top-left, top-right, bottom-right, bottom-left]`` of the original image.
+ endpoints (list of list of ints): List containing four lists of two integers corresponding to four corners
+ ``[top-left, top-right, bottom-right, bottom-left]`` of the transformed image.
+
+ Returns:
+ octuple (a, b, c, d, e, f, g, h) for transforming each pixel.
+ """
+ a_matrix = torch.zeros(2 * len(startpoints), 8, dtype=torch.float)
+
+ for i, (p1, p2) in enumerate(zip(endpoints, startpoints)):
+ a_matrix[2 * i, :] = torch.tensor([p1[0], p1[1], 1, 0, 0, 0, -p2[0] * p1[0], -p2[0] * p1[1]])
+ a_matrix[2 * i + 1, :] = torch.tensor([0, 0, 0, p1[0], p1[1], 1, -p2[1] * p1[0], -p2[1] * p1[1]])
+
+ b_matrix = torch.tensor(startpoints, dtype=torch.float).view(8)
+ res = torch.linalg.lstsq(a_matrix, b_matrix, driver="gels").solution
+
+ output: List[float] = res.tolist()
+ return output
+
+
+def perspective(
+ img: Tensor,
+ startpoints: List[List[int]],
+ endpoints: List[List[int]],
+ interpolation: InterpolationMode = InterpolationMode.BILINEAR,
+ fill: Optional[List[float]] = None,
+) -> Tensor:
+ """Perform perspective transform of the given image.
+ If the image is torch Tensor, it is expected
+ to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
+
+ Args:
+ img (PIL Image or Tensor): Image to be transformed.
+ startpoints (list of list of ints): List containing four lists of two integers corresponding to four corners
+ ``[top-left, top-right, bottom-right, bottom-left]`` of the original image.
+ endpoints (list of list of ints): List containing four lists of two integers corresponding to four corners
+ ``[top-left, top-right, bottom-right, bottom-left]`` of the transformed image.
+ interpolation (InterpolationMode): Desired interpolation enum defined by
+ :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``.
+ If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
+ For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.
+ fill (sequence or number, optional): Pixel fill value for the area outside the transformed
+ image. If given a number, the value is used for all bands respectively.
+
+ .. note::
+ In torchscript mode single int/float value is not supported, please use a sequence
+ of length 1: ``[value, ]``.
+
+ Returns:
+        PIL Image or Tensor: Transformed image.
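+
+    Example (illustrative sketch; the corner points below are assumed)::
+
+        >>> img = torch.rand(3, 100, 100)
+        >>> startpoints = [[0, 0], [99, 0], [99, 99], [0, 99]]
+        >>> endpoints = [[10, 5], [95, 10], [90, 95], [5, 90]]
+        >>> out = perspective(img, startpoints, endpoints)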
+ """
+
+ coeffs = _get_perspective_coeffs(startpoints, endpoints)
+
+ # Backward compatibility with integer value
+ if isinstance(interpolation, int):
+ warnings.warn(
+ "Argument interpolation should be of type InterpolationMode instead of int. "
+ "Please, use InterpolationMode enum."
+ )
+ interpolation = _interpolation_modes_from_int(interpolation)
+
+ if not isinstance(interpolation, InterpolationMode):
+        raise TypeError("Argument interpolation should be an InterpolationMode")
+
+ if not isinstance(img, torch.Tensor):
+ pil_interpolation = pil_modes_mapping[interpolation]
+ return F_pil.perspective(img, coeffs, interpolation=pil_interpolation, fill=fill)
+
+ return F_t.perspective(img, coeffs, interpolation=interpolation.value, fill=fill)
+
+
+def vflip(img: Tensor) -> Tensor:
+ """Vertically flip the given image.
+
+ Args:
+ img (PIL Image or Tensor): Image to be flipped. If img
+ is a Tensor, it is expected to be in [..., H, W] format,
+ where ... means it can have an arbitrary number of leading
+ dimensions.
+
+ Returns:
+ PIL Image or Tensor: Vertically flipped image.
+ """
+ if not isinstance(img, torch.Tensor):
+ return F_pil.vflip(img)
+
+ return F_t.vflip(img)
+
+
+def five_crop(img: Tensor, size: List[int]) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]:
+ """Crop the given image into four corners and the central crop.
+ If the image is torch Tensor, it is expected
+ to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
+
+ .. Note::
+ This transform returns a tuple of images and there may be a
+ mismatch in the number of inputs and targets your ``Dataset`` returns.
+
+ Args:
+ img (PIL Image or Tensor): Image to be cropped.
+ size (sequence or int): Desired output size of the crop. If size is an
+ int instead of sequence like (h, w), a square crop (size, size) is
+ made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]).
+
+ Returns:
+ tuple: tuple (tl, tr, bl, br, center)
+ Corresponding top left, top right, bottom left, bottom right and center crop.
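+
+    Example (illustrative sketch; the shapes below are assumed)::
+
+        >>> img = torch.rand(3, 256, 256)
+        >>> tl, tr, bl, br, center = five_crop(img, [224, 224])
+        >>> center.shape
+        torch.Size([3, 224, 224])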
+ """
+ if isinstance(size, numbers.Number):
+ size = (int(size), int(size))
+ elif isinstance(size, (tuple, list)) and len(size) == 1:
+ size = (size[0], size[0])
+
+ if len(size) != 2:
+ raise ValueError("Please provide only two dimensions (h, w) for size.")
+
+ image_width, image_height = get_image_size(img)
+ crop_height, crop_width = size
+ if crop_width > image_width or crop_height > image_height:
+ msg = "Requested crop size {} is bigger than input size {}"
+ raise ValueError(msg.format(size, (image_height, image_width)))
+
+ tl = crop(img, 0, 0, crop_height, crop_width)
+ tr = crop(img, 0, image_width - crop_width, crop_height, crop_width)
+ bl = crop(img, image_height - crop_height, 0, crop_height, crop_width)
+ br = crop(img, image_height - crop_height, image_width - crop_width, crop_height, crop_width)
+
+ center = center_crop(img, [crop_height, crop_width])
+
+ return tl, tr, bl, br, center
+
+
+def ten_crop(img: Tensor, size: List[int], vertical_flip: bool = False) -> List[Tensor]:
+ """Generate ten cropped images from the given image.
+ Crop the given image into four corners and the central crop plus the
+ flipped version of these (horizontal flipping is used by default).
+ If the image is torch Tensor, it is expected
+ to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
+
+ .. Note::
+ This transform returns a tuple of images and there may be a
+ mismatch in the number of inputs and targets your ``Dataset`` returns.
+
+ Args:
+ img (PIL Image or Tensor): Image to be cropped.
+ size (sequence or int): Desired output size of the crop. If size is an
+ int instead of sequence like (h, w), a square crop (size, size) is
+ made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]).
+ vertical_flip (bool): Use vertical flipping instead of horizontal
+
+ Returns:
+ tuple: tuple (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip, br_flip, center_flip)
+ Corresponding top left, top right, bottom left, bottom right and
+ center crop and same for the flipped image.
+ """
+ if isinstance(size, numbers.Number):
+ size = (int(size), int(size))
+ elif isinstance(size, (tuple, list)) and len(size) == 1:
+ size = (size[0], size[0])
+
+ if len(size) != 2:
+ raise ValueError("Please provide only two dimensions (h, w) for size.")
+
+ first_five = five_crop(img, size)
+
+ if vertical_flip:
+ img = vflip(img)
+ else:
+ img = hflip(img)
+
+ second_five = five_crop(img, size)
+ return first_five + second_five
+
+
+def adjust_brightness(img: Tensor, brightness_factor: float) -> Tensor:
+ """Adjust brightness of an image.
+
+ Args:
+ img (PIL Image or Tensor): Image to be adjusted.
+ If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format,
+ where ... means it can have an arbitrary number of leading dimensions.
+ brightness_factor (float): How much to adjust the brightness. Can be
+ any non negative number. 0 gives a black image, 1 gives the
+ original image while 2 increases the brightness by a factor of 2.
+
+ Returns:
+ PIL Image or Tensor: Brightness adjusted image.
+ """
+ if not isinstance(img, torch.Tensor):
+ return F_pil.adjust_brightness(img, brightness_factor)
+
+ return F_t.adjust_brightness(img, brightness_factor)
+
+
+def adjust_contrast(img: Tensor, contrast_factor: float) -> Tensor:
+ """Adjust contrast of an image.
+
+ Args:
+ img (PIL Image or Tensor): Image to be adjusted.
+ If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format,
+ where ... means it can have an arbitrary number of leading dimensions.
+ contrast_factor (float): How much to adjust the contrast. Can be any
+ non negative number. 0 gives a solid gray image, 1 gives the
+ original image while 2 increases the contrast by a factor of 2.
+
+ Returns:
+ PIL Image or Tensor: Contrast adjusted image.
+ """
+ if not isinstance(img, torch.Tensor):
+ return F_pil.adjust_contrast(img, contrast_factor)
+
+ return F_t.adjust_contrast(img, contrast_factor)
+
+
+def adjust_saturation(img: Tensor, saturation_factor: float) -> Tensor:
+ """Adjust color saturation of an image.
+
+ Args:
+ img (PIL Image or Tensor): Image to be adjusted.
+ If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format,
+ where ... means it can have an arbitrary number of leading dimensions.
+ saturation_factor (float): How much to adjust the saturation. 0 will
+ give a black and white image, 1 will give the original image while
+ 2 will enhance the saturation by a factor of 2.
+
+ Returns:
+ PIL Image or Tensor: Saturation adjusted image.
+ """
+ if not isinstance(img, torch.Tensor):
+ return F_pil.adjust_saturation(img, saturation_factor)
+
+ return F_t.adjust_saturation(img, saturation_factor)
+
+
+def adjust_hue(img: Tensor, hue_factor: float) -> Tensor:
+ """Adjust hue of an image.
+
+ The image hue is adjusted by converting the image to HSV and
+ cyclically shifting the intensities in the hue channel (H).
+ The image is then converted back to original image mode.
+
+ `hue_factor` is the amount of shift in H channel and must be in the
+ interval `[-0.5, 0.5]`.
+
+ See `Hue`_ for more details.
+
+ .. _Hue: https://en.wikipedia.org/wiki/Hue
+
+ Args:
+ img (PIL Image or Tensor): Image to be adjusted.
+ If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format,
+ where ... means it can have an arbitrary number of leading dimensions.
+            If img is PIL Image, modes "1", "I", "F" and modes with transparency (alpha channel) are not supported.
+ hue_factor (float): How much to shift the hue channel. Should be in
+ [-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in
+ HSV space in positive and negative direction respectively.
+ 0 means no shift. Therefore, both -0.5 and 0.5 will give an image
+ with complementary colors while 0 gives the original image.
+
+ Returns:
+ PIL Image or Tensor: Hue adjusted image.
+ """
+ if not isinstance(img, torch.Tensor):
+ return F_pil.adjust_hue(img, hue_factor)
+
+ return F_t.adjust_hue(img, hue_factor)
+
+
+def adjust_gamma(img: Tensor, gamma: float, gain: float = 1) -> Tensor:
+ r"""Perform gamma correction on an image.
+
+ Also known as Power Law Transform. Intensities in RGB mode are adjusted
+ based on the following equation:
+
+ .. math::
+ I_{\text{out}} = 255 \times \text{gain} \times \left(\frac{I_{\text{in}}}{255}\right)^{\gamma}
+
+ See `Gamma Correction`_ for more details.
+
+ .. _Gamma Correction: https://en.wikipedia.org/wiki/Gamma_correction
+
+ Args:
+ img (PIL Image or Tensor): PIL Image to be adjusted.
+ If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format,
+ where ... means it can have an arbitrary number of leading dimensions.
+ If img is PIL Image, modes with transparency (alpha channel) are not supported.
+ gamma (float): Non negative real number, same as :math:`\gamma` in the equation.
+ gamma larger than 1 make the shadows darker,
+ while gamma smaller than 1 make dark regions lighter.
+ gain (float): The constant multiplier.
+ Returns:
+ PIL Image or Tensor: Gamma correction adjusted image.
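+
+    Example (illustrative sketch; the gamma values below are assumed)::
+
+        >>> img = torch.rand(3, 64, 64)
+        >>> darker = adjust_gamma(img, gamma=2.0)         # gamma > 1 darkens shadows
+        >>> lighter = adjust_gamma(img, gamma=0.5)        # gamma < 1 lightens dark regions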
+ """
+ if not isinstance(img, torch.Tensor):
+ return F_pil.adjust_gamma(img, gamma, gain)
+
+ return F_t.adjust_gamma(img, gamma, gain)
+
+
+def _get_inverse_affine_matrix(
+ center: List[float], angle: float, translate: List[float], scale: float, shear: List[float]
+) -> List[float]:
+ # Helper method to compute inverse matrix for affine transformation
+
+ # As it is explained in PIL.Image.rotate
+    # We need to compute the INVERSE of the affine transformation matrix: M = T * C * RSS * C^-1
+ # where T is translation matrix: [1, 0, tx | 0, 1, ty | 0, 0, 1]
+ # C is translation matrix to keep center: [1, 0, cx | 0, 1, cy | 0, 0, 1]
+ # RSS is rotation with scale and shear matrix
+ # RSS(a, s, (sx, sy)) =
+ # = R(a) * S(s) * SHy(sy) * SHx(sx)
+    #       = [ s*cos(a - sy)/cos(sy), s*(-cos(a - sy)*tan(sx)/cos(sy) - sin(a)), 0 ]
+    #         [ s*sin(a - sy)/cos(sy), s*(-sin(a - sy)*tan(sx)/cos(sy) + cos(a)), 0 ]
+ # [ 0 , 0 , 1 ]
+ #
+ # where R is a rotation matrix, S is a scaling matrix, and SHx and SHy are the shears:
+ # SHx(s) = [1, -tan(s)] and SHy(s) = [1 , 0]
+ # [0, 1 ] [-tan(s), 1]
+ #
+ # Thus, the inverse is M^-1 = C * RSS^-1 * C^-1 * T^-1
+
+ rot = math.radians(angle)
+ sx = math.radians(shear[0])
+ sy = math.radians(shear[1])
+
+ cx, cy = center
+ tx, ty = translate
+
+ # RSS without scaling
+ a = math.cos(rot - sy) / math.cos(sy)
+ b = -math.cos(rot - sy) * math.tan(sx) / math.cos(sy) - math.sin(rot)
+ c = math.sin(rot - sy) / math.cos(sy)
+ d = -math.sin(rot - sy) * math.tan(sx) / math.cos(sy) + math.cos(rot)
+
+ # Inverted rotation matrix with scale and shear
+ # det([[a, b], [c, d]]) == 1, since det(rotation) = 1 and det(shear) = 1
+ matrix = [d, -b, 0.0, -c, a, 0.0]
+ matrix = [x / scale for x in matrix]
+
+ # Apply inverse of translation and of center translation: RSS^-1 * C^-1 * T^-1
+ matrix[2] += matrix[0] * (-cx - tx) + matrix[1] * (-cy - ty)
+ matrix[5] += matrix[3] * (-cx - tx) + matrix[4] * (-cy - ty)
+
+ # Apply center translation: C * RSS^-1 * C^-1 * T^-1
+ matrix[2] += cx
+ matrix[5] += cy
+
+ return matrix
+
+
+def rotate(
+ img: Tensor,
+ angle: float,
+ interpolation: InterpolationMode = InterpolationMode.NEAREST,
+ expand: bool = False,
+ center: Optional[List[int]] = None,
+ fill: Optional[List[float]] = None,
+ resample: Optional[int] = None,
+) -> Tensor:
+ """Rotate the image by angle.
+ If the image is torch Tensor, it is expected
+ to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
+
+ Args:
+ img (PIL Image or Tensor): image to be rotated.
+ angle (number): rotation angle value in degrees, counter-clockwise.
+ interpolation (InterpolationMode): Desired interpolation enum defined by
+ :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``.
+ If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
+ For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.
+ expand (bool, optional): Optional expansion flag.
+ If true, expands the output image to make it large enough to hold the entire rotated image.
+ If false or omitted, make the output image the same size as the input image.
+ Note that the expand flag assumes rotation around the center and no translation.
+ center (sequence, optional): Optional center of rotation. Origin is the upper left corner.
+ Default is the center of the image.
+ fill (sequence or number, optional): Pixel fill value for the area outside the transformed
+ image. If given a number, the value is used for all bands respectively.
+
+ .. note::
+ In torchscript mode single int/float value is not supported, please use a sequence
+ of length 1: ``[value, ]``.
+
+ Returns:
+ PIL Image or Tensor: Rotated image.
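+
+    Example (illustrative sketch; the angles below are assumed)::
+
+        >>> img = torch.rand(3, 64, 64)
+        >>> same_canvas = rotate(img, angle=30.0)              # output keeps the input size
+        >>> expanded = rotate(img, angle=30.0, expand=True)    # canvas grows to hold the rotation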
+
+ .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters
+
+ """
+ if resample is not None:
+ warnings.warn(
+ "Argument resample is deprecated and will be removed since v0.10.0. Please, use interpolation instead"
+ )
+ interpolation = _interpolation_modes_from_int(resample)
+
+ # Backward compatibility with integer value
+ if isinstance(interpolation, int):
+ warnings.warn(
+ "Argument interpolation should be of type InterpolationMode instead of int. "
+ "Please, use InterpolationMode enum."
+ )
+ interpolation = _interpolation_modes_from_int(interpolation)
+
+ if not isinstance(angle, (int, float)):
+ raise TypeError("Argument angle should be int or float")
+
+ if center is not None and not isinstance(center, (list, tuple)):
+ raise TypeError("Argument center should be a sequence")
+
+ if not isinstance(interpolation, InterpolationMode):
+        raise TypeError("Argument interpolation should be an InterpolationMode")
+
+ if not isinstance(img, torch.Tensor):
+ pil_interpolation = pil_modes_mapping[interpolation]
+ return F_pil.rotate(img, angle=angle, interpolation=pil_interpolation, expand=expand, center=center, fill=fill)
+
+ center_f = [0.0, 0.0]
+ if center is not None:
+ img_size = get_image_size(img)
+ # Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center.
+ center_f = [1.0 * (c - s * 0.5) for c, s in zip(center, img_size)]
+
+ # due to current incoherence of rotation angle direction between affine and rotate implementations
+ # we need to set -angle.
+ matrix = _get_inverse_affine_matrix(center_f, -angle, [0.0, 0.0], 1.0, [0.0, 0.0])
+ return F_t.rotate(img, matrix=matrix, interpolation=interpolation.value, expand=expand, fill=fill)
+
+
+def affine(
+ img: Tensor,
+ angle: float,
+ translate: List[int],
+ scale: float,
+ shear: List[float],
+ interpolation: InterpolationMode = InterpolationMode.NEAREST,
+ fill: Optional[List[float]] = None,
+ resample: Optional[int] = None,
+ fillcolor: Optional[List[float]] = None,
+) -> Tensor:
+ """Apply affine transformation on the image keeping image center invariant.
+ If the image is torch Tensor, it is expected
+ to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
+
+ Args:
+ img (PIL Image or Tensor): image to transform.
+ angle (number): rotation angle in degrees between -180 and 180, clockwise direction.
+ translate (sequence of integers): horizontal and vertical translations (post-rotation translation)
+ scale (float): overall scale
+ shear (float or sequence): shear angle value in degrees between -180 to 180, clockwise direction.
+ If a sequence is specified, the first value corresponds to a shear parallel to the x axis, while
+ the second value corresponds to a shear parallel to the y axis.
+ interpolation (InterpolationMode): Desired interpolation enum defined by
+ :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``.
+ If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
+ For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.
+ fill (sequence or number, optional): Pixel fill value for the area outside the transformed
+ image. If given a number, the value is used for all bands respectively.
+
+ .. note::
+ In torchscript mode single int/float value is not supported, please use a sequence
+ of length 1: ``[value, ]``.
+ fillcolor (sequence, int, float): deprecated argument and will be removed since v0.10.0.
+ Please use the ``fill`` parameter instead.
+ resample (int, optional): deprecated argument and will be removed since v0.10.0.
+ Please use the ``interpolation`` parameter instead.
+
+ Returns:
+ PIL Image or Tensor: Transformed image.
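+
+    Example (illustrative sketch; the parameter values below are assumed)::
+
+        >>> img = torch.rand(3, 64, 64)
+        >>> out = affine(img, angle=15.0, translate=[5, -3], scale=1.2, shear=[0.0, 0.0])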
+ """
+ if resample is not None:
+ warnings.warn(
+ "Argument resample is deprecated and will be removed since v0.10.0. Please, use interpolation instead"
+ )
+ interpolation = _interpolation_modes_from_int(resample)
+
+ # Backward compatibility with integer value
+ if isinstance(interpolation, int):
+ warnings.warn(
+ "Argument interpolation should be of type InterpolationMode instead of int. "
+ "Please, use InterpolationMode enum."
+ )
+ interpolation = _interpolation_modes_from_int(interpolation)
+
+ if fillcolor is not None:
+ warnings.warn("Argument fillcolor is deprecated and will be removed since v0.10.0. Please, use fill instead")
+ fill = fillcolor
+
+ if not isinstance(angle, (int, float)):
+ raise TypeError("Argument angle should be int or float")
+
+ if not isinstance(translate, (list, tuple)):
+ raise TypeError("Argument translate should be a sequence")
+
+ if len(translate) != 2:
+ raise ValueError("Argument translate should be a sequence of length 2")
+
+ if scale <= 0.0:
+ raise ValueError("Argument scale should be positive")
+
+    if not isinstance(shear, (numbers.Number, list, tuple)):
+ raise TypeError("Shear should be either a single value or a sequence of two values")
+
+ if not isinstance(interpolation, InterpolationMode):
+        raise TypeError("Argument interpolation should be an InterpolationMode")
+
+ if isinstance(angle, int):
+ angle = float(angle)
+
+ if isinstance(translate, tuple):
+ translate = list(translate)
+
+ if isinstance(shear, numbers.Number):
+ shear = [shear, 0.0]
+
+ if isinstance(shear, tuple):
+ shear = list(shear)
+
+ if len(shear) == 1:
+ shear = [shear[0], shear[0]]
+
+ if len(shear) != 2:
+ raise ValueError(f"Shear should be a sequence containing two values. Got {shear}")
+
+ img_size = get_image_size(img)
+ if not isinstance(img, torch.Tensor):
+ # center = (img_size[0] * 0.5 + 0.5, img_size[1] * 0.5 + 0.5)
+ # it is visually better to estimate the center without 0.5 offset
+ # otherwise image rotated by 90 degrees is shifted vs output image of torch.rot90 or F_t.affine
+ center = [img_size[0] * 0.5, img_size[1] * 0.5]
+ matrix = _get_inverse_affine_matrix(center, angle, translate, scale, shear)
+ pil_interpolation = pil_modes_mapping[interpolation]
+ return F_pil.affine(img, matrix=matrix, interpolation=pil_interpolation, fill=fill)
+
+ translate_f = [1.0 * t for t in translate]
+ matrix = _get_inverse_affine_matrix([0.0, 0.0], angle, translate_f, scale, shear)
+ return F_t.affine(img, matrix=matrix, interpolation=interpolation.value, fill=fill)
+
+
+@torch.jit.unused
+def to_grayscale(img, num_output_channels=1):
+ """Convert PIL image of any mode (RGB, HSV, LAB, etc) to grayscale version of image.
+ This transform does not support torch Tensor.
+
+ Args:
+ img (PIL Image): PIL Image to be converted to grayscale.
+ num_output_channels (int): number of channels of the output image. Value can be 1 or 3. Default is 1.
+
+ Returns:
+ PIL Image: Grayscale version of the image.
+
+ - if num_output_channels = 1 : returned image is single channel
+ - if num_output_channels = 3 : returned image is 3 channel with r = g = b
+ """
+ if isinstance(img, Image.Image):
+ return F_pil.to_grayscale(img, num_output_channels)
+
+ raise TypeError("Input should be PIL Image")
+
+
+def rgb_to_grayscale(img: Tensor, num_output_channels: int = 1) -> Tensor:
+ """Convert RGB image to grayscale version of image.
+ If the image is torch Tensor, it is expected
+ to have [..., 3, H, W] shape, where ... means an arbitrary number of leading dimensions
+
+ Note:
+        Please note that this method supports only RGB images as input. For inputs in other color spaces,
+        please consider using :meth:`~torchvision.transforms.functional.to_grayscale` with PIL Image.
+
+ Args:
+ img (PIL Image or Tensor): RGB Image to be converted to grayscale.
+ num_output_channels (int): number of channels of the output image. Value can be 1 or 3. Default, 1.
+
+ Returns:
+ PIL Image or Tensor: Grayscale version of the image.
+
+ - if num_output_channels = 1 : returned image is single channel
+ - if num_output_channels = 3 : returned image is 3 channel with r = g = b
+ """
+ if not isinstance(img, torch.Tensor):
+ return F_pil.to_grayscale(img, num_output_channels)
+
+ return F_t.rgb_to_grayscale(img, num_output_channels)
+
+
+def erase(img: Tensor, i: int, j: int, h: int, w: int, v: Tensor, inplace: bool = False) -> Tensor:
+ """Erase the input Tensor Image with given value.
+ This transform does not support PIL Image.
+
+ Args:
+ img (Tensor Image): Tensor image of size (C, H, W) to be erased
+        i (int): i in (i, j), i.e. the vertical coordinate of the upper left corner.
+        j (int): j in (i, j), i.e. the horizontal coordinate of the upper left corner.
+ h (int): Height of the erased region.
+ w (int): Width of the erased region.
+ v: Erasing value.
+        inplace(bool, optional): For in-place operations. By default, it is set to False.
+
+ Returns:
+ Tensor Image: Erased image.
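+
+    Example (illustrative sketch; the region and value below are assumed)::
+
+        >>> img = torch.rand(3, 64, 64)
+        >>> out = erase(img, i=10, j=10, h=16, w=16, v=torch.zeros(1))   # zero out a 16x16 patch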
+ """
+ if not isinstance(img, torch.Tensor):
+ raise TypeError(f"img should be Tensor Image. Got {type(img)}")
+
+ if not inplace:
+ img = img.clone()
+
+ img[..., i : i + h, j : j + w] = v
+ return img
+
+
+def gaussian_blur(img: Tensor, kernel_size: List[int], sigma: Optional[List[float]] = None) -> Tensor:
+ """Performs Gaussian blurring on the image by given kernel.
+ If the image is torch Tensor, it is expected
+ to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
+
+ Args:
+ img (PIL Image or Tensor): Image to be blurred
+ kernel_size (sequence of ints or int): Gaussian kernel size. Can be a sequence of integers
+ like ``(kx, ky)`` or a single integer for square kernels.
+
+ .. note::
+ In torchscript mode kernel_size as single int is not supported, use a sequence of
+ length 1: ``[ksize, ]``.
+ sigma (sequence of floats or float, optional): Gaussian kernel standard deviation. Can be a
+ sequence of floats like ``(sigma_x, sigma_y)`` or a single float to define the
+ same sigma in both X/Y directions. If None, then it is computed using
+ ``kernel_size`` as ``sigma = 0.3 * ((kernel_size - 1) * 0.5 - 1) + 0.8``.
+ Default, None.
+
+ .. note::
+ In torchscript mode sigma as single float is
+ not supported, use a sequence of length 1: ``[sigma, ]``.
+
+ Returns:
+ PIL Image or Tensor: Gaussian Blurred version of the image.
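+
+    Example (illustrative sketch; the kernel size and sigma below are assumed)::
+
+        >>> img = torch.rand(3, 64, 64)
+        >>> blurred = gaussian_blur(img, kernel_size=[5, 5], sigma=[1.0, 1.0])
+        >>> auto_sigma = gaussian_blur(img, kernel_size=[5, 5])   # sigma derived from kernel_size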
+ """
+ if not isinstance(kernel_size, (int, list, tuple)):
+ raise TypeError(f"kernel_size should be int or a sequence of integers. Got {type(kernel_size)}")
+ if isinstance(kernel_size, int):
+ kernel_size = [kernel_size, kernel_size]
+ if len(kernel_size) != 2:
+ raise ValueError(f"If kernel_size is a sequence its length should be 2. Got {len(kernel_size)}")
+ for ksize in kernel_size:
+ if ksize % 2 == 0 or ksize < 0:
+ raise ValueError(f"kernel_size should have odd and positive integers. Got {kernel_size}")
+
+ if sigma is None:
+ sigma = [ksize * 0.15 + 0.35 for ksize in kernel_size]
+
+ if sigma is not None and not isinstance(sigma, (int, float, list, tuple)):
+ raise TypeError(f"sigma should be either float or sequence of floats. Got {type(sigma)}")
+ if isinstance(sigma, (int, float)):
+ sigma = [float(sigma), float(sigma)]
+ if isinstance(sigma, (list, tuple)) and len(sigma) == 1:
+ sigma = [sigma[0], sigma[0]]
+ if len(sigma) != 2:
+ raise ValueError(f"If sigma is a sequence, its length should be 2. Got {len(sigma)}")
+ for s in sigma:
+ if s <= 0.0:
+ raise ValueError(f"sigma should have positive values. Got {sigma}")
+
+ t_img = img
+ if not isinstance(img, torch.Tensor):
+ if not F_pil._is_pil_image(img):
+ raise TypeError(f"img should be PIL Image or Tensor. Got {type(img)}")
+
+ t_img = to_tensor(img)
+
+ output = F_t.gaussian_blur(t_img, kernel_size, sigma)
+
+ if not isinstance(img, torch.Tensor):
+ output = to_pil_image(output)
+ return output
+
+
+def invert(img: Tensor) -> Tensor:
+ """Invert the colors of an RGB/grayscale image.
+
+ Args:
+ img (PIL Image or Tensor): Image to have its colors inverted.
+ If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format,
+ where ... means it can have an arbitrary number of leading dimensions.
+ If img is PIL Image, it is expected to be in mode "L" or "RGB".
+
+ Returns:
+ PIL Image or Tensor: Color inverted image.
+ """
+ if not isinstance(img, torch.Tensor):
+ return F_pil.invert(img)
+
+ return F_t.invert(img)
+
+
+def posterize(img: Tensor, bits: int) -> Tensor:
+ """Posterize an image by reducing the number of bits for each color channel.
+
+ Args:
+ img (PIL Image or Tensor): Image to have its colors posterized.
+ If img is torch Tensor, it should be of type torch.uint8 and
+ it is expected to be in [..., 1 or 3, H, W] format, where ... means
+ it can have an arbitrary number of leading dimensions.
+ If img is PIL Image, it is expected to be in mode "L" or "RGB".
+ bits (int): The number of bits to keep for each channel (0-8).
+ Returns:
+ PIL Image or Tensor: Posterized image.
+ """
+ if not (0 <= bits <= 8):
+        raise ValueError(f"The number of bits should be between 0 and 8. Got {bits}")
+
+ if not isinstance(img, torch.Tensor):
+ return F_pil.posterize(img, bits)
+
+ return F_t.posterize(img, bits)
+
+
+def solarize(img: Tensor, threshold: float) -> Tensor:
+ """Solarize an RGB/grayscale image by inverting all pixel values above a threshold.
+
+ Args:
+ img (PIL Image or Tensor): Image to have its colors inverted.
+ If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format,
+ where ... means it can have an arbitrary number of leading dimensions.
+ If img is PIL Image, it is expected to be in mode "L" or "RGB".
+        threshold (float): All pixels equal to or above this value are inverted.
+ Returns:
+ PIL Image or Tensor: Solarized image.
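+
+    Example (illustrative sketch; the threshold below is assumed)::
+
+        >>> img = torch.randint(0, 256, (3, 32, 32), dtype=torch.uint8)
+        >>> out = solarize(img, threshold=128)            # pixels >= 128 are inverted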
+ """
+ if not isinstance(img, torch.Tensor):
+ return F_pil.solarize(img, threshold)
+
+ return F_t.solarize(img, threshold)
+
+
+def adjust_sharpness(img: Tensor, sharpness_factor: float) -> Tensor:
+ """Adjust the sharpness of an image.
+
+ Args:
+ img (PIL Image or Tensor): Image to be adjusted.
+ If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format,
+ where ... means it can have an arbitrary number of leading dimensions.
+ sharpness_factor (float): How much to adjust the sharpness. Can be
+ any non negative number. 0 gives a blurred image, 1 gives the
+ original image while 2 increases the sharpness by a factor of 2.
+
+ Returns:
+ PIL Image or Tensor: Sharpness adjusted image.
+ """
+ if not isinstance(img, torch.Tensor):
+ return F_pil.adjust_sharpness(img, sharpness_factor)
+
+ return F_t.adjust_sharpness(img, sharpness_factor)
+
+
+def autocontrast(img: Tensor) -> Tensor:
+ """Maximize contrast of an image by remapping its
+ pixels per channel so that the lowest becomes black and the lightest
+ becomes white.
+
+ Args:
+ img (PIL Image or Tensor): Image on which autocontrast is applied.
+ If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format,
+ where ... means it can have an arbitrary number of leading dimensions.
+ If img is PIL Image, it is expected to be in mode "L" or "RGB".
+
+ Returns:
+ PIL Image or Tensor: An image that was autocontrasted.
+ """
+ if not isinstance(img, torch.Tensor):
+ return F_pil.autocontrast(img)
+
+ return F_t.autocontrast(img)
+
+
+def equalize(img: Tensor) -> Tensor:
+ """Equalize the histogram of an image by applying
+ a non-linear mapping to the input in order to create a uniform
+ distribution of grayscale values in the output.
+
+ Args:
+ img (PIL Image or Tensor): Image on which equalize is applied.
+ If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format,
+ where ... means it can have an arbitrary number of leading dimensions.
+ The tensor dtype must be ``torch.uint8`` and values are expected to be in ``[0, 255]``.
+ If img is PIL Image, it is expected to be in mode "P", "L" or "RGB".
+
+ Returns:
+ PIL Image or Tensor: An image that was equalized.
+ """
+ if not isinstance(img, torch.Tensor):
+ return F_pil.equalize(img)
+
+ return F_t.equalize(img)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_origin.py
new file mode 100644
index 0000000000..7ce1fb6ab3
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_origin.py
@@ -0,0 +1,906 @@
+import torch
+import math
+from PIL import Image, ImageOps, ImageEnhance, __version__ as PILLOW_VERSION
+try:
+ import accimage
+except ImportError:
+ accimage = None
+import numpy as np
+from numpy import sin, cos, tan
+import numbers
+from collections.abc import Sequence, Iterable
+import warnings
+
+
+def _is_pil_image(img):
+ if accimage is not None:
+ return isinstance(img, (Image.Image, accimage.Image))
+ else:
+ return isinstance(img, Image.Image)
+
+
+def _is_numpy(img):
+ return isinstance(img, np.ndarray)
+
+
+def _is_numpy_image(img):
+ return img.ndim in {2, 3}
+
+
+def to_tensor(pic):
+ """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.
+
+ See ``ToTensor`` for more details.
+
+ Args:
+ pic (PIL Image or numpy.ndarray): Image to be converted to tensor.
+
+ Returns:
+ Tensor: Converted image.
+ """
+ if not(_is_pil_image(pic) or _is_numpy(pic)):
+ raise TypeError('pic should be PIL Image or ndarray. Got {}'.format(type(pic)))
+
+ if _is_numpy(pic) and not _is_numpy_image(pic):
+ raise ValueError('pic should be 2/3 dimensional. Got {} dimensions.'.format(pic.ndim))
+
+ if isinstance(pic, np.ndarray):
+ # handle numpy array
+ if pic.ndim == 2:
+ pic = pic[:, :, None]
+
+ img = torch.from_numpy(pic.transpose((2, 0, 1)))
+ # backward compatibility
+ if isinstance(img, torch.ByteTensor):
+ return img.float().div(255)
+ else:
+ return img
+
+ if accimage is not None and isinstance(pic, accimage.Image):
+ nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.float32)
+ pic.copyto(nppic)
+ return torch.from_numpy(nppic)
+
+ # handle PIL Image
+ if pic.mode == 'I':
+ img = torch.from_numpy(np.array(pic, np.int32, copy=False))
+ elif pic.mode == 'I;16':
+ img = torch.from_numpy(np.array(pic, np.int16, copy=False))
+ elif pic.mode == 'F':
+ img = torch.from_numpy(np.array(pic, np.float32, copy=False))
+ elif pic.mode == '1':
+ img = 255 * torch.from_numpy(np.array(pic, np.uint8, copy=False))
+ else:
+ img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
+
+ img = img.view(pic.size[1], pic.size[0], len(pic.getbands()))
+ # put it from HWC to CHW format
+ img = img.permute((2, 0, 1)).contiguous()
+ if isinstance(img, torch.ByteTensor):
+ return img.float().div(255)
+ else:
+ return img
+
+
+def to_pil_image(pic, mode=None):
+ """Convert a tensor or an ndarray to PIL Image.
+
+ See :class:`~torchvision.transforms.ToPILImage` for more details.
+
+ Args:
+ pic (Tensor or numpy.ndarray): Image to be converted to PIL Image.
+ mode (`PIL.Image mode`_): color space and pixel depth of input data (optional).
+
+ .. _PIL.Image mode: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#concept-modes
+
+ Returns:
+ PIL Image: Image converted to PIL Image.
+ """
+ if not(isinstance(pic, torch.Tensor) or isinstance(pic, np.ndarray)):
+ raise TypeError('pic should be Tensor or ndarray. Got {}.'.format(type(pic)))
+
+ elif isinstance(pic, torch.Tensor):
+ if pic.ndimension() not in {2, 3}:
+ raise ValueError('pic should be 2/3 dimensional. Got {} dimensions.'.format(pic.ndimension()))
+
+ elif pic.ndimension() == 2:
+ # if 2D image, add channel dimension (CHW)
+ pic = pic.unsqueeze(0)
+
+ elif isinstance(pic, np.ndarray):
+ if pic.ndim not in {2, 3}:
+ raise ValueError('pic should be 2/3 dimensional. Got {} dimensions.'.format(pic.ndim))
+
+ elif pic.ndim == 2:
+ # if 2D image, add channel dimension (HWC)
+ pic = np.expand_dims(pic, 2)
+
+ npimg = pic
+ if isinstance(pic, torch.FloatTensor) and mode != 'F':
+ pic = pic.mul(255).byte()
+ if isinstance(pic, torch.Tensor):
+ npimg = np.transpose(pic.numpy(), (1, 2, 0))
+
+ if not isinstance(npimg, np.ndarray):
+ raise TypeError('Input pic must be a torch.Tensor or NumPy ndarray, ' +
+ 'not {}'.format(type(npimg)))
+
+ if npimg.shape[2] == 1:
+ expected_mode = None
+ npimg = npimg[:, :, 0]
+ if npimg.dtype == np.uint8:
+ expected_mode = 'L'
+ elif npimg.dtype == np.int16:
+ expected_mode = 'I;16'
+ elif npimg.dtype == np.int32:
+ expected_mode = 'I'
+ elif npimg.dtype == np.float32:
+ expected_mode = 'F'
+ if mode is not None and mode != expected_mode:
+            raise ValueError("Incorrect mode ({}) supplied for input type {}. Should be {}"
+                             .format(mode, npimg.dtype, expected_mode))
+ mode = expected_mode
+
+ elif npimg.shape[2] == 2:
+ permitted_2_channel_modes = ['LA']
+ if mode is not None and mode not in permitted_2_channel_modes:
+ raise ValueError("Only modes {} are supported for 2D inputs".format(permitted_2_channel_modes))
+
+ if mode is None and npimg.dtype == np.uint8:
+ mode = 'LA'
+
+ elif npimg.shape[2] == 4:
+ permitted_4_channel_modes = ['RGBA', 'CMYK', 'RGBX']
+ if mode is not None and mode not in permitted_4_channel_modes:
+ raise ValueError("Only modes {} are supported for 4D inputs".format(permitted_4_channel_modes))
+
+ if mode is None and npimg.dtype == np.uint8:
+ mode = 'RGBA'
+ else:
+ permitted_3_channel_modes = ['RGB', 'YCbCr', 'HSV']
+ if mode is not None and mode not in permitted_3_channel_modes:
+ raise ValueError("Only modes {} are supported for 3D inputs".format(permitted_3_channel_modes))
+ if mode is None and npimg.dtype == np.uint8:
+ mode = 'RGB'
+
+ if mode is None:
+ raise TypeError('Input type {} is not supported'.format(npimg.dtype))
+
+ return Image.fromarray(npimg, mode=mode)
+
+
+def normalize(tensor, mean, std, inplace=False):
+ """Normalize a tensor image with mean and standard deviation.
+
+ .. note::
+        This transform acts out of place by default, i.e., it does not mutate the input tensor.
+
+ See :class:`~torchvision.transforms.Normalize` for more details.
+
+ Args:
+ tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
+ mean (sequence): Sequence of means for each channel.
+ std (sequence): Sequence of standard deviations for each channel.
+ inplace(bool,optional): Bool to make this operation inplace.
+
+ Returns:
+ Tensor: Normalized Tensor image.
+ """
+ if not torch.is_tensor(tensor):
+ raise TypeError('tensor should be a torch tensor. Got {}.'.format(type(tensor)))
+
+ if tensor.ndimension() != 3:
+ raise ValueError('Expected tensor to be a tensor image of size (C, H, W). Got tensor.size() = '
+ '{}.'.format(tensor.size()))
+
+ if not inplace:
+ tensor = tensor.clone()
+
+ dtype = tensor.dtype
+ mean = torch.as_tensor(mean, dtype=dtype, device=tensor.device)
+ std = torch.as_tensor(std, dtype=dtype, device=tensor.device)
+ if (std == 0).any():
+ raise ValueError('std evaluated to zero after conversion to {}, leading to division by zero.'.format(dtype))
+ if mean.ndim == 1:
+ mean = mean[:, None, None]
+ if std.ndim == 1:
+ std = std[:, None, None]
+ tensor.sub_(mean).div_(std)
+ return tensor
+
+
+def resize(img, size, interpolation=Image.BILINEAR):
+ r"""Resize the input PIL Image to the given size.
+
+ Args:
+ img (PIL Image): Image to be resized.
+ size (sequence or int): Desired output size. If size is a sequence like
+ (h, w), the output size will be matched to this. If size is an int,
+            the smaller edge of the image will be matched to this number, maintaining
+            the aspect ratio, i.e., if height > width, then the image will be rescaled to
+ :math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)`
+ interpolation (int, optional): Desired interpolation. Default is
+ ``PIL.Image.BILINEAR``
+
+ Returns:
+ PIL Image: Resized image.
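+
+    Example (illustrative sketch; the sizes below are arbitrary)::
+
+        >>> img = Image.new('RGB', (640, 480))
+        >>> resize(img, 240).size          # smaller edge matched to 240, aspect ratio kept
+        (320, 240)
+        >>> resize(img, (100, 200)).size   # (h, w) sequence gives the exact output size
+        (200, 100)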
+ """
+ if not _is_pil_image(img):
+ raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+ if not (isinstance(size, int) or (isinstance(size, Iterable) and len(size) == 2)):
+ raise TypeError('Got inappropriate size arg: {}'.format(size))
+
+ if isinstance(size, int):
+ w, h = img.size
+ if (w <= h and w == size) or (h <= w and h == size):
+ return img
+ if w < h:
+ ow = size
+ oh = int(size * h / w)
+ return img.resize((ow, oh), interpolation)
+ else:
+ oh = size
+ ow = int(size * w / h)
+ return img.resize((ow, oh), interpolation)
+ else:
+ return img.resize(size[::-1], interpolation)
+
+
+def scale(*args, **kwargs):
+ warnings.warn("The use of the transforms.Scale transform is deprecated, " +
+ "please use transforms.Resize instead.")
+ return resize(*args, **kwargs)
+
+
+def pad(img, padding, fill=0, padding_mode='constant'):
+ r"""Pad the given PIL Image on all sides with specified padding mode and fill value.
+
+ Args:
+ img (PIL Image): Image to be padded.
+ padding (int or tuple): Padding on each border. If a single int is provided this
+ is used to pad all borders. If tuple of length 2 is provided this is the padding
+ on left/right and top/bottom respectively. If a tuple of length 4 is provided
+ this is the padding for the left, top, right and bottom borders
+ respectively.
+ fill: Pixel fill value for constant fill. Default is 0. If a tuple of
+ length 3, it is used to fill R, G, B channels respectively.
+ This value is only used when the padding_mode is constant
+ padding_mode: Type of padding. Should be: constant, edge, reflect or symmetric. Default is constant.
+
+ - constant: pads with a constant value, this value is specified with fill
+
+ - edge: pads with the last value on the edge of the image
+
+ - reflect: pads with reflection of image (without repeating the last value on the edge)
+
+ padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode
+ will result in [3, 2, 1, 2, 3, 4, 3, 2]
+
+ - symmetric: pads with reflection of image (repeating the last value on the edge)
+
+ padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode
+ will result in [2, 1, 1, 2, 3, 4, 4, 3]
+
+ Returns:
+ PIL Image: Padded image.
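+
+    Example (illustrative sketch; the sizes below are arbitrary)::
+
+        >>> img = Image.new('RGB', (100, 100))
+        >>> pad(img, 10).size                             # 10 px added on every border
+        (120, 120)
+        >>> pad(img, (5, 10), padding_mode='edge').size   # 5 left/right, 10 top/bottom
+        (110, 120)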
+ """
+ if not _is_pil_image(img):
+ raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+ if not isinstance(padding, (numbers.Number, tuple)):
+ raise TypeError('Got inappropriate padding arg')
+ if not isinstance(fill, (numbers.Number, str, tuple)):
+ raise TypeError('Got inappropriate fill arg')
+ if not isinstance(padding_mode, str):
+ raise TypeError('Got inappropriate padding_mode arg')
+
+ if isinstance(padding, Sequence) and len(padding) not in [2, 4]:
+ raise ValueError("Padding must be an int or a 2, or 4 element tuple, not a " +
+ "{} element tuple".format(len(padding)))
+
+ assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric'], \
+ 'Padding mode should be either constant, edge, reflect or symmetric'
+
+ if padding_mode == 'constant':
+ if img.mode == 'P':
+ palette = img.getpalette()
+ image = ImageOps.expand(img, border=padding, fill=fill)
+ image.putpalette(palette)
+ return image
+
+ return ImageOps.expand(img, border=padding, fill=fill)
+ else:
+ if isinstance(padding, int):
+ pad_left = pad_right = pad_top = pad_bottom = padding
+ if isinstance(padding, Sequence) and len(padding) == 2:
+ pad_left = pad_right = padding[0]
+ pad_top = pad_bottom = padding[1]
+ if isinstance(padding, Sequence) and len(padding) == 4:
+ pad_left = padding[0]
+ pad_top = padding[1]
+ pad_right = padding[2]
+ pad_bottom = padding[3]
+
+ if img.mode == 'P':
+ palette = img.getpalette()
+ img = np.asarray(img)
+ img = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right)), padding_mode)
+ img = Image.fromarray(img)
+ img.putpalette(palette)
+ return img
+
+ img = np.asarray(img)
+ # RGB image
+ if len(img.shape) == 3:
+ img = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right), (0, 0)), padding_mode)
+ # Grayscale image
+ if len(img.shape) == 2:
+ img = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right)), padding_mode)
+
+ return Image.fromarray(img)
+
+
+def crop(img, top, left, height, width):
+ """Crop the given PIL Image.
+
+ Args:
+ img (PIL Image): Image to be cropped. (0,0) denotes the top left corner of the image.
+ top (int): Vertical component of the top left corner of the crop box.
+ left (int): Horizontal component of the top left corner of the crop box.
+ height (int): Height of the crop box.
+ width (int): Width of the crop box.
+
+ Returns:
+ PIL Image: Cropped image.
+ """
+ if not _is_pil_image(img):
+ raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+ return img.crop((left, top, left + width, top + height))
+
+
+def center_crop(img, output_size):
+ """Crop the given PIL Image and resize it to desired size.
+
+ Args:
+ img (PIL Image): Image to be cropped. (0,0) denotes the top left corner of the image.
+ output_size (sequence or int): (height, width) of the crop box. If int,
+ it is used for both directions
+ Returns:
+ PIL Image: Cropped image.
+ """
+ if isinstance(output_size, numbers.Number):
+ output_size = (int(output_size), int(output_size))
+ image_width, image_height = img.size
+ crop_height, crop_width = output_size
+ crop_top = int(round((image_height - crop_height) / 2.))
+ crop_left = int(round((image_width - crop_width) / 2.))
+ return crop(img, crop_top, crop_left, crop_height, crop_width)
+
+
+def resized_crop(img, top, left, height, width, size, interpolation=Image.BILINEAR):
+ """Crop the given PIL Image and resize it to desired size.
+
+ Notably used in :class:`~torchvision.transforms.RandomResizedCrop`.
+
+ Args:
+ img (PIL Image): Image to be cropped. (0,0) denotes the top left corner of the image.
+ top (int): Vertical component of the top left corner of the crop box.
+ left (int): Horizontal component of the top left corner of the crop box.
+ height (int): Height of the crop box.
+ width (int): Width of the crop box.
+ size (sequence or int): Desired output size. Same semantics as ``resize``.
+ interpolation (int, optional): Desired interpolation. Default is
+ ``PIL.Image.BILINEAR``.
+ Returns:
+ PIL Image: Cropped image.
+ """
+ assert _is_pil_image(img), 'img should be PIL Image'
+ img = crop(img, top, left, height, width)
+ img = resize(img, size, interpolation)
+ return img
+
+
+def hflip(img):
+ """Horizontally flip the given PIL Image.
+
+ Args:
+ img (PIL Image): Image to be flipped.
+
+ Returns:
+        PIL Image: Horizontally flipped image.
+ """
+ if not _is_pil_image(img):
+ raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+ return img.transpose(Image.FLIP_LEFT_RIGHT)
+
+
+def _parse_fill(fill, img, min_pil_version):
+ """Helper function to get the fill color for rotate and perspective transforms.
+
+ Args:
+ fill (n-tuple or int or float): Pixel fill value for area outside the transformed
+ image. If int or float, the value is used for all bands respectively.
+ Defaults to 0 for all bands.
+ img (PIL Image): Image to be filled.
+ min_pil_version (str): The minimum PILLOW version for when the ``fillcolor`` option
+ was first introduced in the calling function. (e.g. rotate->5.2.0, perspective->5.0.0)
+
+ Returns:
+ dict: kwarg for ``fillcolor``
+ """
+ major_found, minor_found = (int(v) for v in PILLOW_VERSION.split('.')[:2])
+ major_required, minor_required = (int(v) for v in min_pil_version.split('.')[:2])
+ if major_found < major_required or (major_found == major_required and minor_found < minor_required):
+ if fill is None:
+ return {}
+ else:
+            msg = ("The option to fill the background area of the transformed image "
+                   "requires pillow>={}")
+ raise RuntimeError(msg.format(min_pil_version))
+
+ num_bands = len(img.getbands())
+ if fill is None:
+ fill = 0
+ if isinstance(fill, (int, float)) and num_bands > 1:
+ fill = tuple([fill] * num_bands)
+ if not isinstance(fill, (int, float)) and len(fill) != num_bands:
+ msg = ("The number of elements in 'fill' does not match the number of "
+ "bands of the image ({} != {})")
+ raise ValueError(msg.format(len(fill), num_bands))
+
+ return {"fillcolor": fill}
+
+
+def _get_perspective_coeffs(startpoints, endpoints):
+ """Helper function to get the coefficients (a, b, c, d, e, f, g, h) for the perspective transforms.
+
+    In a perspective transform, each pixel (x, y) in the original image gets transformed as
+ (x, y) -> ( (ax + by + c) / (gx + hy + 1), (dx + ey + f) / (gx + hy + 1) )
+
+ Args:
+        startpoints: List containing [top-left, top-right, bottom-right, bottom-left]
+            of the original image.
+        endpoints: List containing [top-left, top-right, bottom-right, bottom-left] of the transformed image.
+ Returns:
+ octuple (a, b, c, d, e, f, g, h) for transforming each pixel.
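+
+    Example (illustrative sketch): when ``startpoints`` equals ``endpoints`` the
+    solve returns the identity coefficients, i.e. approximately
+    ``[1, 0, 0, 0, 1, 0, 0, 0]`` up to floating point error::
+
+        >>> corners = [[0, 0], [99, 0], [99, 99], [0, 99]]
+        >>> coeffs = _get_perspective_coeffs(corners, corners)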
+ """
+ matrix = []
+
+ for p1, p2 in zip(endpoints, startpoints):
+ matrix.append([p1[0], p1[1], 1, 0, 0, 0, -p2[0] * p1[0], -p2[0] * p1[1]])
+ matrix.append([0, 0, 0, p1[0], p1[1], 1, -p2[1] * p1[0], -p2[1] * p1[1]])
+
+ A = torch.tensor(matrix, dtype=torch.float)
+ B = torch.tensor(startpoints, dtype=torch.float).view(8)
+ res = torch.lstsq(B, A)[0]
+ return res.squeeze_(1).tolist()
+
+
+def perspective(img, startpoints, endpoints, interpolation=Image.BICUBIC, fill=None):
+ """Perform perspective transform of the given PIL Image.
+
+ Args:
+ img (PIL Image): Image to be transformed.
+        startpoints: List containing [top-left, top-right, bottom-right, bottom-left] of the original image
+ endpoints: List containing [top-left, top-right, bottom-right, bottom-left] of the transformed image
+ interpolation: Default- Image.BICUBIC
+ fill (n-tuple or int or float): Pixel fill value for area outside the rotated
+ image. If int or float, the value is used for all bands respectively.
+ This option is only available for ``pillow>=5.0.0``.
+
+ Returns:
+ PIL Image: Perspectively transformed Image.
+ """
+
+ if not _is_pil_image(img):
+ raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+ opts = _parse_fill(fill, img, '5.0.0')
+
+ coeffs = _get_perspective_coeffs(startpoints, endpoints)
+ return img.transform(img.size, Image.PERSPECTIVE, coeffs, interpolation, **opts)
+
+
+def vflip(img):
+ """Vertically flip the given PIL Image.
+
+ Args:
+ img (PIL Image): Image to be flipped.
+
+ Returns:
+ PIL Image: Vertically flipped image.
+ """
+ if not _is_pil_image(img):
+ raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+ return img.transpose(Image.FLIP_TOP_BOTTOM)
+
+
+def five_crop(img, size):
+ """Crop the given PIL Image into four corners and the central crop.
+
+ .. Note::
+ This transform returns a tuple of images and there may be a
+ mismatch in the number of inputs and targets your ``Dataset`` returns.
+
+ Args:
+ size (sequence or int): Desired output size of the crop. If size is an
+ int instead of sequence like (h, w), a square crop (size, size) is
+ made.
+
+ Returns:
+ tuple: tuple (tl, tr, bl, br, center)
+ Corresponding top left, top right, bottom left, bottom right and center crop.
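+
+    Example (illustrative sketch; the sizes below are arbitrary)::
+
+        >>> img = Image.new('RGB', (100, 100))
+        >>> crops = five_crop(img, 32)
+        >>> len(crops), crops[0].size
+        (5, (32, 32))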
+ """
+ if isinstance(size, numbers.Number):
+ size = (int(size), int(size))
+ else:
+ assert len(size) == 2, "Please provide only two dimensions (h, w) for size."
+
+ image_width, image_height = img.size
+ crop_height, crop_width = size
+ if crop_width > image_width or crop_height > image_height:
+ msg = "Requested crop size {} is bigger than input size {}"
+ raise ValueError(msg.format(size, (image_height, image_width)))
+
+ tl = img.crop((0, 0, crop_width, crop_height))
+ tr = img.crop((image_width - crop_width, 0, image_width, crop_height))
+ bl = img.crop((0, image_height - crop_height, crop_width, image_height))
+ br = img.crop((image_width - crop_width, image_height - crop_height,
+ image_width, image_height))
+ center = center_crop(img, (crop_height, crop_width))
+ return (tl, tr, bl, br, center)
+
+
+def ten_crop(img, size, vertical_flip=False):
+ """Generate ten cropped images from the given PIL Image.
+ Crop the given PIL Image into four corners and the central crop plus the
+ flipped version of these (horizontal flipping is used by default).
+
+ .. Note::
+ This transform returns a tuple of images and there may be a
+ mismatch in the number of inputs and targets your ``Dataset`` returns.
+
+ Args:
+ size (sequence or int): Desired output size of the crop. If size is an
+ int instead of sequence like (h, w), a square crop (size, size) is
+ made.
+ vertical_flip (bool): Use vertical flipping instead of horizontal
+
+ Returns:
+ tuple: tuple (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip, br_flip, center_flip)
+ Corresponding top left, top right, bottom left, bottom right and
+ center crop and same for the flipped image.
+ """
+ if isinstance(size, numbers.Number):
+ size = (int(size), int(size))
+ else:
+ assert len(size) == 2, "Please provide only two dimensions (h, w) for size."
+
+ first_five = five_crop(img, size)
+
+ if vertical_flip:
+ img = vflip(img)
+ else:
+ img = hflip(img)
+
+ second_five = five_crop(img, size)
+ return first_five + second_five
+
+
+def adjust_brightness(img, brightness_factor):
+ """Adjust brightness of an Image.
+
+ Args:
+ img (PIL Image): PIL Image to be adjusted.
+ brightness_factor (float): How much to adjust the brightness. Can be
+ any non negative number. 0 gives a black image, 1 gives the
+ original image while 2 increases the brightness by a factor of 2.
+
+ Returns:
+ PIL Image: Brightness adjusted image.
+ """
+ if not _is_pil_image(img):
+ raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+ enhancer = ImageEnhance.Brightness(img)
+ img = enhancer.enhance(brightness_factor)
+ return img
+
+
+def adjust_contrast(img, contrast_factor):
+ """Adjust contrast of an Image.
+
+ Args:
+ img (PIL Image): PIL Image to be adjusted.
+ contrast_factor (float): How much to adjust the contrast. Can be any
+ non negative number. 0 gives a solid gray image, 1 gives the
+ original image while 2 increases the contrast by a factor of 2.
+
+ Returns:
+ PIL Image: Contrast adjusted image.
+ """
+ if not _is_pil_image(img):
+ raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+ enhancer = ImageEnhance.Contrast(img)
+ img = enhancer.enhance(contrast_factor)
+ return img
+
+
+def adjust_saturation(img, saturation_factor):
+ """Adjust color saturation of an image.
+
+ Args:
+ img (PIL Image): PIL Image to be adjusted.
+ saturation_factor (float): How much to adjust the saturation. 0 will
+ give a black and white image, 1 will give the original image while
+ 2 will enhance the saturation by a factor of 2.
+
+ Returns:
+ PIL Image: Saturation adjusted image.
+ """
+ if not _is_pil_image(img):
+ raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+ enhancer = ImageEnhance.Color(img)
+ img = enhancer.enhance(saturation_factor)
+ return img
+
+
+def adjust_hue(img, hue_factor):
+ """Adjust hue of an image.
+
+ The image hue is adjusted by converting the image to HSV and
+ cyclically shifting the intensities in the hue channel (H).
+ The image is then converted back to original image mode.
+
+ `hue_factor` is the amount of shift in H channel and must be in the
+ interval `[-0.5, 0.5]`.
+
+ See `Hue`_ for more details.
+
+ .. _Hue: https://en.wikipedia.org/wiki/Hue
+
+ Args:
+ img (PIL Image): PIL Image to be adjusted.
+ hue_factor (float): How much to shift the hue channel. Should be in
+ [-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in
+ HSV space in positive and negative direction respectively.
+ 0 means no shift. Therefore, both -0.5 and 0.5 will give an image
+ with complementary colors while 0 gives the original image.
+
+ Returns:
+ PIL Image: Hue adjusted image.
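+
+    Example (illustrative sketch)::
+
+        >>> img = Image.new('RGB', (8, 8), color=(255, 0, 0))  # pure red
+        >>> shifted = adjust_hue(img, 0.5)  # roughly the complementary, cyan-like color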
+ """
+ if not(-0.5 <= hue_factor <= 0.5):
+        raise ValueError('hue_factor ({}) is not in [-0.5, 0.5].'.format(hue_factor))
+
+ if not _is_pil_image(img):
+ raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+ input_mode = img.mode
+ if input_mode in {'L', '1', 'I', 'F'}:
+ return img
+
+ h, s, v = img.convert('HSV').split()
+
+ np_h = np.array(h, dtype=np.uint8)
+    # uint8 addition takes care of rotation across boundaries
+ with np.errstate(over='ignore'):
+ np_h += np.uint8(hue_factor * 255)
+ h = Image.fromarray(np_h, 'L')
+
+ img = Image.merge('HSV', (h, s, v)).convert(input_mode)
+ return img
+
+
+def adjust_gamma(img, gamma, gain=1):
+ r"""Perform gamma correction on an image.
+
+ Also known as Power Law Transform. Intensities in RGB mode are adjusted
+ based on the following equation:
+
+ .. math::
+ I_{\text{out}} = 255 \times \text{gain} \times \left(\frac{I_{\text{in}}}{255}\right)^{\gamma}
+
+ See `Gamma Correction`_ for more details.
+
+ .. _Gamma Correction: https://en.wikipedia.org/wiki/Gamma_correction
+
+ Args:
+ img (PIL Image): PIL Image to be adjusted.
+ gamma (float): Non negative real number, same as :math:`\gamma` in the equation.
+            gamma larger than 1 makes the shadows darker,
+            while gamma smaller than 1 makes dark regions lighter.
+ gain (float): The constant multiplier.
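+
+    Example (illustrative sketch): with ``gamma=2`` and ``gain=1`` a mid-grey value
+    of 128 maps to roughly ``255 * (128 / 255) ** 2 ~= 64``, i.e. the image darkens::
+
+        >>> img = Image.new('RGB', (8, 8), color=(128, 128, 128))
+        >>> darker = adjust_gamma(img, 2)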
+ """
+ if not _is_pil_image(img):
+ raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+ if gamma < 0:
+ raise ValueError('Gamma should be a non-negative real number')
+
+ input_mode = img.mode
+ img = img.convert('RGB')
+
+ gamma_map = [255 * gain * pow(ele / 255., gamma) for ele in range(256)] * 3
+ img = img.point(gamma_map) # use PIL's point-function to accelerate this part
+
+ img = img.convert(input_mode)
+ return img
+
+
+def rotate(img, angle, resample=False, expand=False, center=None, fill=None):
+ """Rotate the image by angle.
+
+
+ Args:
+ img (PIL Image): PIL Image to be rotated.
+        angle (float or int): Rotation angle in degrees, counter clockwise.
+ resample (``PIL.Image.NEAREST`` or ``PIL.Image.BILINEAR`` or ``PIL.Image.BICUBIC``, optional):
+ An optional resampling filter. See `filters`_ for more information.
+ If omitted, or if the image has mode "1" or "P", it is set to ``PIL.Image.NEAREST``.
+ expand (bool, optional): Optional expansion flag.
+ If true, expands the output image to make it large enough to hold the entire rotated image.
+ If false or omitted, make the output image the same size as the input image.
+ Note that the expand flag assumes rotation around the center and no translation.
+ center (2-tuple, optional): Optional center of rotation.
+ Origin is the upper left corner.
+ Default is the center of the image.
+ fill (n-tuple or int or float): Pixel fill value for area outside the rotated
+ image. If int or float, the value is used for all bands respectively.
+ Defaults to 0 for all bands. This option is only available for ``pillow>=5.2.0``.
+
+ .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters
+
+ """
+ if not _is_pil_image(img):
+ raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+ opts = _parse_fill(fill, img, '5.2.0')
+
+ return img.rotate(angle, resample, expand, center, **opts)
+
+
+def _get_inverse_affine_matrix(center, angle, translate, scale, shear):
+ # Helper method to compute inverse matrix for affine transformation
+
+ # As it is explained in PIL.Image.rotate
+ # We need compute INVERSE of affine transformation matrix: M = T * C * RSS * C^-1
+ # where T is translation matrix: [1, 0, tx | 0, 1, ty | 0, 0, 1]
+ # C is translation matrix to keep center: [1, 0, cx | 0, 1, cy | 0, 0, 1]
+ # RSS is rotation with scale and shear matrix
+ # RSS(a, s, (sx, sy)) =
+ # = R(a) * S(s) * SHy(sy) * SHx(sx)
+    #       = [ s*cos(a - sy)/cos(sy), s*(-cos(a - sy)*tan(sx)/cos(sy) - sin(a)), 0 ]
+    #         [ s*sin(a - sy)/cos(sy), s*(-sin(a - sy)*tan(sx)/cos(sy) + cos(a)), 0 ]
+ # [ 0 , 0 , 1 ]
+ #
+ # where R is a rotation matrix, S is a scaling matrix, and SHx and SHy are the shears:
+ # SHx(s) = [1, -tan(s)] and SHy(s) = [1 , 0]
+ # [0, 1 ] [-tan(s), 1]
+ #
+ # Thus, the inverse is M^-1 = C * RSS^-1 * C^-1 * T^-1
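+    #
+    # Sanity check (illustrative, not part of the upstream derivation): with angle=0,
+    # shear=(0, 0) and scale=1, RSS is the identity, so the code below reduces to
+    # M = [1, 0, -tx, 0, 1, -ty], i.e. the inverse of a pure translation by (tx, ty).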
+
+ if isinstance(shear, numbers.Number):
+ shear = [shear, 0]
+
+    if not (isinstance(shear, (tuple, list)) and len(shear) == 2):
+ raise ValueError(
+ "Shear should be a single value or a tuple/list containing " +
+ "two values. Got {}".format(shear))
+
+ rot = math.radians(angle)
+ sx, sy = [math.radians(s) for s in shear]
+
+ cx, cy = center
+ tx, ty = translate
+
+ # RSS without scaling
+ a = cos(rot - sy) / cos(sy)
+ b = -cos(rot - sy) * tan(sx) / cos(sy) - sin(rot)
+ c = sin(rot - sy) / cos(sy)
+ d = -sin(rot - sy) * tan(sx) / cos(sy) + cos(rot)
+
+ # Inverted rotation matrix with scale and shear
+ # det([[a, b], [c, d]]) == 1, since det(rotation) = 1 and det(shear) = 1
+ M = [d, -b, 0,
+ -c, a, 0]
+ M = [x / scale for x in M]
+
+ # Apply inverse of translation and of center translation: RSS^-1 * C^-1 * T^-1
+ M[2] += M[0] * (-cx - tx) + M[1] * (-cy - ty)
+ M[5] += M[3] * (-cx - tx) + M[4] * (-cy - ty)
+
+ # Apply center translation: C * RSS^-1 * C^-1 * T^-1
+ M[2] += cx
+ M[5] += cy
+ return M
+
+
+def affine(img, angle, translate, scale, shear, resample=0, fillcolor=None):
+ """Apply affine transformation on the image keeping image center invariant
+
+ Args:
+ img (PIL Image): PIL Image to be rotated.
+ angle (float or int): rotation angle in degrees between -180 and 180, clockwise direction.
+ translate (list or tuple of integers): horizontal and vertical translations (post-rotation translation)
+ scale (float): overall scale
+ shear (float or tuple or list): shear angle value in degrees between -180 to 180, clockwise direction.
+ If a tuple of list is specified, the first value corresponds to a shear parallel to the x axis, while
+ the second value corresponds to a shear parallel to the y axis.
+ resample (``PIL.Image.NEAREST`` or ``PIL.Image.BILINEAR`` or ``PIL.Image.BICUBIC``, optional):
+ An optional resampling filter.
+ See `filters`_ for more information.
+ If omitted, or if the image has mode "1" or "P", it is set to ``PIL.Image.NEAREST``.
+ fillcolor (int): Optional fill color for the area outside the transform in the output image. (Pillow>=5.0.0)
+ """
+ if not _is_pil_image(img):
+ raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+ assert isinstance(translate, (tuple, list)) and len(translate) == 2, \
+ "Argument translate should be a list or tuple of length 2"
+
+ assert scale > 0.0, "Argument scale should be positive"
+
+ output_size = img.size
+ center = (img.size[0] * 0.5 + 0.5, img.size[1] * 0.5 + 0.5)
+ matrix = _get_inverse_affine_matrix(center, angle, translate, scale, shear)
+ kwargs = {"fillcolor": fillcolor} if int(PILLOW_VERSION.split('.')[0]) >= 5 else {}
+ return img.transform(output_size, Image.AFFINE, matrix, resample, **kwargs)
+
+
+def to_grayscale(img, num_output_channels=1):
+    """Convert the given image to a grayscale version of itself.
+
+ Args:
+ img (PIL Image): Image to be converted to grayscale.
+
+ Returns:
+ PIL Image: Grayscale version of the image.
+ if num_output_channels = 1 : returned image is single channel
+
+ if num_output_channels = 3 : returned image is 3 channel with r = g = b
+ """
+ if not _is_pil_image(img):
+ raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+ if num_output_channels == 1:
+ img = img.convert('L')
+ elif num_output_channels == 3:
+ img = img.convert('L')
+ np_img = np.array(img, dtype=np.uint8)
+ np_img = np.dstack([np_img, np_img, np_img])
+ img = Image.fromarray(np_img, 'RGB')
+ else:
+ raise ValueError('num_output_channels should be either 1 or 3')
+
+ return img
+
+
+def erase(img, i, j, h, w, v, inplace=False):
+ """ Erase the input Tensor Image with given value.
+
+ Args:
+ img (Tensor Image): Tensor image of size (C, H, W) to be erased
+        i (int): i in (i, j), i.e., the row coordinate of the upper left corner.
+        j (int): j in (i, j), i.e., the column coordinate of the upper left corner.
+ h (int): Height of the erased region.
+ w (int): Width of the erased region.
+ v: Erasing value.
+ inplace(bool, optional): For in-place operations. By default is set False.
+
+ Returns:
+ Tensor Image: Erased image.
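+
+    Example (illustrative sketch; the sizes below are arbitrary)::
+
+        >>> img = torch.ones(3, 32, 32)
+        >>> out = erase(img, 4, 4, 8, 8, v=0.)
+        >>> out[:, 4:12, 4:12].sum().item()  # the 8x8 region starting at (4, 4) is zeroed
+        0.0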
+ """
+ if not isinstance(img, torch.Tensor):
+ raise TypeError('img should be Tensor Image. Got {}'.format(type(img)))
+
+ if not inplace:
+ img = img.clone()
+
+ img[:, i:i + h, j:j + w] = v
+ return img
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_pil.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_pil.py
new file mode 100644
index 0000000000..fdaf5f7de1
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_pil.py
@@ -0,0 +1,399 @@
+import numbers
+from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
+
+import numpy as np
+import torch
+from PIL import Image, ImageOps, ImageEnhance
+
+try:
+ import accimage
+except ImportError:
+ accimage = None
+
+
+@torch.jit.unused
+def _is_pil_image(img: Any) -> bool:
+ if accimage is not None:
+ return isinstance(img, (Image.Image, accimage.Image))
+ else:
+ return isinstance(img, Image.Image)
+
+
+@torch.jit.unused
+def get_image_size(img: Any) -> List[int]:
+ if _is_pil_image(img):
+ return list(img.size)
+ raise TypeError(f"Unexpected type {type(img)}")
+
+
+@torch.jit.unused
+def get_image_num_channels(img: Any) -> int:
+ if _is_pil_image(img):
+ return 1 if img.mode == "L" else 3
+ raise TypeError(f"Unexpected type {type(img)}")
+
+
+@torch.jit.unused
+def hflip(img: Image.Image) -> Image.Image:
+ if not _is_pil_image(img):
+ raise TypeError(f"img should be PIL Image. Got {type(img)}")
+
+ return img.transpose(Image.FLIP_LEFT_RIGHT)
+
+
+@torch.jit.unused
+def vflip(img: Image.Image) -> Image.Image:
+ if not _is_pil_image(img):
+ raise TypeError(f"img should be PIL Image. Got {type(img)}")
+
+ return img.transpose(Image.FLIP_TOP_BOTTOM)
+
+
+@torch.jit.unused
+def adjust_brightness(img: Image.Image, brightness_factor: float) -> Image.Image:
+ if not _is_pil_image(img):
+ raise TypeError(f"img should be PIL Image. Got {type(img)}")
+
+ enhancer = ImageEnhance.Brightness(img)
+ img = enhancer.enhance(brightness_factor)
+ return img
+
+
+@torch.jit.unused
+def adjust_contrast(img: Image.Image, contrast_factor: float) -> Image.Image:
+ if not _is_pil_image(img):
+ raise TypeError(f"img should be PIL Image. Got {type(img)}")
+
+ enhancer = ImageEnhance.Contrast(img)
+ img = enhancer.enhance(contrast_factor)
+ return img
+
+
+@torch.jit.unused
+def adjust_saturation(img: Image.Image, saturation_factor: float) -> Image.Image:
+ if not _is_pil_image(img):
+ raise TypeError(f"img should be PIL Image. Got {type(img)}")
+
+ enhancer = ImageEnhance.Color(img)
+ img = enhancer.enhance(saturation_factor)
+ return img
+
+
+@torch.jit.unused
+def adjust_hue(img: Image.Image, hue_factor: float) -> Image.Image:
+ if not (-0.5 <= hue_factor <= 0.5):
+ raise ValueError(f"hue_factor ({hue_factor}) is not in [-0.5, 0.5].")
+
+ if not _is_pil_image(img):
+ raise TypeError(f"img should be PIL Image. Got {type(img)}")
+
+ input_mode = img.mode
+ if input_mode in {"L", "1", "I", "F"}:
+ return img
+
+ h, s, v = img.convert("HSV").split()
+
+ np_h = np.array(h, dtype=np.uint8)
+    # uint8 addition takes care of rotation across boundaries
+ with np.errstate(over="ignore"):
+ np_h += np.uint8(hue_factor * 255)
+ h = Image.fromarray(np_h, "L")
+
+ img = Image.merge("HSV", (h, s, v)).convert(input_mode)
+ return img
+
+
+@torch.jit.unused
+def adjust_gamma(
+ img: Image.Image,
+ gamma: float,
+ gain: float = 1.0,
+) -> Image.Image:
+
+ if not _is_pil_image(img):
+ raise TypeError(f"img should be PIL Image. Got {type(img)}")
+
+ if gamma < 0:
+ raise ValueError("Gamma should be a non-negative real number")
+
+ input_mode = img.mode
+ img = img.convert("RGB")
+ gamma_map = [(255 + 1 - 1e-3) * gain * pow(ele / 255.0, gamma) for ele in range(256)] * 3
+ img = img.point(gamma_map) # use PIL's point-function to accelerate this part
+
+ img = img.convert(input_mode)
+ return img
+
+
+@torch.jit.unused
+def pad(
+ img: Image.Image,
+ padding: Union[int, List[int], Tuple[int, ...]],
+ fill: Optional[Union[float, List[float], Tuple[float, ...]]] = 0,
+ padding_mode: str = "constant",
+) -> Image.Image:
+
+ if not _is_pil_image(img):
+ raise TypeError(f"img should be PIL Image. Got {type(img)}")
+
+ if not isinstance(padding, (numbers.Number, tuple, list)):
+ raise TypeError("Got inappropriate padding arg")
+ if not isinstance(fill, (numbers.Number, str, tuple)):
+ raise TypeError("Got inappropriate fill arg")
+ if not isinstance(padding_mode, str):
+ raise TypeError("Got inappropriate padding_mode arg")
+
+ if isinstance(padding, list):
+ padding = tuple(padding)
+
+ if isinstance(padding, tuple) and len(padding) not in [1, 2, 4]:
+ raise ValueError(f"Padding must be an int or a 1, 2, or 4 element tuple, not a {len(padding)} element tuple")
+
+ if isinstance(padding, tuple) and len(padding) == 1:
+ # Compatibility with `functional_tensor.pad`
+ padding = padding[0]
+
+ if padding_mode not in ["constant", "edge", "reflect", "symmetric"]:
+ raise ValueError("Padding mode should be either constant, edge, reflect or symmetric")
+
+ if padding_mode == "constant":
+ opts = _parse_fill(fill, img, name="fill")
+ if img.mode == "P":
+ palette = img.getpalette()
+ image = ImageOps.expand(img, border=padding, **opts)
+ image.putpalette(palette)
+ return image
+
+ return ImageOps.expand(img, border=padding, **opts)
+ else:
+ if isinstance(padding, int):
+ pad_left = pad_right = pad_top = pad_bottom = padding
+ if isinstance(padding, tuple) and len(padding) == 2:
+ pad_left = pad_right = padding[0]
+ pad_top = pad_bottom = padding[1]
+ if isinstance(padding, tuple) and len(padding) == 4:
+ pad_left = padding[0]
+ pad_top = padding[1]
+ pad_right = padding[2]
+ pad_bottom = padding[3]
+
+ p = [pad_left, pad_top, pad_right, pad_bottom]
+ cropping = -np.minimum(p, 0)
+
+ if cropping.any():
+ crop_left, crop_top, crop_right, crop_bottom = cropping
+ img = img.crop((crop_left, crop_top, img.width - crop_right, img.height - crop_bottom))
+
+ pad_left, pad_top, pad_right, pad_bottom = np.maximum(p, 0)
+
+ if img.mode == "P":
+ palette = img.getpalette()
+ img = np.asarray(img)
+ img = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right)), padding_mode)
+ img = Image.fromarray(img)
+ img.putpalette(palette)
+ return img
+
+ img = np.asarray(img)
+ # RGB image
+ if len(img.shape) == 3:
+ img = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right), (0, 0)), padding_mode)
+ # Grayscale image
+ if len(img.shape) == 2:
+ img = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right)), padding_mode)
+
+ return Image.fromarray(img)
+
+
+@torch.jit.unused
+def crop(
+ img: Image.Image,
+ top: int,
+ left: int,
+ height: int,
+ width: int,
+) -> Image.Image:
+
+ if not _is_pil_image(img):
+ raise TypeError(f"img should be PIL Image. Got {type(img)}")
+
+ return img.crop((left, top, left + width, top + height))
+
+
+@torch.jit.unused
+def resize(
+ img: Image.Image,
+ size: Union[Sequence[int], int],
+ interpolation: int = Image.BILINEAR,
+ max_size: Optional[int] = None,
+) -> Image.Image:
+
+ if not _is_pil_image(img):
+ raise TypeError(f"img should be PIL Image. Got {type(img)}")
+ if not (isinstance(size, int) or (isinstance(size, Sequence) and len(size) in (1, 2))):
+ raise TypeError(f"Got inappropriate size arg: {size}")
+
+ if isinstance(size, Sequence) and len(size) == 1:
+ size = size[0]
+ if isinstance(size, int):
+ w, h = img.size
+
+ short, long = (w, h) if w <= h else (h, w)
+ if short == size:
+ return img
+
+ new_short, new_long = size, int(size * long / short)
+
+ if max_size is not None:
+ if max_size <= size:
+ raise ValueError(
+ f"max_size = {max_size} must be strictly greater than the requested "
+ f"size for the smaller edge size = {size}"
+ )
+ if new_long > max_size:
+ new_short, new_long = int(max_size * new_short / new_long), max_size
+
+ new_w, new_h = (new_short, new_long) if w <= h else (new_long, new_short)
+ return img.resize((new_w, new_h), interpolation)
+ else:
+ if max_size is not None:
+ raise ValueError(
+ "max_size should only be passed if size specifies the length of the smaller edge, "
+ "i.e. size should be an int or a sequence of length 1 in torchscript mode."
+ )
+ return img.resize(size[::-1], interpolation)
+
+
+@torch.jit.unused
+def _parse_fill(
+ fill: Optional[Union[float, List[float], Tuple[float, ...]]],
+ img: Image.Image,
+ name: str = "fillcolor",
+) -> Dict[str, Optional[Union[float, List[float], Tuple[float, ...]]]]:
+
+ # Process fill color for affine transforms
+ num_bands = len(img.getbands())
+ if fill is None:
+ fill = 0
+ if isinstance(fill, (int, float)) and num_bands > 1:
+ fill = tuple([fill] * num_bands)
+ if isinstance(fill, (list, tuple)):
+ if len(fill) != num_bands:
+ msg = "The number of elements in 'fill' does not match the number of bands of the image ({} != {})"
+ raise ValueError(msg.format(len(fill), num_bands))
+
+ fill = tuple(fill)
+
+ return {name: fill}
+
+
+@torch.jit.unused
+def affine(
+ img: Image.Image,
+ matrix: List[float],
+ interpolation: int = Image.NEAREST,
+ fill: Optional[Union[float, List[float], Tuple[float, ...]]] = 0,
+) -> Image.Image:
+
+ if not _is_pil_image(img):
+ raise TypeError(f"img should be PIL Image. Got {type(img)}")
+
+ output_size = img.size
+ opts = _parse_fill(fill, img)
+ return img.transform(output_size, Image.AFFINE, matrix, interpolation, **opts)
+
+
+@torch.jit.unused
+def rotate(
+ img: Image.Image,
+ angle: float,
+ interpolation: int = Image.NEAREST,
+ expand: bool = False,
+ center: Optional[Tuple[int, int]] = None,
+ fill: Optional[Union[float, List[float], Tuple[float, ...]]] = 0,
+) -> Image.Image:
+
+ if not _is_pil_image(img):
+ raise TypeError(f"img should be PIL Image. Got {type(img)}")
+
+ opts = _parse_fill(fill, img)
+ return img.rotate(angle, interpolation, expand, center, **opts)
+
+
+@torch.jit.unused
+def perspective(
+ img: Image.Image,
+ perspective_coeffs: float,
+ interpolation: int = Image.BICUBIC,
+ fill: Optional[Union[float, List[float], Tuple[float, ...]]] = 0,
+) -> Image.Image:
+
+ if not _is_pil_image(img):
+ raise TypeError(f"img should be PIL Image. Got {type(img)}")
+
+ opts = _parse_fill(fill, img)
+
+ return img.transform(img.size, Image.PERSPECTIVE, perspective_coeffs, interpolation, **opts)
+
+
+@torch.jit.unused
+def to_grayscale(img: Image.Image, num_output_channels: int) -> Image.Image:
+ if not _is_pil_image(img):
+ raise TypeError(f"img should be PIL Image. Got {type(img)}")
+
+ if num_output_channels == 1:
+ img = img.convert("L")
+ elif num_output_channels == 3:
+ img = img.convert("L")
+ np_img = np.array(img, dtype=np.uint8)
+ np_img = np.dstack([np_img, np_img, np_img])
+ img = Image.fromarray(np_img, "RGB")
+ else:
+ raise ValueError("num_output_channels should be either 1 or 3")
+
+ return img
+
+
+@torch.jit.unused
+def invert(img: Image.Image) -> Image.Image:
+ if not _is_pil_image(img):
+ raise TypeError(f"img should be PIL Image. Got {type(img)}")
+ return ImageOps.invert(img)
+
+
+@torch.jit.unused
+def posterize(img: Image.Image, bits: int) -> Image.Image:
+ if not _is_pil_image(img):
+ raise TypeError(f"img should be PIL Image. Got {type(img)}")
+ return ImageOps.posterize(img, bits)
+
+
+@torch.jit.unused
+def solarize(img: Image.Image, threshold: int) -> Image.Image:
+ if not _is_pil_image(img):
+ raise TypeError(f"img should be PIL Image. Got {type(img)}")
+ return ImageOps.solarize(img, threshold)
+
+
+@torch.jit.unused
+def adjust_sharpness(img: Image.Image, sharpness_factor: float) -> Image.Image:
+ if not _is_pil_image(img):
+ raise TypeError(f"img should be PIL Image. Got {type(img)}")
+
+ enhancer = ImageEnhance.Sharpness(img)
+ img = enhancer.enhance(sharpness_factor)
+ return img
+
+
+@torch.jit.unused
+def autocontrast(img: Image.Image) -> Image.Image:
+ if not _is_pil_image(img):
+ raise TypeError(f"img should be PIL Image. Got {type(img)}")
+ return ImageOps.autocontrast(img)
+
+
+@torch.jit.unused
+def equalize(img: Image.Image) -> Image.Image:
+ if not _is_pil_image(img):
+ raise TypeError(f"img should be PIL Image. Got {type(img)}")
+ return ImageOps.equalize(img)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor.py
new file mode 100644
index 0000000000..09ae726931
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor.py
@@ -0,0 +1,987 @@
+import warnings
+from typing import Optional, Tuple, List
+
+import torch
+from torch import Tensor
+from torch.jit.annotations import BroadcastingList2
+from torch.nn.functional import grid_sample, conv2d, interpolate, pad as torch_pad
+
+
+def _is_tensor_a_torch_image(x: Tensor) -> bool:
+ return x.ndim >= 2
+
+
+def _assert_image_tensor(img: Tensor) -> None:
+ if not _is_tensor_a_torch_image(img):
+ raise TypeError("Tensor is not a torch image.")
+
+
+def get_image_size(img: Tensor) -> List[int]:
+ # Returns (w, h) of tensor image
+ _assert_image_tensor(img)
+ return [img.shape[-1], img.shape[-2]]
+
+
+def get_image_num_channels(img: Tensor) -> int:
+ if img.ndim == 2:
+ return 1
+ elif img.ndim > 2:
+ return img.shape[-3]
+
+ raise TypeError(f"Input ndim should be 2 or more. Got {img.ndim}")
+
+
+def _max_value(dtype: torch.dtype) -> float:
+ # TODO: replace this method with torch.iinfo when it gets torchscript support.
+ # https://github.com/pytorch/pytorch/issues/41492
+
+ a = torch.tensor(2, dtype=dtype)
+ signed = 1 if torch.tensor(0, dtype=dtype).is_signed() else 0
+ bits = 1
+ max_value = torch.tensor(-signed, dtype=torch.long)
+ while True:
+ next_value = a.pow(bits - signed).sub(1)
+ if next_value > max_value:
+ max_value = next_value
+ bits *= 2
+ else:
+ break
+ return max_value.item()
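+    # Illustrative values (assumption based on reading the loop above, not upstream
+    # documentation): _max_value(torch.uint8) == 255, _max_value(torch.int8) == 127,
+    # _max_value(torch.int32) == 2147483647; the loop doubles `bits` until the
+    # candidate value stops growing, i.e. until the dtype overflows.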
+
+
+def _assert_channels(img: Tensor, permitted: List[int]) -> None:
+ c = get_image_num_channels(img)
+ if c not in permitted:
+ raise TypeError(f"Input image tensor permitted channel values are {permitted}, but found {c}")
+
+
+def convert_image_dtype(image: torch.Tensor, dtype: torch.dtype = torch.float) -> torch.Tensor:
+ if image.dtype == dtype:
+ return image
+
+ if image.is_floating_point():
+
+ # TODO: replace with dtype.is_floating_point when torchscript supports it
+ if torch.tensor(0, dtype=dtype).is_floating_point():
+ return image.to(dtype)
+
+ # float to int
+ if (image.dtype == torch.float32 and dtype in (torch.int32, torch.int64)) or (
+ image.dtype == torch.float64 and dtype == torch.int64
+ ):
+ msg = f"The cast from {image.dtype} to {dtype} cannot be performed safely."
+ raise RuntimeError(msg)
+
+ # https://github.com/pytorch/vision/pull/2078#issuecomment-612045321
+ # For data in the range 0-1, (float * 255).to(uint) is only 255
+ # when float is exactly 1.0.
+ # `max + 1 - epsilon` provides more evenly distributed mapping of
+ # ranges of floats to ints.
+ eps = 1e-3
+ max_val = _max_value(dtype)
+ result = image.mul(max_val + 1.0 - eps)
+ return result.to(dtype)
+ else:
+ input_max = _max_value(image.dtype)
+
+ # int to float
+ # TODO: replace with dtype.is_floating_point when torchscript supports it
+ if torch.tensor(0, dtype=dtype).is_floating_point():
+ image = image.to(dtype)
+ return image / input_max
+
+ output_max = _max_value(dtype)
+
+ # int to int
+ if input_max > output_max:
+ # factor should be forced to int for torch jit script
+ # otherwise factor is a float and image // factor can produce different results
+ factor = int((input_max + 1) // (output_max + 1))
+ image = torch.div(image, factor, rounding_mode="floor")
+ return image.to(dtype)
+ else:
+ # factor should be forced to int for torch jit script
+ # otherwise factor is a float and image * factor can produce different results
+ factor = int((output_max + 1) // (input_max + 1))
+ image = image.to(dtype)
+ return image * factor
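+    # Illustrative behaviour (assumption based on the code above, not upstream
+    # documentation): uint8 -> float32 divides by 255 so 255 maps to 1.0, while
+    # float32 -> uint8 multiplies by (255 + 1 - 1e-3) and truncates, spreading
+    # [0, 1] evenly over 0..255 instead of reaching 255 only at exactly 1.0.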
+
+
+def vflip(img: Tensor) -> Tensor:
+ _assert_image_tensor(img)
+
+ return img.flip(-2)
+
+
+def hflip(img: Tensor) -> Tensor:
+ _assert_image_tensor(img)
+
+ return img.flip(-1)
+
+
+def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor:
+ _assert_image_tensor(img)
+
+ w, h = get_image_size(img)
+ right = left + width
+ bottom = top + height
+
+ if left < 0 or top < 0 or right > w or bottom > h:
+ padding_ltrb = [max(-left, 0), max(-top, 0), max(right - w, 0), max(bottom - h, 0)]
+ return pad(img[..., max(top, 0) : bottom, max(left, 0) : right], padding_ltrb, fill=0)
+ return img[..., top:bottom, left:right]
+
+
+def rgb_to_grayscale(img: Tensor, num_output_channels: int = 1) -> Tensor:
+ if img.ndim < 3:
+ raise TypeError(f"Input image tensor should have at least 3 dimensions, but found {img.ndim}")
+ _assert_channels(img, [3])
+
+ if num_output_channels not in (1, 3):
+ raise ValueError("num_output_channels should be either 1 or 3")
+
+ r, g, b = img.unbind(dim=-3)
+ # This implementation closely follows the TF one:
+ # https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/python/ops/image_ops_impl.py#L2105-L2138
+ l_img = (0.2989 * r + 0.587 * g + 0.114 * b).to(img.dtype)
+ l_img = l_img.unsqueeze(dim=-3)
+
+ if num_output_channels == 3:
+ return l_img.expand(img.shape)
+
+ return l_img
+
+
+def adjust_brightness(img: Tensor, brightness_factor: float) -> Tensor:
+ if brightness_factor < 0:
+ raise ValueError(f"brightness_factor ({brightness_factor}) is not non-negative.")
+
+ _assert_image_tensor(img)
+
+ _assert_channels(img, [1, 3])
+
+ return _blend(img, torch.zeros_like(img), brightness_factor)
+
+
+def adjust_contrast(img: Tensor, contrast_factor: float) -> Tensor:
+ if contrast_factor < 0:
+ raise ValueError(f"contrast_factor ({contrast_factor}) is not non-negative.")
+
+ _assert_image_tensor(img)
+
+ _assert_channels(img, [3, 1])
+ c = get_image_num_channels(img)
+ dtype = img.dtype if torch.is_floating_point(img) else torch.float32
+ if c == 3:
+ mean = torch.mean(rgb_to_grayscale(img).to(dtype), dim=(-3, -2, -1), keepdim=True)
+ else:
+ mean = torch.mean(img.to(dtype), dim=(-3, -2, -1), keepdim=True)
+
+ return _blend(img, mean, contrast_factor)
+
+
+def adjust_hue(img: Tensor, hue_factor: float) -> Tensor:
+ if not (-0.5 <= hue_factor <= 0.5):
+ raise ValueError(f"hue_factor ({hue_factor}) is not in [-0.5, 0.5].")
+
+ if not (isinstance(img, torch.Tensor)):
+ raise TypeError("Input img should be Tensor image")
+
+ _assert_image_tensor(img)
+
+ _assert_channels(img, [1, 3])
+ if get_image_num_channels(img) == 1: # Match PIL behaviour
+ return img
+
+ orig_dtype = img.dtype
+ if img.dtype == torch.uint8:
+ img = img.to(dtype=torch.float32) / 255.0
+
+ img = _rgb2hsv(img)
+ h, s, v = img.unbind(dim=-3)
+ h = (h + hue_factor) % 1.0
+ img = torch.stack((h, s, v), dim=-3)
+ img_hue_adj = _hsv2rgb(img)
+
+ if orig_dtype == torch.uint8:
+ img_hue_adj = (img_hue_adj * 255.0).to(dtype=orig_dtype)
+
+ return img_hue_adj
+
+
+def adjust_saturation(img: Tensor, saturation_factor: float) -> Tensor:
+ if saturation_factor < 0:
+ raise ValueError(f"saturation_factor ({saturation_factor}) is not non-negative.")
+
+ _assert_image_tensor(img)
+
+ _assert_channels(img, [1, 3])
+
+ if get_image_num_channels(img) == 1: # Match PIL behaviour
+ return img
+
+ return _blend(img, rgb_to_grayscale(img), saturation_factor)
+
+
+def adjust_gamma(img: Tensor, gamma: float, gain: float = 1) -> Tensor:
+ if not isinstance(img, torch.Tensor):
+ raise TypeError("Input img should be a Tensor.")
+
+ _assert_channels(img, [1, 3])
+
+ if gamma < 0:
+ raise ValueError("Gamma should be a non-negative real number")
+
+ result = img
+ dtype = img.dtype
+ if not torch.is_floating_point(img):
+ result = convert_image_dtype(result, torch.float32)
+
+ result = (gain * result ** gamma).clamp(0, 1)
+
+ result = convert_image_dtype(result, dtype)
+ return result
+
+
+def center_crop(img: Tensor, output_size: BroadcastingList2[int]) -> Tensor:
+ """DEPRECATED"""
+ warnings.warn(
+ "This method is deprecated and will be removed in future releases. Please, use ``F.center_crop`` instead."
+ )
+
+ _assert_image_tensor(img)
+
+ _, image_width, image_height = img.size()
+ crop_height, crop_width = output_size
+ # crop_top = int(round((image_height - crop_height) / 2.))
+ # Result can be different between python func and scripted func
+ # Temporary workaround:
+ crop_top = int((image_height - crop_height + 1) * 0.5)
+ # crop_left = int(round((image_width - crop_width) / 2.))
+ # Result can be different between python func and scripted func
+ # Temporary workaround:
+ crop_left = int((image_width - crop_width + 1) * 0.5)
+
+ return crop(img, crop_top, crop_left, crop_height, crop_width)
+
+
+def five_crop(img: Tensor, size: BroadcastingList2[int]) -> List[Tensor]:
+ """DEPRECATED"""
+ warnings.warn(
+ "This method is deprecated and will be removed in future releases. Please, use ``F.five_crop`` instead."
+ )
+
+ _assert_image_tensor(img)
+
+ assert len(size) == 2, "Please provide only two dimensions (h, w) for size."
+
+ _, image_width, image_height = img.size()
+ crop_height, crop_width = size
+ if crop_width > image_width or crop_height > image_height:
+ msg = "Requested crop size {} is bigger than input size {}"
+ raise ValueError(msg.format(size, (image_height, image_width)))
+
+ tl = crop(img, 0, 0, crop_width, crop_height)
+ tr = crop(img, image_width - crop_width, 0, image_width, crop_height)
+ bl = crop(img, 0, image_height - crop_height, crop_width, image_height)
+ br = crop(img, image_width - crop_width, image_height - crop_height, image_width, image_height)
+ center = center_crop(img, (crop_height, crop_width))
+
+ return [tl, tr, bl, br, center]
+
+
+def ten_crop(img: Tensor, size: BroadcastingList2[int], vertical_flip: bool = False) -> List[Tensor]:
+ """DEPRECATED"""
+ warnings.warn(
+ "This method is deprecated and will be removed in future releases. Please, use ``F.ten_crop`` instead."
+ )
+
+ _assert_image_tensor(img)
+
+ assert len(size) == 2, "Please provide only two dimensions (h, w) for size."
+ first_five = five_crop(img, size)
+
+ if vertical_flip:
+ img = vflip(img)
+ else:
+ img = hflip(img)
+
+ second_five = five_crop(img, size)
+
+ return first_five + second_five
+
+
+def _blend(img1: Tensor, img2: Tensor, ratio: float) -> Tensor:
+ ratio = float(ratio)
+ bound = 1.0 if img1.is_floating_point() else 255.0
+ return (ratio * img1 + (1.0 - ratio) * img2).clamp(0, bound).to(img1.dtype)
+
+
+def _rgb2hsv(img: Tensor) -> Tensor:
+ r, g, b = img.unbind(dim=-3)
+
+ # Implementation is based on https://github.com/python-pillow/Pillow/blob/4174d4267616897df3746d315d5a2d0f82c656ee/
+ # src/libImaging/Convert.c#L330
+ maxc = torch.max(img, dim=-3).values
+ minc = torch.min(img, dim=-3).values
+
+ # The algorithm erases S and H channel where `maxc = minc`. This avoids NaN
+ # from happening in the results, because
+ # + S channel has division by `maxc`, which is zero only if `maxc = minc`
+ # + H channel has division by `(maxc - minc)`.
+ #
+    # Instead of overwriting NaN afterwards, we just prevent it from occurring, so
+    # we don't need to deal with it in case the NaN is ever saved in a buffer for
+    # backprop (if that is ever supported); either way it doesn't hurt to do so.
+ eqc = maxc == minc
+
+ cr = maxc - minc
+    # Since `eqc => cr = 0`, replacing the denominator with 1 where `eqc` holds is fine.
+ ones = torch.ones_like(maxc)
+ s = cr / torch.where(eqc, ones, maxc)
+ # Note that `eqc => maxc = minc = r = g = b`. So the following calculation
+ # of `h` would reduce to `bc - gc + 2 + rc - bc + 4 + rc - bc = 6` so it
+ # would not matter what values `rc`, `gc`, and `bc` have here, and thus
+    # replacing the denominator with 1 where `eqc` holds is fine.
+ cr_divisor = torch.where(eqc, ones, cr)
+ rc = (maxc - r) / cr_divisor
+ gc = (maxc - g) / cr_divisor
+ bc = (maxc - b) / cr_divisor
+
+ hr = (maxc == r) * (bc - gc)
+ hg = ((maxc == g) & (maxc != r)) * (2.0 + rc - bc)
+ hb = ((maxc != g) & (maxc != r)) * (4.0 + gc - rc)
+ h = hr + hg + hb
+ h = torch.fmod((h / 6.0 + 1.0), 1.0)
+ return torch.stack((h, s, maxc), dim=-3)
+
+
+def _hsv2rgb(img: Tensor) -> Tensor:
+ h, s, v = img.unbind(dim=-3)
+ i = torch.floor(h * 6.0)
+ f = (h * 6.0) - i
+ i = i.to(dtype=torch.int32)
+
+ p = torch.clamp((v * (1.0 - s)), 0.0, 1.0)
+ q = torch.clamp((v * (1.0 - s * f)), 0.0, 1.0)
+ t = torch.clamp((v * (1.0 - s * (1.0 - f))), 0.0, 1.0)
+ i = i % 6
+
+ mask = i.unsqueeze(dim=-3) == torch.arange(6, device=i.device).view(-1, 1, 1)
+
+ a1 = torch.stack((v, q, p, p, t, v), dim=-3)
+ a2 = torch.stack((t, v, v, q, p, p), dim=-3)
+ a3 = torch.stack((p, p, t, v, v, q), dim=-3)
+ a4 = torch.stack((a1, a2, a3), dim=-4)
+
+ return torch.einsum("...ijk, ...xijk -> ...xjk", mask.to(dtype=img.dtype), a4)
+
+
+def _pad_symmetric(img: Tensor, padding: List[int]) -> Tensor:
+ # padding is left, right, top, bottom
+
+ # crop if needed
+ if padding[0] < 0 or padding[1] < 0 or padding[2] < 0 or padding[3] < 0:
+ neg_min_padding = [-min(x, 0) for x in padding]
+ crop_left, crop_right, crop_top, crop_bottom = neg_min_padding
+ img = img[..., crop_top : img.shape[-2] - crop_bottom, crop_left : img.shape[-1] - crop_right]
+ padding = [max(x, 0) for x in padding]
+
+ in_sizes = img.size()
+
+ _x_indices = [i for i in range(in_sizes[-1])] # [0, 1, 2, 3, ...]
+ left_indices = [i for i in range(padding[0] - 1, -1, -1)] # e.g. [3, 2, 1, 0]
+ right_indices = [-(i + 1) for i in range(padding[1])] # e.g. [-1, -2, -3]
+ x_indices = torch.tensor(left_indices + _x_indices + right_indices, device=img.device)
+
+ _y_indices = [i for i in range(in_sizes[-2])]
+ top_indices = [i for i in range(padding[2] - 1, -1, -1)]
+ bottom_indices = [-(i + 1) for i in range(padding[3])]
+ y_indices = torch.tensor(top_indices + _y_indices + bottom_indices, device=img.device)
+
+ ndim = img.ndim
+ if ndim == 3:
+ return img[:, y_indices[:, None], x_indices[None, :]]
+ elif ndim == 4:
+ return img[:, :, y_indices[:, None], x_indices[None, :]]
+ else:
+        raise RuntimeError("Symmetric padding of N-D tensors is not supported yet")
+
+
+def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "constant") -> Tensor:
+ _assert_image_tensor(img)
+
+ if not isinstance(padding, (int, tuple, list)):
+ raise TypeError("Got inappropriate padding arg")
+ if not isinstance(fill, (int, float)):
+ raise TypeError("Got inappropriate fill arg")
+ if not isinstance(padding_mode, str):
+ raise TypeError("Got inappropriate padding_mode arg")
+
+ if isinstance(padding, tuple):
+ padding = list(padding)
+
+ if isinstance(padding, list) and len(padding) not in [1, 2, 4]:
+ raise ValueError(f"Padding must be an int or a 1, 2, or 4 element tuple, not a {len(padding)} element tuple")
+
+ if padding_mode not in ["constant", "edge", "reflect", "symmetric"]:
+ raise ValueError("Padding mode should be either constant, edge, reflect or symmetric")
+
+ if isinstance(padding, int):
+ if torch.jit.is_scripting():
+            # This may be unreachable
+ raise ValueError("padding can't be an int while torchscripting, set it as a list [value, ]")
+ pad_left = pad_right = pad_top = pad_bottom = padding
+ elif len(padding) == 1:
+ pad_left = pad_right = pad_top = pad_bottom = padding[0]
+ elif len(padding) == 2:
+ pad_left = pad_right = padding[0]
+ pad_top = pad_bottom = padding[1]
+ else:
+ pad_left = padding[0]
+ pad_top = padding[1]
+ pad_right = padding[2]
+ pad_bottom = padding[3]
+
+ p = [pad_left, pad_right, pad_top, pad_bottom]
+
+ if padding_mode == "edge":
+ # remap padding_mode str
+ padding_mode = "replicate"
+ elif padding_mode == "symmetric":
+ # route to another implementation
+ return _pad_symmetric(img, p)
+
+ need_squeeze = False
+ if img.ndim < 4:
+ img = img.unsqueeze(dim=0)
+ need_squeeze = True
+
+ out_dtype = img.dtype
+ need_cast = False
+ if (padding_mode != "constant") and img.dtype not in (torch.float32, torch.float64):
+ # Here we temporary cast input tensor to float
+ # until pytorch issue is resolved :
+ # https://github.com/pytorch/pytorch/issues/40763
+ need_cast = True
+ img = img.to(torch.float32)
+
+ img = torch_pad(img, p, mode=padding_mode, value=float(fill))
+
+ if need_squeeze:
+ img = img.squeeze(dim=0)
+
+ if need_cast:
+ img = img.to(out_dtype)
+
+ return img
+
+
+def resize(
+ img: Tensor,
+ size: List[int],
+ interpolation: str = "bilinear",
+ max_size: Optional[int] = None,
+ antialias: Optional[bool] = None,
+) -> Tensor:
+ _assert_image_tensor(img)
+
+ if not isinstance(size, (int, tuple, list)):
+ raise TypeError("Got inappropriate size arg")
+ if not isinstance(interpolation, str):
+ raise TypeError("Got inappropriate interpolation arg")
+
+ if interpolation not in ["nearest", "bilinear", "bicubic"]:
+ raise ValueError("This interpolation mode is unsupported with Tensor input")
+
+ if isinstance(size, tuple):
+ size = list(size)
+
+ if isinstance(size, list):
+ if len(size) not in [1, 2]:
+ raise ValueError(
+ f"Size must be an int or a 1 or 2 element tuple/list, not a {len(size)} element tuple/list"
+ )
+ if max_size is not None and len(size) != 1:
+ raise ValueError(
+ "max_size should only be passed if size specifies the length of the smaller edge, "
+ "i.e. size should be an int or a sequence of length 1 in torchscript mode."
+ )
+
+ if antialias is None:
+ antialias = False
+
+ if antialias and interpolation not in ["bilinear", "bicubic"]:
+ raise ValueError("Antialias option is supported for bilinear and bicubic interpolation modes only")
+
+ w, h = get_image_size(img)
+
+ if isinstance(size, int) or len(size) == 1: # specified size only for the smallest edge
+ short, long = (w, h) if w <= h else (h, w)
+ requested_new_short = size if isinstance(size, int) else size[0]
+
+ if short == requested_new_short:
+ return img
+
+ new_short, new_long = requested_new_short, int(requested_new_short * long / short)
+
+ if max_size is not None:
+ if max_size <= requested_new_short:
+ raise ValueError(
+ f"max_size = {max_size} must be strictly greater than the requested "
+ f"size for the smaller edge size = {size}"
+ )
+ if new_long > max_size:
+ new_short, new_long = int(max_size * new_short / new_long), max_size
+
+ new_w, new_h = (new_short, new_long) if w <= h else (new_long, new_short)
+
+ else: # specified both h and w
+ new_w, new_h = size[1], size[0]
+
+ img, need_cast, need_squeeze, out_dtype = _cast_squeeze_in(img, [torch.float32, torch.float64])
+
+ # Define align_corners to avoid warnings
+ align_corners = False if interpolation in ["bilinear", "bicubic"] else None
+
+ if antialias:
+ if interpolation == "bilinear":
+ img = torch.ops.torchvision._interpolate_bilinear2d_aa(img, [new_h, new_w], align_corners=False)
+ elif interpolation == "bicubic":
+ img = torch.ops.torchvision._interpolate_bicubic2d_aa(img, [new_h, new_w], align_corners=False)
+ else:
+ img = interpolate(img, size=[new_h, new_w], mode=interpolation, align_corners=align_corners)
+
+ if interpolation == "bicubic" and out_dtype == torch.uint8:
+ img = img.clamp(min=0, max=255)
+
+ img = _cast_squeeze_out(img, need_cast=need_cast, need_squeeze=need_squeeze, out_dtype=out_dtype)
+
+ return img
+
+
+def _assert_grid_transform_inputs(
+ img: Tensor,
+ matrix: Optional[List[float]],
+ interpolation: str,
+ fill: Optional[List[float]],
+ supported_interpolation_modes: List[str],
+ coeffs: Optional[List[float]] = None,
+) -> None:
+
+ if not (isinstance(img, torch.Tensor)):
+ raise TypeError("Input img should be Tensor")
+
+ _assert_image_tensor(img)
+
+ if matrix is not None and not isinstance(matrix, list):
+ raise TypeError("Argument matrix should be a list")
+
+ if matrix is not None and len(matrix) != 6:
+ raise ValueError("Argument matrix should have 6 float values")
+
+ if coeffs is not None and len(coeffs) != 8:
+ raise ValueError("Argument coeffs should have 8 float values")
+
+ if fill is not None and not isinstance(fill, (int, float, tuple, list)):
+ warnings.warn("Argument fill should be either int, float, tuple or list")
+
+ # Check fill
+ num_channels = get_image_num_channels(img)
+ if isinstance(fill, (tuple, list)) and (len(fill) > 1 and len(fill) != num_channels):
+ msg = (
+ "The number of elements in 'fill' cannot broadcast to match the number of "
+ "channels of the image ({} != {})"
+ )
+ raise ValueError(msg.format(len(fill), num_channels))
+
+ if interpolation not in supported_interpolation_modes:
+ raise ValueError(f"Interpolation mode '{interpolation}' is unsupported with Tensor input")
+
+
+def _cast_squeeze_in(img: Tensor, req_dtypes: List[torch.dtype]) -> Tuple[Tensor, bool, bool, torch.dtype]:
+ need_squeeze = False
+ # make image NCHW
+ if img.ndim < 4:
+ img = img.unsqueeze(dim=0)
+ need_squeeze = True
+
+ out_dtype = img.dtype
+ need_cast = False
+ if out_dtype not in req_dtypes:
+ need_cast = True
+ req_dtype = req_dtypes[0]
+ img = img.to(req_dtype)
+ return img, need_cast, need_squeeze, out_dtype
+
+
+def _cast_squeeze_out(img: Tensor, need_cast: bool, need_squeeze: bool, out_dtype: torch.dtype) -> Tensor:
+ if need_squeeze:
+ img = img.squeeze(dim=0)
+
+ if need_cast:
+ if out_dtype in (torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64):
+ # it is better to round before cast
+ img = torch.round(img)
+ img = img.to(out_dtype)
+
+ return img
+
+
+def _apply_grid_transform(img: Tensor, grid: Tensor, mode: str, fill: Optional[List[float]]) -> Tensor:
+
+ img, need_cast, need_squeeze, out_dtype = _cast_squeeze_in(
+ img,
+ [
+ grid.dtype,
+ ],
+ )
+
+ if img.shape[0] > 1:
+ # Apply same grid to a batch of images
+ grid = grid.expand(img.shape[0], grid.shape[1], grid.shape[2], grid.shape[3])
+
+ # Append a dummy mask for customized fill colors, should be faster than grid_sample() twice
+ if fill is not None:
+ dummy = torch.ones((img.shape[0], 1, img.shape[2], img.shape[3]), dtype=img.dtype, device=img.device)
+ img = torch.cat((img, dummy), dim=1)
+
+ img = grid_sample(img, grid, mode=mode, padding_mode="zeros", align_corners=False)
+
+ # Fill with required color
+ if fill is not None:
+ mask = img[:, -1:, :, :] # N * 1 * H * W
+ img = img[:, :-1, :, :] # N * C * H * W
+ mask = mask.expand_as(img)
+ len_fill = len(fill) if isinstance(fill, (tuple, list)) else 1
+ fill_img = torch.tensor(fill, dtype=img.dtype, device=img.device).view(1, len_fill, 1, 1).expand_as(img)
+ if mode == "nearest":
+ mask = mask < 0.5
+ img[mask] = fill_img[mask]
+ else: # 'bilinear'
+ img = img * mask + (1.0 - mask) * fill_img
+
+ img = _cast_squeeze_out(img, need_cast, need_squeeze, out_dtype)
+ return img
+
+
+def _gen_affine_grid(
+ theta: Tensor,
+ w: int,
+ h: int,
+ ow: int,
+ oh: int,
+) -> Tensor:
+ # https://github.com/pytorch/pytorch/blob/74b65c32be68b15dc7c9e8bb62459efbfbde33d8/aten/src/ATen/native/
+ # AffineGridGenerator.cpp#L18
+ # Difference with AffineGridGenerator is that:
+ # 1) we normalize grid values after applying theta
+ # 2) we can normalize by other image size, such that it covers "extend" option like in PIL.Image.rotate
+
+ d = 0.5
+ base_grid = torch.empty(1, oh, ow, 3, dtype=theta.dtype, device=theta.device)
+ x_grid = torch.linspace(-ow * 0.5 + d, ow * 0.5 + d - 1, steps=ow, device=theta.device)
+ base_grid[..., 0].copy_(x_grid)
+ y_grid = torch.linspace(-oh * 0.5 + d, oh * 0.5 + d - 1, steps=oh, device=theta.device).unsqueeze_(-1)
+ base_grid[..., 1].copy_(y_grid)
+ base_grid[..., 2].fill_(1)
+
+ rescaled_theta = theta.transpose(1, 2) / torch.tensor([0.5 * w, 0.5 * h], dtype=theta.dtype, device=theta.device)
+ output_grid = base_grid.view(1, oh * ow, 3).bmm(rescaled_theta)
+ return output_grid.view(1, oh, ow, 2)
+
+
+def affine(
+ img: Tensor, matrix: List[float], interpolation: str = "nearest", fill: Optional[List[float]] = None
+) -> Tensor:
+ _assert_grid_transform_inputs(img, matrix, interpolation, fill, ["nearest", "bilinear"])
+
+ dtype = img.dtype if torch.is_floating_point(img) else torch.float32
+ theta = torch.tensor(matrix, dtype=dtype, device=img.device).reshape(1, 2, 3)
+ shape = img.shape
+ # grid will be generated on the same device as theta and img
+ grid = _gen_affine_grid(theta, w=shape[-1], h=shape[-2], ow=shape[-1], oh=shape[-2])
+ return _apply_grid_transform(img, grid, interpolation, fill=fill)
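+
+ # Illustrative sketch: ``matrix`` is the flattened 2x3 affine matrix applied to the sampling
+ # grid (the higher-level transforms typically pass the inverse of the desired transform here).
+ # With the identity matrix the image comes back unchanged up to interpolation:
+ #   >>> img = torch.rand(3, 32, 32)
+ #   >>> affine(img, matrix=[1.0, 0.0, 0.0, 0.0, 1.0, 0.0], interpolation="bilinear").shape
+ #   torch.Size([3, 32, 32])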
+
+
+def _compute_output_size(matrix: List[float], w: int, h: int) -> Tuple[int, int]:
+
+ # Inspired of PIL implementation:
+ # https://github.com/python-pillow/Pillow/blob/11de3318867e4398057373ee9f12dcb33db7335c/src/PIL/Image.py#L2054
+
+ # pts are Top-Left, Top-Right, Bottom-Left, Bottom-Right points.
+ pts = torch.tensor(
+ [
+ [-0.5 * w, -0.5 * h, 1.0],
+ [-0.5 * w, 0.5 * h, 1.0],
+ [0.5 * w, 0.5 * h, 1.0],
+ [0.5 * w, -0.5 * h, 1.0],
+ ]
+ )
+ theta = torch.tensor(matrix, dtype=torch.float).reshape(1, 2, 3)
+ new_pts = pts.view(1, 4, 3).bmm(theta.transpose(1, 2)).view(4, 2)
+ min_vals, _ = new_pts.min(dim=0)
+ max_vals, _ = new_pts.max(dim=0)
+
+ # Truncate precision to 1e-4 to avoid ceil of Xe-15 to 1.0
+ tol = 1e-4
+ cmax = torch.ceil((max_vals / tol).trunc_() * tol)
+ cmin = torch.floor((min_vals / tol).trunc_() * tol)
+ size = cmax - cmin
+ return int(size[0]), int(size[1])
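+
+ # Worked example (illustrative): for a 45-degree rotation of a 100x100 image the corners
+ # (+-50, +-50) map to coordinates of about +-70.71, so the expanded canvas computed here is
+ # (142, 142) -- the size that rotate(..., expand=True) below will allocate.
+ #   >>> c = 0.70711  # ~cos(45 deg) == sin(45 deg)
+ #   >>> _compute_output_size([c, -c, 0.0, c, c, 0.0], 100, 100)
+ #   (142, 142)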
+
+
+def rotate(
+ img: Tensor,
+ matrix: List[float],
+ interpolation: str = "nearest",
+ expand: bool = False,
+ fill: Optional[List[float]] = None,
+) -> Tensor:
+ _assert_grid_transform_inputs(img, matrix, interpolation, fill, ["nearest", "bilinear"])
+ w, h = img.shape[-1], img.shape[-2]
+ ow, oh = _compute_output_size(matrix, w, h) if expand else (w, h)
+ dtype = img.dtype if torch.is_floating_point(img) else torch.float32
+ theta = torch.tensor(matrix, dtype=dtype, device=img.device).reshape(1, 2, 3)
+ # grid will be generated on the same device as theta and img
+ grid = _gen_affine_grid(theta, w=w, h=h, ow=ow, oh=oh)
+
+ return _apply_grid_transform(img, grid, interpolation, fill=fill)
+
+
+def _perspective_grid(coeffs: List[float], ow: int, oh: int, dtype: torch.dtype, device: torch.device) -> Tensor:
+ # https://github.com/python-pillow/Pillow/blob/4634eafe3c695a014267eefdce830b4a825beed7/
+ # src/libImaging/Geometry.c#L394
+
+ #
+ # x_out = (coeffs[0] * x + coeffs[1] * y + coeffs[2]) / (coeffs[6] * x + coeffs[7] * y + 1)
+ # y_out = (coeffs[3] * x + coeffs[4] * y + coeffs[5]) / (coeffs[6] * x + coeffs[7] * y + 1)
+ #
+ theta1 = torch.tensor(
+ [[[coeffs[0], coeffs[1], coeffs[2]], [coeffs[3], coeffs[4], coeffs[5]]]], dtype=dtype, device=device
+ )
+ theta2 = torch.tensor([[[coeffs[6], coeffs[7], 1.0], [coeffs[6], coeffs[7], 1.0]]], dtype=dtype, device=device)
+
+ d = 0.5
+ base_grid = torch.empty(1, oh, ow, 3, dtype=dtype, device=device)
+ x_grid = torch.linspace(d, ow * 1.0 + d - 1.0, steps=ow, device=device)
+ base_grid[..., 0].copy_(x_grid)
+ y_grid = torch.linspace(d, oh * 1.0 + d - 1.0, steps=oh, device=device).unsqueeze_(-1)
+ base_grid[..., 1].copy_(y_grid)
+ base_grid[..., 2].fill_(1)
+
+ rescaled_theta1 = theta1.transpose(1, 2) / torch.tensor([0.5 * ow, 0.5 * oh], dtype=dtype, device=device)
+ output_grid1 = base_grid.view(1, oh * ow, 3).bmm(rescaled_theta1)
+ output_grid2 = base_grid.view(1, oh * ow, 3).bmm(theta2.transpose(1, 2))
+
+ output_grid = output_grid1 / output_grid2 - 1.0
+ return output_grid.view(1, oh, ow, 2)
+
+
+def perspective(
+ img: Tensor, perspective_coeffs: List[float], interpolation: str = "bilinear", fill: Optional[List[float]] = None
+) -> Tensor:
+ if not (isinstance(img, torch.Tensor)):
+ raise TypeError("Input img should be Tensor.")
+
+ _assert_image_tensor(img)
+
+ _assert_grid_transform_inputs(
+ img,
+ matrix=None,
+ interpolation=interpolation,
+ fill=fill,
+ supported_interpolation_modes=["nearest", "bilinear"],
+ coeffs=perspective_coeffs,
+ )
+
+ ow, oh = img.shape[-1], img.shape[-2]
+ dtype = img.dtype if torch.is_floating_point(img) else torch.float32
+ grid = _perspective_grid(perspective_coeffs, ow=ow, oh=oh, dtype=dtype, device=img.device)
+ return _apply_grid_transform(img, grid, interpolation, fill=fill)
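+
+ # Illustrative sketch: ``perspective_coeffs`` are the eight coefficients of the mapping
+ # documented in _perspective_grid above; the identity mapping (1, 0, 0, 0, 1, 0, 0, 0)
+ # returns the input unchanged up to interpolation.
+ #   >>> img = torch.rand(3, 24, 32)
+ #   >>> perspective(img, [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]).shape
+ #   torch.Size([3, 24, 32])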
+
+
+def _get_gaussian_kernel1d(kernel_size: int, sigma: float) -> Tensor:
+ ksize_half = (kernel_size - 1) * 0.5
+
+ x = torch.linspace(-ksize_half, ksize_half, steps=kernel_size)
+ pdf = torch.exp(-0.5 * (x / sigma).pow(2))
+ kernel1d = pdf / pdf.sum()
+
+ return kernel1d
+
+
+def _get_gaussian_kernel2d(
+ kernel_size: List[int], sigma: List[float], dtype: torch.dtype, device: torch.device
+) -> Tensor:
+ kernel1d_x = _get_gaussian_kernel1d(kernel_size[0], sigma[0]).to(device, dtype=dtype)
+ kernel1d_y = _get_gaussian_kernel1d(kernel_size[1], sigma[1]).to(device, dtype=dtype)
+ kernel2d = torch.mm(kernel1d_y[:, None], kernel1d_x[None, :])
+ return kernel2d
+
+
+def gaussian_blur(img: Tensor, kernel_size: List[int], sigma: List[float]) -> Tensor:
+ if not (isinstance(img, torch.Tensor)):
+ raise TypeError(f"img should be Tensor. Got {type(img)}")
+
+ _assert_image_tensor(img)
+
+ dtype = img.dtype if torch.is_floating_point(img) else torch.float32
+ kernel = _get_gaussian_kernel2d(kernel_size, sigma, dtype=dtype, device=img.device)
+ kernel = kernel.expand(img.shape[-3], 1, kernel.shape[0], kernel.shape[1])
+
+ img, need_cast, need_squeeze, out_dtype = _cast_squeeze_in(
+ img,
+ [
+ kernel.dtype,
+ ],
+ )
+
+ # padding = (left, right, top, bottom)
+ padding = [kernel_size[0] // 2, kernel_size[0] // 2, kernel_size[1] // 2, kernel_size[1] // 2]
+ img = torch_pad(img, padding, mode="reflect")
+ img = conv2d(img, kernel, groups=img.shape[-3])
+
+ img = _cast_squeeze_out(img, need_cast, need_squeeze, out_dtype)
+ return img
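+
+ # Illustrative usage sketch: the 2D kernel is the outer product of two 1D Gaussians, so
+ # ``kernel_size`` and ``sigma`` are [x, y] pairs and the two axes may be blurred differently.
+ #   >>> img = torch.rand(3, 64, 64)
+ #   >>> gaussian_blur(img, kernel_size=[5, 9], sigma=[1.5, 3.0]).shape
+ #   torch.Size([3, 64, 64])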
+
+
+def invert(img: Tensor) -> Tensor:
+
+ _assert_image_tensor(img)
+
+ if img.ndim < 3:
+ raise TypeError(f"Input image tensor should have at least 3 dimensions, but found {img.ndim}")
+
+ _assert_channels(img, [1, 3])
+
+ bound = torch.tensor(1 if img.is_floating_point() else 255, dtype=img.dtype, device=img.device)
+ return bound - img
+
+
+def posterize(img: Tensor, bits: int) -> Tensor:
+
+ _assert_image_tensor(img)
+
+ if img.ndim < 3:
+ raise TypeError(f"Input image tensor should have at least 3 dimensions, but found {img.ndim}")
+ if img.dtype != torch.uint8:
+ raise TypeError(f"Only torch.uint8 image tensors are supported, but found {img.dtype}")
+
+ _assert_channels(img, [1, 3])
+ mask = -int(2 ** (8 - bits)) # JIT-friendly for: ~(2 ** (8 - bits) - 1)
+ return img & mask
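+
+ # Worked example: for bits=4 the mask is -int(2 ** 4) == -16, whose 8-bit pattern is
+ # 0b11110000 (the same as ~(2 ** 4 - 1)), so the AND keeps only the 4 most significant bits.
+ #   >>> posterize(torch.tensor([[[200, 37]]], dtype=torch.uint8), bits=4)   # -> [[[192, 32]]]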
+
+
+def solarize(img: Tensor, threshold: float) -> Tensor:
+
+ _assert_image_tensor(img)
+
+ if img.ndim < 3:
+ raise TypeError(f"Input image tensor should have at least 3 dimensions, but found {img.ndim}")
+
+ _assert_channels(img, [1, 3])
+
+ inverted_img = invert(img)
+ return torch.where(img >= threshold, inverted_img, img)
+
+
+def _blurred_degenerate_image(img: Tensor) -> Tensor:
+ dtype = img.dtype if torch.is_floating_point(img) else torch.float32
+
+ kernel = torch.ones((3, 3), dtype=dtype, device=img.device)
+ kernel[1, 1] = 5.0
+ kernel /= kernel.sum()
+ kernel = kernel.expand(img.shape[-3], 1, kernel.shape[0], kernel.shape[1])
+
+ result_tmp, need_cast, need_squeeze, out_dtype = _cast_squeeze_in(
+ img,
+ [
+ kernel.dtype,
+ ],
+ )
+ result_tmp = conv2d(result_tmp, kernel, groups=result_tmp.shape[-3])
+ result_tmp = _cast_squeeze_out(result_tmp, need_cast, need_squeeze, out_dtype)
+
+ result = img.clone()
+ result[..., 1:-1, 1:-1] = result_tmp
+
+ return result
+
+
+def adjust_sharpness(img: Tensor, sharpness_factor: float) -> Tensor:
+ if sharpness_factor < 0:
+ raise ValueError(f"sharpness_factor ({sharpness_factor}) is not non-negative.")
+
+ _assert_image_tensor(img)
+
+ _assert_channels(img, [1, 3])
+
+ if img.size(-1) <= 2 or img.size(-2) <= 2:
+ return img
+
+ return _blend(img, _blurred_degenerate_image(img), sharpness_factor)
+
+
+def autocontrast(img: Tensor) -> Tensor:
+
+ _assert_image_tensor(img)
+
+ if img.ndim < 3:
+ raise TypeError(f"Input image tensor should have at least 3 dimensions, but found {img.ndim}")
+
+ _assert_channels(img, [1, 3])
+
+ bound = 1.0 if img.is_floating_point() else 255.0
+ dtype = img.dtype if torch.is_floating_point(img) else torch.float32
+
+ minimum = img.amin(dim=(-2, -1), keepdim=True).to(dtype)
+ maximum = img.amax(dim=(-2, -1), keepdim=True).to(dtype)
+ eq_idxs = torch.where(minimum == maximum)[0]
+ minimum[eq_idxs] = 0
+ maximum[eq_idxs] = bound
+ scale = bound / (maximum - minimum)
+
+ return ((img - minimum) * scale).clamp(0, bound).to(img.dtype)
+
+
+def _scale_channel(img_chan: Tensor) -> Tensor:
+ # TODO: we should expect bincount to always be faster than histc, but this
+ # isn't always the case. Once
+ # https://github.com/pytorch/pytorch/issues/53194 is fixed, remove the if
+ # block and only use bincount.
+ if img_chan.is_cuda:
+ hist = torch.histc(img_chan.to(torch.float32), bins=256, min=0, max=255)
+ else:
+ hist = torch.bincount(img_chan.view(-1), minlength=256)
+
+ nonzero_hist = hist[hist != 0]
+ step = torch.div(nonzero_hist[:-1].sum(), 255, rounding_mode="floor")
+ if step == 0:
+ return img_chan
+
+ lut = torch.div(torch.cumsum(hist, 0) + torch.div(step, 2, rounding_mode="floor"), step, rounding_mode="floor")
+ lut = torch.nn.functional.pad(lut, [1, 0])[:-1].clamp(0, 255)
+
+ return lut[img_chan.to(torch.int64)].to(torch.uint8)
+
+
+def _equalize_single_image(img: Tensor) -> Tensor:
+ return torch.stack([_scale_channel(img[c]) for c in range(img.size(0))])
+
+
+def equalize(img: Tensor) -> Tensor:
+
+ _assert_image_tensor(img)
+
+ if not (3 <= img.ndim <= 4):
+ raise TypeError(f"Input image tensor should have 3 or 4 dimensions, but found {img.ndim}")
+ if img.dtype != torch.uint8:
+ raise TypeError(f"Only torch.uint8 image tensors are supported, but found {img.dtype}")
+
+ _assert_channels(img, [1, 3])
+
+ if img.ndim == 3:
+ return _equalize_single_image(img)
+
+ return torch.stack([_equalize_single_image(x) for x in img])
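+
+ # Illustrative sketch: the per-channel LUT built in _scale_channel rescales the cumulative
+ # histogram so that the occupied intensity range is stretched over the full [0, 255] range.
+ #   >>> img = torch.randint(60, 180, (3, 32, 32), dtype=torch.uint8)  # low-contrast input
+ #   >>> out = equalize(img)
+ #   >>> int(out.min()), int(out.max())   # -> (0, 255)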
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor_origin.py
new file mode 100644
index 0000000000..b81deed6d4
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor_origin.py
@@ -0,0 +1,238 @@
+import torch
+import torchvision.transforms.functional as F
+from torch import Tensor
+from torch.jit.annotations import Optional, List, BroadcastingList2, Tuple
+
+
+def _is_tensor_a_torch_image(input):
+ return len(input.shape) == 3
+
+
+def vflip(img):
+ # type: (Tensor) -> Tensor
+ """Vertically flip the given the Image Tensor.
+
+ Args:
+ img (Tensor): Image Tensor to be flipped in the form [C, H, W].
+
+ Returns:
+ Tensor: Vertically flipped image Tensor.
+ """
+ if not _is_tensor_a_torch_image(img):
+ raise TypeError('tensor is not a torch image.')
+
+ return img.flip(-2)
+
+
+def hflip(img):
+ # type: (Tensor) -> Tensor
+ """Horizontally flip the given the Image Tensor.
+
+ Args:
+ img (Tensor): Image Tensor to be flipped in the form [C, H, W].
+
+ Returns:
+ Tensor: Horizontally flipped image Tensor.
+ """
+ if not _is_tensor_a_torch_image(img):
+ raise TypeError('tensor is not a torch image.')
+
+ return img.flip(-1)
+
+
+def crop(img, top, left, height, width):
+ # type: (Tensor, int, int, int, int) -> Tensor
+ """Crop the given Image Tensor.
+
+ Args:
+ img (Tensor): Image to be cropped in the form [C, H, W]. (0,0) denotes the top left corner of the image.
+ top (int): Vertical component of the top left corner of the crop box.
+ left (int): Horizontal component of the top left corner of the crop box.
+ height (int): Height of the crop box.
+ width (int): Width of the crop box.
+
+ Returns:
+ Tensor: Cropped image.
+ """
+ if not _is_tensor_a_torch_image(img):
+ raise TypeError('tensor is not a torch image.')
+
+ return img[..., top:top + height, left:left + width]
+
+
+def rgb_to_grayscale(img):
+ # type: (Tensor) -> Tensor
+ """Convert the given RGB Image Tensor to Grayscale.
+ For RGB to Grayscale conversion, the ITU-R 601-2 luma transform is performed, which is
+ is L = R * 0.2989 + G * 0.5870 + B * 0.1140
+
+ Args:
+ img (Tensor): Image to be converted to Grayscale in the form [C, H, W].
+
+ Returns:
+ Tensor: Grayscale image.
+
+ """
+ if img.shape[0] != 3:
+ raise TypeError('Input Image does not contain 3 Channels')
+
+ return (0.2989 * img[0] + 0.5870 * img[1] + 0.1140 * img[2]).to(img.dtype)
+
+
+def adjust_brightness(img, brightness_factor):
+ # type: (Tensor, float) -> Tensor
+ """Adjust brightness of an RGB image.
+
+ Args:
+ img (Tensor): Image to be adjusted.
+ brightness_factor (float): How much to adjust the brightness. Can be
+ any non-negative number. 0 gives a black image, 1 gives the
+ original image, while 2 increases the brightness by a factor of 2.
+
+ Returns:
+ Tensor: Brightness adjusted image.
+ """
+ if not _is_tensor_a_torch_image(img):
+ raise TypeError('tensor is not a torch image.')
+
+ return _blend(img, torch.zeros_like(img), brightness_factor)
+
+
+def adjust_contrast(img, contrast_factor):
+ # type: (Tensor, float) -> Tensor
+ """Adjust contrast of an RGB image.
+
+ Args:
+ img (Tensor): Image to be adjusted.
+ contrast_factor (float): How much to adjust the contrast. Can be any
+ non-negative number. 0 gives a solid gray image, 1 gives the
+ original image, while 2 increases the contrast by a factor of 2.
+
+ Returns:
+ Tensor: Contrast adjusted image.
+ """
+ if not _is_tensor_a_torch_image(img):
+ raise TypeError('tensor is not a torch image.')
+
+ mean = torch.mean(rgb_to_grayscale(img).to(torch.float))
+
+ return _blend(img, mean, contrast_factor)
+
+
+def adjust_saturation(img, saturation_factor):
+ # type: (Tensor, float) -> Tensor
+ """Adjust color saturation of an RGB image.
+
+ Args:
+ img (Tensor): Image to be adjusted.
+ saturation_factor (float): How much to adjust the saturation. 0 will
+ give a black and white image, 1 will give the original image while
+ 2 will enhance the saturation by a factor of 2.
+
+ Returns:
+ Tensor: Saturation adjusted image.
+ """
+ if not _is_tensor_a_torch_image(img):
+ raise TypeError('tensor is not a torch image.')
+
+ return _blend(img, rgb_to_grayscale(img), saturation_factor)
+
+
+def center_crop(img, output_size):
+ # type: (Tensor, BroadcastingList2[int]) -> Tensor
+ """Crop the Image Tensor and resize it to desired size.
+
+ Args:
+ img (Tensor): Image to be cropped. (0,0) denotes the top left corner of the image.
+ output_size (sequence or int): (height, width) of the crop box. If int,
+ it is used for both directions
+
+ Returns:
+ Tensor: Cropped image.
+ """
+ if not _is_tensor_a_torch_image(img):
+ raise TypeError('tensor is not a torch image.')
+
+ # img is laid out as [C, H, W], so size() unpacks as (channels, height, width)
+ _, image_height, image_width = img.size()
+ crop_height, crop_width = output_size
+ crop_top = int(round((image_height - crop_height) / 2.))
+ crop_left = int(round((image_width - crop_width) / 2.))
+
+ return crop(img, crop_top, crop_left, crop_height, crop_width)
+
+
+def five_crop(img, size):
+ # type: (Tensor, BroadcastingList2[int]) -> List[Tensor]
+ """Crop the given Image Tensor into four corners and the central crop.
+ .. Note::
+ This transform returns a List of Tensors and there may be a
+ mismatch in the number of inputs and targets your ``Dataset`` returns.
+
+ Args:
+ size (sequence or int): Desired output size of the crop. If size is an
+ int instead of sequence like (h, w), a square crop (size, size) is
+ made.
+
+ Returns:
+ List: List (tl, tr, bl, br, center)
+ Corresponding top left, top right, bottom left, bottom right and center crop.
+ """
+ if not _is_tensor_a_torch_image(img):
+ raise TypeError('tensor is not a torch image.')
+
+ assert len(size) == 2, "Please provide only two dimensions (h, w) for size."
+
+ # img is laid out as [C, H, W], so size() unpacks as (channels, height, width)
+ _, image_height, image_width = img.size()
+ crop_height, crop_width = size
+ if crop_width > image_width or crop_height > image_height:
+ msg = "Requested crop size {} is bigger than input size {}"
+ raise ValueError(msg.format(size, (image_height, image_width)))
+
+ # crop() above takes (img, top, left, height, width)
+ tl = crop(img, 0, 0, crop_height, crop_width)
+ tr = crop(img, 0, image_width - crop_width, crop_height, crop_width)
+ bl = crop(img, image_height - crop_height, 0, crop_height, crop_width)
+ br = crop(img, image_height - crop_height, image_width - crop_width, crop_height, crop_width)
+ center = center_crop(img, (crop_height, crop_width))
+
+ return [tl, tr, bl, br, center]
+
+
+def ten_crop(img, size, vertical_flip=False):
+ # type: (Tensor, BroadcastingList2[int], bool) -> List[Tensor]
+ """Crop the given Image Tensor into four corners and the central crop plus the
+ flipped version of these (horizontal flipping is used by default).
+ .. Note::
+ This transform returns a List of images and there may be a
+ mismatch in the number of inputs and targets your ``Dataset`` returns.
+
+ Args:
+ size (sequence or int): Desired output size of the crop. If size is an
+ int instead of sequence like (h, w), a square crop (size, size) is
+ made.
+ vertical_flip (bool): Use vertical flipping instead of horizontal
+
+ Returns:
+ List: List (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip, br_flip, center_flip)
+ Corresponding top left, top right, bottom left, bottom right and center crop
+ and same for the flipped image's tensor.
+ """
+ if not _is_tensor_a_torch_image(img):
+ raise TypeError('tensor is not a torch image.')
+
+ assert len(size) == 2, "Please provide only two dimensions (h, w) for size."
+ first_five = five_crop(img, size)
+
+ if vertical_flip:
+ img = vflip(img)
+ else:
+ img = hflip(img)
+
+ second_five = five_crop(img, size)
+
+ return first_five + second_five
+
+
+def _blend(img1, img2, ratio):
+ # type: (Tensor, Tensor, float) -> Tensor
+ bound = 1 if img1.dtype in [torch.half, torch.float32, torch.float64] else 255
+ return (ratio * img1 + (1 - ratio) * img2).clamp(0, bound).to(img1.dtype)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms.py
new file mode 100644
index 0000000000..a409ff3cbb
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms.py
@@ -0,0 +1,2016 @@
+import math
+import numbers
+import random
+import warnings
+from collections.abc import Sequence
+from typing import Tuple, List, Optional
+
+import torch
+from torch import Tensor
+
+try:
+ import accimage
+except ImportError:
+ accimage = None
+
+from . import functional as F
+from .functional import InterpolationMode, _interpolation_modes_from_int
+
+
+__all__ = [
+ "Compose",
+ "ToTensor",
+ "PILToTensor",
+ "ConvertImageDtype",
+ "ToPILImage",
+ "Normalize",
+ "Resize",
+ "Scale",
+ "CenterCrop",
+ "Pad",
+ "Lambda",
+ "RandomApply",
+ "RandomChoice",
+ "RandomOrder",
+ "RandomCrop",
+ "RandomHorizontalFlip",
+ "RandomVerticalFlip",
+ "RandomResizedCrop",
+ "RandomSizedCrop",
+ "FiveCrop",
+ "TenCrop",
+ "LinearTransformation",
+ "ColorJitter",
+ "RandomRotation",
+ "RandomAffine",
+ "Grayscale",
+ "RandomGrayscale",
+ "RandomPerspective",
+ "RandomErasing",
+ "GaussianBlur",
+ "InterpolationMode",
+ "RandomInvert",
+ "RandomPosterize",
+ "RandomSolarize",
+ "RandomAdjustSharpness",
+ "RandomAutocontrast",
+ "RandomEqualize",
+]
+
+
+class Compose:
+ """Composes several transforms together. This transform does not support torchscript.
+ Please, see the note below.
+
+ Args:
+ transforms (list of ``Transform`` objects): list of transforms to compose.
+
+ Example:
+ >>> transforms.Compose([
+ >>> transforms.CenterCrop(10),
+ >>> transforms.PILToTensor(),
+ >>> transforms.ConvertImageDtype(torch.float),
+ >>> ])
+
+ .. note::
+ In order to script the transformations, please use ``torch.nn.Sequential`` as below.
+
+ >>> transforms = torch.nn.Sequential(
+ >>> transforms.CenterCrop(10),
+ >>> transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
+ >>> )
+ >>> scripted_transforms = torch.jit.script(transforms)
+
+ Make sure to use only scriptable transformations, i.e. ones that work with ``torch.Tensor`` and do not
+ require ``lambda`` functions or ``PIL.Image``.
+
+ """
+
+ def __init__(self, transforms):
+ self.transforms = transforms
+
+ def __call__(self, img):
+ for t in self.transforms:
+ img = t(img)
+ return img
+
+ def __repr__(self):
+ format_string = self.__class__.__name__ + "("
+ for t in self.transforms:
+ format_string += "\n"
+ format_string += f" {t}"
+ format_string += "\n)"
+ return format_string
+
+
+class ToTensor:
+ """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor. This transform does not support torchscript.
+
+ Converts a PIL Image or numpy.ndarray (H x W x C) in the range
+ [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0]
+ if the PIL Image belongs to one of the modes (L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK, 1)
+ or if the numpy.ndarray has dtype = np.uint8
+
+ In the other cases, tensors are returned without scaling.
+
+ .. note::
+ Because the input image is scaled to [0.0, 1.0], this transformation should not be used when
+ transforming target image masks. See the `references`_ for implementing the transforms for image masks.
+
+ .. _references: https://github.com/pytorch/vision/tree/main/references/segmentation
+ """
+
+ def __call__(self, pic):
+ """
+ Args:
+ pic (PIL Image or numpy.ndarray): Image to be converted to tensor.
+
+ Returns:
+ Tensor: Converted image.
+ """
+ return F.to_tensor(pic)
+
+ def __repr__(self):
+ return self.__class__.__name__ + "()"
+
+
+class PILToTensor:
+ """Convert a ``PIL Image`` to a tensor of the same type. This transform does not support torchscript.
+
+ Converts a PIL Image (H x W x C) to a Tensor of shape (C x H x W).
+ """
+
+ def __call__(self, pic):
+ """
+ .. note::
+
+ A deep copy of the underlying array is performed.
+
+ Args:
+ pic (PIL Image): Image to be converted to tensor.
+
+ Returns:
+ Tensor: Converted image.
+ """
+ return F.pil_to_tensor(pic)
+
+ def __repr__(self):
+ return self.__class__.__name__ + "()"
+
+
+class ConvertImageDtype(torch.nn.Module):
+ """Convert a tensor image to the given ``dtype`` and scale the values accordingly
+ This function does not support PIL Image.
+
+ Args:
+ dtype (torch.dtype): Desired data type of the output
+
+ .. note::
+
+ When converting from a smaller to a larger integer ``dtype`` the maximum values are **not** mapped exactly.
+ If converted back and forth, this mismatch has no effect.
+
+ Raises:
+ RuntimeError: When trying to cast :class:`torch.float32` to :class:`torch.int32` or :class:`torch.int64` as
+ well as for trying to cast :class:`torch.float64` to :class:`torch.int64`. These conversions might lead to
+ overflow errors since the floating point ``dtype`` cannot store consecutive integers over the whole range
+ of the integer ``dtype``.
+ """
+
+ def __init__(self, dtype: torch.dtype) -> None:
+ super().__init__()
+ self.dtype = dtype
+
+ def forward(self, image):
+ return F.convert_image_dtype(image, self.dtype)
+
+
+class ToPILImage:
+ """Convert a tensor or an ndarray to PIL Image. This transform does not support torchscript.
+
+ Converts a torch.*Tensor of shape C x H x W or a numpy ndarray of shape
+ H x W x C to a PIL Image while preserving the value range.
+
+ Args:
+ mode (`PIL.Image mode`_): color space and pixel depth of input data (optional).
+ If ``mode`` is ``None`` (default) there are some assumptions made about the input data:
+ - If the input has 4 channels, the ``mode`` is assumed to be ``RGBA``.
+ - If the input has 3 channels, the ``mode`` is assumed to be ``RGB``.
+ - If the input has 2 channels, the ``mode`` is assumed to be ``LA``.
+ - If the input has 1 channel, the ``mode`` is determined by the data type (i.e. ``int``, ``float``,
+ ``short``).
+
+ .. _PIL.Image mode: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#concept-modes
+ """
+
+ def __init__(self, mode=None):
+ self.mode = mode
+
+ def __call__(self, pic):
+ """
+ Args:
+ pic (Tensor or numpy.ndarray): Image to be converted to PIL Image.
+
+ Returns:
+ PIL Image: Image converted to PIL Image.
+
+ """
+ return F.to_pil_image(pic, self.mode)
+
+ def __repr__(self):
+ format_string = self.__class__.__name__ + "("
+ if self.mode is not None:
+ format_string += f"mode={self.mode}"
+ format_string += ")"
+ return format_string
+
+
+class Normalize(torch.nn.Module):
+ """Normalize a tensor image with mean and standard deviation.
+ This transform does not support PIL Image.
+ Given mean: ``(mean[1],...,mean[n])`` and std: ``(std[1],..,std[n])`` for ``n``
+ channels, this transform will normalize each channel of the input
+ ``torch.*Tensor`` i.e.,
+ ``output[channel] = (input[channel] - mean[channel]) / std[channel]``
+
+ .. note::
+ This transform acts out of place, i.e., it does not mutate the input tensor.
+
+ Args:
+ mean (sequence): Sequence of means for each channel.
+ std (sequence): Sequence of standard deviations for each channel.
+ inplace(bool,optional): Bool to make this operation in-place.
+
+ """
+
+ def __init__(self, mean, std, inplace=False):
+ super().__init__()
+ self.mean = mean
+ self.std = std
+ self.inplace = inplace
+
+ def forward(self, tensor: Tensor) -> Tensor:
+ """
+ Args:
+ tensor (Tensor): Tensor image to be normalized.
+
+ Returns:
+ Tensor: Normalized Tensor image.
+ """
+ return F.normalize(tensor, self.mean, self.std, self.inplace)
+
+ def __repr__(self):
+ return self.__class__.__name__ + f"(mean={self.mean}, std={self.std})"
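+
+ # Illustrative usage sketch (the channel statistics are the ImageNet values already shown in
+ # the Compose docstring above; any per-channel mean/std works the same way):
+ #   >>> normalize = Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
+ #   >>> out = normalize(torch.rand(3, 224, 224))   # out[c] = (in[c] - mean[c]) / std[c]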
+
+
+class Resize(torch.nn.Module):
+ """Resize the input image to the given size.
+ If the image is torch Tensor, it is expected
+ to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
+
+ .. warning::
+ The output image might be different depending on its type: when downsampling, the interpolation of PIL images
+ and tensors is slightly different, because PIL applies antialiasing. This may lead to significant differences
+ in the performance of a network. Therefore, it is preferable to train and serve a model with the same input
+ types. See also below the ``antialias`` parameter, which can help make the output of PIL images and tensors
+ closer.
+
+ Args:
+ size (sequence or int): Desired output size. If size is a sequence like
+ (h, w), output size will be matched to this. If size is an int,
+ smaller edge of the image will be matched to this number.
+ i.e, if height > width, then image will be rescaled to
+ (size * height / width, size).
+
+ .. note::
+ In torchscript mode size as single int is not supported, use a sequence of length 1: ``[size, ]``.
+ interpolation (InterpolationMode): Desired interpolation enum defined by
+ :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``.
+ If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` and
+ ``InterpolationMode.BICUBIC`` are supported.
+ For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.
+ max_size (int, optional): The maximum allowed for the longer edge of
+ the resized image: if the longer edge of the image is greater
+ than ``max_size`` after being resized according to ``size``, then
+ the image is resized again so that the longer edge is equal to
+ ``max_size``. As a result, ``size`` might be overruled, i.e. the
+ smaller edge may be shorter than ``size``. This is only supported
+ if ``size`` is an int (or a sequence of length 1 in torchscript
+ mode).
+ antialias (bool, optional): antialias flag. If ``img`` is PIL Image, the flag is ignored and anti-alias
+ is always used. If ``img`` is Tensor, the flag is False by default and can be set to True for
+ ``InterpolationMode.BILINEAR`` mode only. This can help make the output for PIL images and tensors
+ closer.
+
+ .. warning::
+ There is no autodiff support for ``antialias=True`` option with input ``img`` as Tensor.
+
+ """
+
+ def __init__(self, size, interpolation=InterpolationMode.BILINEAR, max_size=None, antialias=None):
+ super().__init__()
+ if not isinstance(size, (int, Sequence)):
+ raise TypeError(f"Size should be int or sequence. Got {type(size)}")
+ if isinstance(size, Sequence) and len(size) not in (1, 2):
+ raise ValueError("If size is a sequence, it should have 1 or 2 values")
+ self.size = size
+ self.max_size = max_size
+
+ # Backward compatibility with integer value
+ if isinstance(interpolation, int):
+ warnings.warn(
+ "Argument interpolation should be of type InterpolationMode instead of int. "
+ "Please, use InterpolationMode enum."
+ )
+ interpolation = _interpolation_modes_from_int(interpolation)
+
+ self.interpolation = interpolation
+ self.antialias = antialias
+
+ def forward(self, img):
+ """
+ Args:
+ img (PIL Image or Tensor): Image to be scaled.
+
+ Returns:
+ PIL Image or Tensor: Rescaled image.
+ """
+ return F.resize(img, self.size, self.interpolation, self.max_size, self.antialias)
+
+ def __repr__(self):
+ detail = f"(size={self.size}, interpolation={self.interpolation.value}, max_size={self.max_size}, antialias={self.antialias})"
+ return self.__class__.__name__ + detail
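+
+ # Illustrative usage sketch: an int size matches only the smaller edge, while a (h, w)
+ # sequence fixes both edges.
+ #   >>> resize_short = Resize(256)          # 480x640 (HxW) input -> 256x341, aspect kept
+ #   >>> resize_exact = Resize((224, 224))   # any input -> exactly 224x224
+ #   >>> resize_exact(torch.rand(3, 480, 640)).shape
+ #   torch.Size([3, 224, 224])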
+
+
+class Scale(Resize):
+ """
+ Note: This transform is deprecated in favor of Resize.
+ """
+
+ def __init__(self, *args, **kwargs):
+ warnings.warn("The use of the transforms.Scale transform is deprecated, please use transforms.Resize instead.")
+ super().__init__(*args, **kwargs)
+
+
+class CenterCrop(torch.nn.Module):
+ """Crops the given image at the center.
+ If the image is torch Tensor, it is expected
+ to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
+ If image size is smaller than output size along any edge, image is padded with 0 and then center cropped.
+
+ Args:
+ size (sequence or int): Desired output size of the crop. If size is an
+ int instead of sequence like (h, w), a square crop (size, size) is
+ made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]).
+ """
+
+ def __init__(self, size):
+ super().__init__()
+ self.size = _setup_size(size, error_msg="Please provide only two dimensions (h, w) for size.")
+
+ def forward(self, img):
+ """
+ Args:
+ img (PIL Image or Tensor): Image to be cropped.
+
+ Returns:
+ PIL Image or Tensor: Cropped image.
+ """
+ return F.center_crop(img, self.size)
+
+ def __repr__(self):
+ return self.__class__.__name__ + f"(size={self.size})"
+
+
+class Pad(torch.nn.Module):
+ """Pad the given image on all sides with the given "pad" value.
+ If the image is torch Tensor, it is expected
+ to have [..., H, W] shape, where ... means at most 2 leading dimensions for mode reflect and symmetric,
+ at most 3 leading dimensions for mode edge,
+ and an arbitrary number of leading dimensions for mode constant
+
+ Args:
+ padding (int or sequence): Padding on each border. If a single int is provided this
+ is used to pad all borders. If sequence of length 2 is provided this is the padding
+ on left/right and top/bottom respectively. If a sequence of length 4 is provided
+ this is the padding for the left, top, right and bottom borders respectively.
+
+ .. note::
+ In torchscript mode padding as single int is not supported, use a sequence of
+ length 1: ``[padding, ]``.
+ fill (number or str or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of
+ length 3, it is used to fill R, G, B channels respectively.
+ This value is only used when the padding_mode is constant.
+ Only number is supported for torch Tensor.
+ Only int or str or tuple value is supported for PIL Image.
+ padding_mode (str): Type of padding. Should be: constant, edge, reflect or symmetric.
+ Default is constant.
+
+ - constant: pads with a constant value, this value is specified with fill
+
+ - edge: pads with the last value at the edge of the image.
+ If input a 5D torch Tensor, the last 3 dimensions will be padded instead of the last 2
+
+ - reflect: pads with reflection of image without repeating the last value on the edge.
+ For example, padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode
+ will result in [3, 2, 1, 2, 3, 4, 3, 2]
+
+ - symmetric: pads with reflection of image repeating the last value on the edge.
+ For example, padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode
+ will result in [2, 1, 1, 2, 3, 4, 4, 3]
+ """
+
+ def __init__(self, padding, fill=0, padding_mode="constant"):
+ super().__init__()
+ if not isinstance(padding, (numbers.Number, tuple, list)):
+ raise TypeError("Got inappropriate padding arg")
+
+ if not isinstance(fill, (numbers.Number, str, tuple)):
+ raise TypeError("Got inappropriate fill arg")
+
+ if padding_mode not in ["constant", "edge", "reflect", "symmetric"]:
+ raise ValueError("Padding mode should be either constant, edge, reflect or symmetric")
+
+ if isinstance(padding, Sequence) and len(padding) not in [1, 2, 4]:
+ raise ValueError(
+ f"Padding must be an int or a 1, 2, or 4 element tuple, not a {len(padding)} element tuple"
+ )
+
+ self.padding = padding
+ self.fill = fill
+ self.padding_mode = padding_mode
+
+ def forward(self, img):
+ """
+ Args:
+ img (PIL Image or Tensor): Image to be padded.
+
+ Returns:
+ PIL Image or Tensor: Padded image.
+ """
+ return F.pad(img, self.padding, self.fill, self.padding_mode)
+
+ def __repr__(self):
+ return self.__class__.__name__ + f"(padding={self.padding}, fill={self.fill}, padding_mode={self.padding_mode})"
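+
+ # Illustrative sketch using the [1, 2, 3, 4] row from the docstring above; padding=[2, 0]
+ # pads only left/right, so the single-row tensor stays valid for reflect mode.
+ #   >>> row = torch.tensor([[[1, 2, 3, 4]]])
+ #   >>> Pad([2, 0], padding_mode="reflect")(row)     # -> [[[3, 2, 1, 2, 3, 4, 3, 2]]]
+ #   >>> Pad([2, 0], padding_mode="symmetric")(row)   # -> [[[2, 1, 1, 2, 3, 4, 4, 3]]]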
+
+
+class Lambda:
+ """Apply a user-defined lambda as a transform. This transform does not support torchscript.
+
+ Args:
+ lambd (function): Lambda/function to be used for transform.
+ """
+
+ def __init__(self, lambd):
+ if not callable(lambd):
+ raise TypeError(f"Argument lambd should be callable, got {repr(type(lambd).__name__)}")
+ self.lambd = lambd
+
+ def __call__(self, img):
+ return self.lambd(img)
+
+ def __repr__(self):
+ return self.__class__.__name__ + "()"
+
+
+class RandomTransforms:
+ """Base class for a list of transformations with randomness
+
+ Args:
+ transforms (sequence): list of transformations
+ """
+
+ def __init__(self, transforms):
+ if not isinstance(transforms, Sequence):
+ raise TypeError("Argument transforms should be a sequence")
+ self.transforms = transforms
+
+ def __call__(self, *args, **kwargs):
+ raise NotImplementedError()
+
+ def __repr__(self):
+ format_string = self.__class__.__name__ + "("
+ for t in self.transforms:
+ format_string += "\n"
+ format_string += f" {t}"
+ format_string += "\n)"
+ return format_string
+
+
+class RandomApply(torch.nn.Module):
+ """Apply randomly a list of transformations with a given probability.
+
+ .. note::
+ In order to script the transformation, please use ``torch.nn.ModuleList`` as input instead of list/tuple of
+ transforms as shown below:
+
+ >>> transforms = transforms.RandomApply(torch.nn.ModuleList([
+ >>> transforms.ColorJitter(),
+ >>> ]), p=0.3)
+ >>> scripted_transforms = torch.jit.script(transforms)
+
+ Make sure to use only scriptable transformations, i.e. ones that work with ``torch.Tensor`` and do not
+ require ``lambda`` functions or ``PIL.Image``.
+
+ Args:
+ transforms (sequence or torch.nn.Module): list of transformations
+ p (float): probability
+ """
+
+ def __init__(self, transforms, p=0.5):
+ super().__init__()
+ self.transforms = transforms
+ self.p = p
+
+ def forward(self, img):
+ if self.p < torch.rand(1):
+ return img
+ for t in self.transforms:
+ img = t(img)
+ return img
+
+ def __repr__(self):
+ format_string = self.__class__.__name__ + "("
+ format_string += f"\n p={self.p}"
+ for t in self.transforms:
+ format_string += "\n"
+ format_string += f" {t}"
+ format_string += "\n)"
+ return format_string
+
+
+class RandomOrder(RandomTransforms):
+ """Apply a list of transformations in a random order. This transform does not support torchscript."""
+
+ def __call__(self, img):
+ order = list(range(len(self.transforms)))
+ random.shuffle(order)
+ for i in order:
+ img = self.transforms[i](img)
+ return img
+
+
+class RandomChoice(RandomTransforms):
+ """Apply single transformation randomly picked from a list. This transform does not support torchscript."""
+
+ def __init__(self, transforms, p=None):
+ super().__init__(transforms)
+ if p is not None and not isinstance(p, Sequence):
+ raise TypeError("Argument p should be a sequence")
+ self.p = p
+
+ def __call__(self, *args):
+ t = random.choices(self.transforms, weights=self.p)[0]
+ return t(*args)
+
+ def __repr__(self):
+ format_string = super().__repr__()
+ format_string += f"(p={self.p})"
+ return format_string
+
+
+class RandomCrop(torch.nn.Module):
+ """Crop the given image at a random location.
+ If the image is torch Tensor, it is expected
+ to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions,
+ but if non-constant padding is used, the input is expected to have at most 2 leading dimensions
+
+ Args:
+ size (sequence or int): Desired output size of the crop. If size is an
+ int instead of sequence like (h, w), a square crop (size, size) is
+ made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]).
+ padding (int or sequence, optional): Optional padding on each border
+ of the image. Default is None. If a single int is provided this
+ is used to pad all borders. If sequence of length 2 is provided this is the padding
+ on left/right and top/bottom respectively. If a sequence of length 4 is provided
+ this is the padding for the left, top, right and bottom borders respectively.
+
+ .. note::
+ In torchscript mode padding as single int is not supported, use a sequence of
+ length 1: ``[padding, ]``.
+ pad_if_needed (boolean): It will pad the image if smaller than the
+ desired size to avoid raising an exception. Since cropping is done
+ after padding, the padding seems to be done at a random offset.
+ fill (number or str or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of
+ length 3, it is used to fill R, G, B channels respectively.
+ This value is only used when the padding_mode is constant.
+ Only number is supported for torch Tensor.
+ Only int or str or tuple value is supported for PIL Image.
+ padding_mode (str): Type of padding. Should be: constant, edge, reflect or symmetric.
+ Default is constant.
+
+ - constant: pads with a constant value, this value is specified with fill
+
+ - edge: pads with the last value at the edge of the image.
+ If input a 5D torch Tensor, the last 3 dimensions will be padded instead of the last 2
+
+ - reflect: pads with reflection of image without repeating the last value on the edge.
+ For example, padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode
+ will result in [3, 2, 1, 2, 3, 4, 3, 2]
+
+ - symmetric: pads with reflection of image repeating the last value on the edge.
+ For example, padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode
+ will result in [2, 1, 1, 2, 3, 4, 4, 3]
+ """
+
+ @staticmethod
+ def get_params(img: Tensor, output_size: Tuple[int, int]) -> Tuple[int, int, int, int]:
+ """Get parameters for ``crop`` for a random crop.
+
+ Args:
+ img (PIL Image or Tensor): Image to be cropped.
+ output_size (tuple): Expected output size of the crop.
+
+ Returns:
+ tuple: params (i, j, h, w) to be passed to ``crop`` for random crop.
+ """
+ w, h = F.get_image_size(img)
+ th, tw = output_size
+
+ if h + 1 < th or w + 1 < tw:
+ raise ValueError(f"Required crop size {(th, tw)} is larger then input image size {(h, w)}")
+
+ if w == tw and h == th:
+ return 0, 0, h, w
+
+ i = torch.randint(0, h - th + 1, size=(1,)).item()
+ j = torch.randint(0, w - tw + 1, size=(1,)).item()
+ return i, j, th, tw
+
+ def __init__(self, size, padding=None, pad_if_needed=False, fill=0, padding_mode="constant"):
+ super().__init__()
+
+ self.size = tuple(_setup_size(size, error_msg="Please provide only two dimensions (h, w) for size."))
+
+ self.padding = padding
+ self.pad_if_needed = pad_if_needed
+ self.fill = fill
+ self.padding_mode = padding_mode
+
+ def forward(self, img):
+ """
+ Args:
+ img (PIL Image or Tensor): Image to be cropped.
+
+ Returns:
+ PIL Image or Tensor: Cropped image.
+ """
+ if self.padding is not None:
+ img = F.pad(img, self.padding, self.fill, self.padding_mode)
+
+ width, height = F.get_image_size(img)
+ # pad the width if needed
+ if self.pad_if_needed and width < self.size[1]:
+ padding = [self.size[1] - width, 0]
+ img = F.pad(img, padding, self.fill, self.padding_mode)
+ # pad the height if needed
+ if self.pad_if_needed and height < self.size[0]:
+ padding = [0, self.size[0] - height]
+ img = F.pad(img, padding, self.fill, self.padding_mode)
+
+ i, j, h, w = self.get_params(img, self.size)
+
+ return F.crop(img, i, j, h, w)
+
+ def __repr__(self):
+ return self.__class__.__name__ + f"(size={self.size}, padding={self.padding})"
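+
+ # Illustrative usage sketch of the classic pad-then-crop augmentation: the input is first
+ # padded by 4 on every border, then a random 32x32 window is taken by get_params/forward above.
+ #   >>> crop = RandomCrop(32, padding=4, padding_mode="reflect")
+ #   >>> crop(torch.rand(3, 32, 32)).shape   # padded to 40x40, then cropped back to 32x32
+ #   torch.Size([3, 32, 32])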
+
+
+class RandomHorizontalFlip(torch.nn.Module):
+ """Horizontally flip the given image randomly with a given probability.
+ If the image is torch Tensor, it is expected
+ to have [..., H, W] shape, where ... means an arbitrary number of leading
+ dimensions
+
+ Args:
+ p (float): probability of the image being flipped. Default value is 0.5
+ """
+
+ def __init__(self, p=0.5):
+ super().__init__()
+ self.p = p
+
+ def forward(self, img):
+ """
+ Args:
+ img (PIL Image or Tensor): Image to be flipped.
+
+ Returns:
+ PIL Image or Tensor: Randomly flipped image.
+ """
+ if torch.rand(1) < self.p:
+ return F.hflip(img)
+ return img
+
+ def __repr__(self):
+ return self.__class__.__name__ + f"(p={self.p})"
+
+
+class RandomVerticalFlip(torch.nn.Module):
+ """Vertically flip the given image randomly with a given probability.
+ If the image is torch Tensor, it is expected
+ to have [..., H, W] shape, where ... means an arbitrary number of leading
+ dimensions
+
+ Args:
+ p (float): probability of the image being flipped. Default value is 0.5
+ """
+
+ def __init__(self, p=0.5):
+ super().__init__()
+ self.p = p
+
+ def forward(self, img):
+ """
+ Args:
+ img (PIL Image or Tensor): Image to be flipped.
+
+ Returns:
+ PIL Image or Tensor: Randomly flipped image.
+ """
+ if torch.rand(1) < self.p:
+ return F.vflip(img)
+ return img
+
+ def __repr__(self):
+ return self.__class__.__name__ + f"(p={self.p})"
+
+
+class RandomPerspective(torch.nn.Module):
+ """Performs a random perspective transformation of the given image with a given probability.
+ If the image is torch Tensor, it is expected
+ to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
+
+ Args:
+ distortion_scale (float): argument to control the degree of distortion and ranges from 0 to 1.
+ Default is 0.5.
+ p (float): probability of the image being transformed. Default is 0.5.
+ interpolation (InterpolationMode): Desired interpolation enum defined by
+ :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``.
+ If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
+ For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.
+ fill (sequence or number): Pixel fill value for the area outside the transformed
+ image. Default is ``0``. If given a number, the value is used for all bands respectively.
+ """
+
+ def __init__(self, distortion_scale=0.5, p=0.5, interpolation=InterpolationMode.BILINEAR, fill=0):
+ super().__init__()
+ self.p = p
+
+ # Backward compatibility with integer value
+ if isinstance(interpolation, int):
+ warnings.warn(
+ "Argument interpolation should be of type InterpolationMode instead of int. "
+ "Please, use InterpolationMode enum."
+ )
+ interpolation = _interpolation_modes_from_int(interpolation)
+
+ self.interpolation = interpolation
+ self.distortion_scale = distortion_scale
+
+ if fill is None:
+ fill = 0
+ elif not isinstance(fill, (Sequence, numbers.Number)):
+ raise TypeError("Fill should be either a sequence or a number.")
+
+ self.fill = fill
+
+ def forward(self, img):
+ """
+ Args:
+ img (PIL Image or Tensor): Image to be Perspectively transformed.
+
+ Returns:
+ PIL Image or Tensor: Randomly transformed image.
+ """
+
+ fill = self.fill
+ if isinstance(img, Tensor):
+ if isinstance(fill, (int, float)):
+ fill = [float(fill)] * F.get_image_num_channels(img)
+ else:
+ fill = [float(f) for f in fill]
+
+ if torch.rand(1) < self.p:
+ width, height = F.get_image_size(img)
+ startpoints, endpoints = self.get_params(width, height, self.distortion_scale)
+ return F.perspective(img, startpoints, endpoints, self.interpolation, fill)
+ return img
+
+ @staticmethod
+ def get_params(width: int, height: int, distortion_scale: float) -> Tuple[List[List[int]], List[List[int]]]:
+ """Get parameters for ``perspective`` for a random perspective transform.
+
+ Args:
+ width (int): width of the image.
+ height (int): height of the image.
+ distortion_scale (float): argument to control the degree of distortion and ranges from 0 to 1.
+
+ Returns:
+ List containing [top-left, top-right, bottom-right, bottom-left] of the original image,
+ List containing [top-left, top-right, bottom-right, bottom-left] of the transformed image.
+ """
+ half_height = height // 2
+ half_width = width // 2
+ topleft = [
+ int(torch.randint(0, int(distortion_scale * half_width) + 1, size=(1,)).item()),
+ int(torch.randint(0, int(distortion_scale * half_height) + 1, size=(1,)).item()),
+ ]
+ topright = [
+ int(torch.randint(width - int(distortion_scale * half_width) - 1, width, size=(1,)).item()),
+ int(torch.randint(0, int(distortion_scale * half_height) + 1, size=(1,)).item()),
+ ]
+ botright = [
+ int(torch.randint(width - int(distortion_scale * half_width) - 1, width, size=(1,)).item()),
+ int(torch.randint(height - int(distortion_scale * half_height) - 1, height, size=(1,)).item()),
+ ]
+ botleft = [
+ int(torch.randint(0, int(distortion_scale * half_width) + 1, size=(1,)).item()),
+ int(torch.randint(height - int(distortion_scale * half_height) - 1, height, size=(1,)).item()),
+ ]
+ startpoints = [[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]]
+ endpoints = [topleft, topright, botright, botleft]
+ return startpoints, endpoints
+
+ def __repr__(self):
+ return self.__class__.__name__ + f"(p={self.p})"
+
+
+class RandomResizedCrop(torch.nn.Module):
+ """Crop a random portion of image and resize it to a given size.
+
+ If the image is torch Tensor, it is expected
+ to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
+
+ A crop of the original image is made: the crop has a random area (H * W)
+ and a random aspect ratio. This crop is finally resized to the given
+ size. This is popularly used to train the Inception networks.
+
+ Args:
+ size (int or sequence): expected output size of the crop, for each edge. If size is an
+ int instead of sequence like (h, w), a square output size ``(size, size)`` is
+ made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]).
+
+ .. note::
+ In torchscript mode size as single int is not supported, use a sequence of length 1: ``[size, ]``.
+ scale (tuple of float): Specifies the lower and upper bounds for the random area of the crop,
+ before resizing. The scale is defined with respect to the area of the original image.
+ ratio (tuple of float): lower and upper bounds for the random aspect ratio of the crop, before
+ resizing.
+ interpolation (InterpolationMode): Desired interpolation enum defined by
+ :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``.
+ If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` and
+ ``InterpolationMode.BICUBIC`` are supported.
+ For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.
+
+ """
+
+ def __init__(self, size, scale=(0.08, 1.0), ratio=(3.0 / 4.0, 4.0 / 3.0), interpolation=InterpolationMode.BILINEAR):
+ super().__init__()
+ self.size = _setup_size(size, error_msg="Please provide only two dimensions (h, w) for size.")
+
+ if not isinstance(scale, Sequence):
+ raise TypeError("Scale should be a sequence")
+ if not isinstance(ratio, Sequence):
+ raise TypeError("Ratio should be a sequence")
+ if (scale[0] > scale[1]) or (ratio[0] > ratio[1]):
+ warnings.warn("Scale and ratio should be of kind (min, max)")
+
+ # Backward compatibility with integer value
+ if isinstance(interpolation, int):
+ warnings.warn(
+ "Argument interpolation should be of type InterpolationMode instead of int. "
+ "Please, use InterpolationMode enum."
+ )
+ interpolation = _interpolation_modes_from_int(interpolation)
+
+ self.interpolation = interpolation
+ self.scale = scale
+ self.ratio = ratio
+
+ @staticmethod
+ def get_params(img: Tensor, scale: List[float], ratio: List[float]) -> Tuple[int, int, int, int]:
+ """Get parameters for ``crop`` for a random sized crop.
+
+ Args:
+ img (PIL Image or Tensor): Input image.
+ scale (list): range of scale of the origin size cropped
+ ratio (list): range of aspect ratio of the origin aspect ratio cropped
+
+ Returns:
+ tuple: params (i, j, h, w) to be passed to ``crop`` for a random
+ sized crop.
+ """
+ width, height = F.get_image_size(img)
+ area = height * width
+
+ log_ratio = torch.log(torch.tensor(ratio))
+ for _ in range(10):
+ target_area = area * torch.empty(1).uniform_(scale[0], scale[1]).item()
+ aspect_ratio = torch.exp(torch.empty(1).uniform_(log_ratio[0], log_ratio[1])).item()
+
+ w = int(round(math.sqrt(target_area * aspect_ratio)))
+ h = int(round(math.sqrt(target_area / aspect_ratio)))
+
+ if 0 < w <= width and 0 < h <= height:
+ i = torch.randint(0, height - h + 1, size=(1,)).item()
+ j = torch.randint(0, width - w + 1, size=(1,)).item()
+ return i, j, h, w
+
+ # Fallback to central crop
+ in_ratio = float(width) / float(height)
+ if in_ratio < min(ratio):
+ w = width
+ h = int(round(w / min(ratio)))
+ elif in_ratio > max(ratio):
+ h = height
+ w = int(round(h * max(ratio)))
+ else: # whole image
+ w = width
+ h = height
+ i = (height - h) // 2
+ j = (width - w) // 2
+ return i, j, h, w
+
+ def forward(self, img):
+ """
+ Args:
+ img (PIL Image or Tensor): Image to be cropped and resized.
+
+ Returns:
+ PIL Image or Tensor: Randomly cropped and resized image.
+ """
+ i, j, h, w = self.get_params(img, self.scale, self.ratio)
+ return F.resized_crop(img, i, j, h, w, self.size, self.interpolation)
+
+ def __repr__(self):
+ interpolate_str = self.interpolation.value
+ format_string = self.__class__.__name__ + f"(size={self.size}"
+ format_string += f", scale={tuple(round(s, 4) for s in self.scale)}"
+ format_string += f", ratio={tuple(round(r, 4) for r in self.ratio)}"
+ format_string += f", interpolation={interpolate_str})"
+ return format_string
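+
+ # Illustrative usage sketch: a crop covering 8%-100% of the input area with aspect ratio in
+ # [3/4, 4/3] is sampled by get_params above, then resized to the requested size (the
+ # Inception-style augmentation mentioned in the docstring).
+ #   >>> rrc = RandomResizedCrop(224, scale=(0.08, 1.0), ratio=(3 / 4, 4 / 3))
+ #   >>> rrc(torch.rand(3, 480, 640)).shape
+ #   torch.Size([3, 224, 224])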
+
+
+class RandomSizedCrop(RandomResizedCrop):
+ """
+ Note: This transform is deprecated in favor of RandomResizedCrop.
+ """
+
+ def __init__(self, *args, **kwargs):
+ warnings.warn(
+ "The use of the transforms.RandomSizedCrop transform is deprecated, "
+ + "please use transforms.RandomResizedCrop instead."
+ )
+ super().__init__(*args, **kwargs)
+
+
+class FiveCrop(torch.nn.Module):
+ """Crop the given image into four corners and the central crop.
+ If the image is torch Tensor, it is expected
+ to have [..., H, W] shape, where ... means an arbitrary number of leading
+ dimensions
+
+ .. Note::
+ This transform returns a tuple of images and there may be a mismatch in the number of
+ inputs and targets your Dataset returns. See below for an example of how to deal with
+ this.
+
+ Args:
+ size (sequence or int): Desired output size of the crop. If size is an ``int``
+ instead of sequence like (h, w), a square crop of size (size, size) is made.
+ If provided a sequence of length 1, it will be interpreted as (size[0], size[0]).
+
+ Example:
+ >>> transform = Compose([
+ >>> FiveCrop(size), # this is a list of PIL Images
+ >>> Lambda(lambda crops: torch.stack([ToTensor()(crop) for crop in crops])) # returns a 4D tensor
+ >>> ])
+ >>> #In your test loop you can do the following:
+ >>> input, target = batch # input is a 5d tensor, target is 2d
+ >>> bs, ncrops, c, h, w = input.size()
+ >>> result = model(input.view(-1, c, h, w)) # fuse batch size and ncrops
+ >>> result_avg = result.view(bs, ncrops, -1).mean(1) # avg over crops
+ """
+
+ def __init__(self, size):
+ super().__init__()
+ self.size = _setup_size(size, error_msg="Please provide only two dimensions (h, w) for size.")
+
+ def forward(self, img):
+ """
+ Args:
+ img (PIL Image or Tensor): Image to be cropped.
+
+ Returns:
+ tuple of 5 images. Image can be PIL Image or Tensor
+ """
+ return F.five_crop(img, self.size)
+
+ def __repr__(self):
+ return self.__class__.__name__ + f"(size={self.size})"
+
+
+class TenCrop(torch.nn.Module):
+ """Crop the given image into four corners and the central crop plus the flipped version of
+ these (horizontal flipping is used by default).
+ If the image is torch Tensor, it is expected
+ to have [..., H, W] shape, where ... means an arbitrary number of leading
+ dimensions
+
+ .. Note::
+ This transform returns a tuple of images and there may be a mismatch in the number of
+ inputs and targets your Dataset returns. See below for an example of how to deal with
+ this.
+
+ Args:
+ size (sequence or int): Desired output size of the crop. If size is an
+ int instead of sequence like (h, w), a square crop (size, size) is
+ made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]).
+ vertical_flip (bool): Use vertical flipping instead of horizontal
+
+ Example:
+ >>> transform = Compose([
+ >>> TenCrop(size), # this is a list of PIL Images
+ >>> Lambda(lambda crops: torch.stack([ToTensor()(crop) for crop in crops])) # returns a 4D tensor
+ >>> ])
+ >>> #In your test loop you can do the following:
+ >>> input, target = batch # input is a 5d tensor, target is 2d
+ >>> bs, ncrops, c, h, w = input.size()
+ >>> result = model(input.view(-1, c, h, w)) # fuse batch size and ncrops
+ >>> result_avg = result.view(bs, ncrops, -1).mean(1) # avg over crops
+ """
+
+ def __init__(self, size, vertical_flip=False):
+ super().__init__()
+ self.size = _setup_size(size, error_msg="Please provide only two dimensions (h, w) for size.")
+ self.vertical_flip = vertical_flip
+
+ def forward(self, img):
+ """
+ Args:
+ img (PIL Image or Tensor): Image to be cropped.
+
+ Returns:
+ tuple of 10 images. Image can be PIL Image or Tensor
+ """
+ return F.ten_crop(img, self.size, self.vertical_flip)
+
+ def __repr__(self):
+ return self.__class__.__name__ + f"(size={self.size}, vertical_flip={self.vertical_flip})"
+
+
+class LinearTransformation(torch.nn.Module):
+ """Transform a tensor image with a square transformation matrix and a mean_vector computed
+ offline.
+ This transform does not support PIL Image.
+ Given transformation_matrix and mean_vector, will flatten the torch.*Tensor and
+ subtract mean_vector from it which is then followed by computing the dot
+ product with the transformation matrix and then reshaping the tensor to its
+ original shape.
+
+ Applications:
+ whitening transformation: suppose X is an [N x D] matrix of zero-centered, flattened data.
+ Then compute the data covariance matrix [D x D] with torch.mm(X.t(), X),
+ perform SVD on this matrix, and build the whitening matrix from its factors to
+ pass as transformation_matrix.
+
+ Args:
+ transformation_matrix (Tensor): tensor [D x D], D = C x H x W
+ mean_vector (Tensor): tensor [D], D = C x H x W
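+
+ Example (an illustrative ZCA-whitening sketch; the data shape and the 1e-5
+ epsilon below are arbitrary):
+ >>> X = torch.rand(1000, 3 * 8 * 8) # N x D matrix of flattened images
+ >>> mean = X.mean(dim=0)
+ >>> X_c = X - mean
+ >>> cov = torch.mm(X_c.t(), X_c) / X_c.size(0) # D x D covariance
+ >>> U, S, _ = torch.svd(cov)
+ >>> W = torch.mm(torch.mm(U, torch.diag(1.0 / torch.sqrt(S + 1e-5))), U.t())
+ >>> whitening = LinearTransformation(W, mean)
+ >>> out = whitening(torch.rand(3, 8, 8)) # tensor image with C x H x W = D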
+ """
+
+ def __init__(self, transformation_matrix, mean_vector):
+ super().__init__()
+ if transformation_matrix.size(0) != transformation_matrix.size(1):
+ raise ValueError(
+ "transformation_matrix should be square. Got "
+ f"{tuple(transformation_matrix.size())} rectangular matrix."
+ )
+
+ if mean_vector.size(0) != transformation_matrix.size(0):
+ raise ValueError(
+ f"mean_vector should have the same length {mean_vector.size(0)}"
+ f" as any one of the dimensions of the transformation_matrix [{tuple(transformation_matrix.size())}]"
+ )
+
+ if transformation_matrix.device != mean_vector.device:
+ raise ValueError(
+ f"Input tensors should be on the same device. Got {transformation_matrix.device} and {mean_vector.device}"
+ )
+
+ self.transformation_matrix = transformation_matrix
+ self.mean_vector = mean_vector
+
+ def forward(self, tensor: Tensor) -> Tensor:
+ """
+ Args:
+ tensor (Tensor): Tensor image to be whitened.
+
+ Returns:
+ Tensor: Transformed image.
+ """
+ shape = tensor.shape
+ n = shape[-3] * shape[-2] * shape[-1]
+ if n != self.transformation_matrix.shape[0]:
+ raise ValueError(
+ "Input tensor and transformation matrix have incompatible shape."
+ + f"[{shape[-3]} x {shape[-2]} x {shape[-1]}] != "
+ + f"{self.transformation_matrix.shape[0]}"
+ )
+
+ if tensor.device.type != self.mean_vector.device.type:
+ raise ValueError(
+ "Input tensor should be on the same device as transformation matrix and mean vector. "
+ f"Got {tensor.device} vs {self.mean_vector.device}"
+ )
+
+ flat_tensor = tensor.view(-1, n) - self.mean_vector
+ transformed_tensor = torch.mm(flat_tensor, self.transformation_matrix)
+ tensor = transformed_tensor.view(shape)
+ return tensor
+
+ def __repr__(self):
+ format_string = self.__class__.__name__ + "(transformation_matrix="
+ format_string += str(self.transformation_matrix.tolist()) + ")"
+ format_string += ", (mean_vector=" + str(self.mean_vector.tolist()) + ")"
+ return format_string
+
+
+class ColorJitter(torch.nn.Module):
+ """Randomly change the brightness, contrast, saturation and hue of an image.
+ If the image is torch Tensor, it is expected
+ to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
+ If img is PIL Image, mode "1", "I", "F" and modes with transparency (alpha channel) are not supported.
+
+ Args:
+ brightness (float or tuple of float (min, max)): How much to jitter brightness.
+ brightness_factor is chosen uniformly from [max(0, 1 - brightness), 1 + brightness]
+ or the given [min, max]. Should be non negative numbers.
+ contrast (float or tuple of float (min, max)): How much to jitter contrast.
+ contrast_factor is chosen uniformly from [max(0, 1 - contrast), 1 + contrast]
+ or the given [min, max]. Should be non negative numbers.
+ saturation (float or tuple of float (min, max)): How much to jitter saturation.
+ saturation_factor is chosen uniformly from [max(0, 1 - saturation), 1 + saturation]
+ or the given [min, max]. Should be non negative numbers.
+ hue (float or tuple of float (min, max)): How much to jitter hue.
+ hue_factor is chosen uniformly from [-hue, hue] or the given [min, max].
+ Should have 0 <= hue <= 0.5 or -0.5 <= min <= max <= 0.5.
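+
+ Example (an illustrative sketch; the jitter strengths below are arbitrary):
+ >>> jitter = ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1)
+ >>> out = jitter(torch.rand(3, 224, 224)) # a float tensor image in [0, 1]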
+ """
+
+ def __init__(self, brightness=0, contrast=0, saturation=0, hue=0):
+ super().__init__()
+ self.brightness = self._check_input(brightness, "brightness")
+ self.contrast = self._check_input(contrast, "contrast")
+ self.saturation = self._check_input(saturation, "saturation")
+ self.hue = self._check_input(hue, "hue", center=0, bound=(-0.5, 0.5), clip_first_on_zero=False)
+
+ @torch.jit.unused
+ def _check_input(self, value, name, center=1, bound=(0, float("inf")), clip_first_on_zero=True):
+ if isinstance(value, numbers.Number):
+ if value < 0:
+ raise ValueError(f"If {name} is a single number, it must be non negative.")
+ value = [center - float(value), center + float(value)]
+ if clip_first_on_zero:
+ value[0] = max(value[0], 0.0)
+ elif isinstance(value, (tuple, list)) and len(value) == 2:
+ if not bound[0] <= value[0] <= value[1] <= bound[1]:
+ raise ValueError(f"{name} values should be between {bound}")
+ else:
+ raise TypeError(f"{name} should be a single number or a list/tuple with length 2.")
+
+ # if value is 0 or (1., 1.) for brightness/contrast/saturation
+ # or (0., 0.) for hue, do nothing
+ if value[0] == value[1] == center:
+ value = None
+ return value
+
+ @staticmethod
+ def get_params(
+ brightness: Optional[List[float]],
+ contrast: Optional[List[float]],
+ saturation: Optional[List[float]],
+ hue: Optional[List[float]],
+ ) -> Tuple[Tensor, Optional[float], Optional[float], Optional[float], Optional[float]]:
+ """Get the parameters for the randomized transform to be applied on image.
+
+ Args:
+ brightness (tuple of float (min, max), optional): The range from which the brightness_factor is chosen
+ uniformly. Pass None to turn off the transformation.
+ contrast (tuple of float (min, max), optional): The range from which the contrast_factor is chosen
+ uniformly. Pass None to turn off the transformation.
+ saturation (tuple of float (min, max), optional): The range from which the saturation_factor is chosen
+ uniformly. Pass None to turn off the transformation.
+ hue (tuple of float (min, max), optional): The range from which the hue_factor is chosen uniformly.
+ Pass None to turn off the transformation.
+
+ Returns:
+ tuple: The parameters used to apply the randomized transform
+ along with their random order.
+ """
+ fn_idx = torch.randperm(4)
+
+ b = None if brightness is None else float(torch.empty(1).uniform_(brightness[0], brightness[1]))
+ c = None if contrast is None else float(torch.empty(1).uniform_(contrast[0], contrast[1]))
+ s = None if saturation is None else float(torch.empty(1).uniform_(saturation[0], saturation[1]))
+ h = None if hue is None else float(torch.empty(1).uniform_(hue[0], hue[1]))
+
+ return fn_idx, b, c, s, h
+
+ def forward(self, img):
+ """
+ Args:
+ img (PIL Image or Tensor): Input image.
+
+ Returns:
+ PIL Image or Tensor: Color jittered image.
+ """
+ fn_idx, brightness_factor, contrast_factor, saturation_factor, hue_factor = self.get_params(
+ self.brightness, self.contrast, self.saturation, self.hue
+ )
+
+ for fn_id in fn_idx:
+ if fn_id == 0 and brightness_factor is not None:
+ img = F.adjust_brightness(img, brightness_factor)
+ elif fn_id == 1 and contrast_factor is not None:
+ img = F.adjust_contrast(img, contrast_factor)
+ elif fn_id == 2 and saturation_factor is not None:
+ img = F.adjust_saturation(img, saturation_factor)
+ elif fn_id == 3 and hue_factor is not None:
+ img = F.adjust_hue(img, hue_factor)
+
+ return img
+
+ def __repr__(self):
+ format_string = self.__class__.__name__ + "("
+ format_string += f"brightness={self.brightness}"
+ format_string += f", contrast={self.contrast}"
+ format_string += f", saturation={self.saturation}"
+ format_string += f", hue={self.hue})"
+ return format_string
+
+
+class RandomRotation(torch.nn.Module):
+ """Rotate the image by angle.
+ If the image is torch Tensor, it is expected
+ to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
+
+ Args:
+ degrees (sequence or number): Range of degrees to select from.
+ If degrees is a number instead of sequence like (min, max), the range of degrees
+ will be (-degrees, +degrees).
+ interpolation (InterpolationMode): Desired interpolation enum defined by
+ :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``.
+ If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
+ For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.
+ expand (bool, optional): Optional expansion flag.
+ If true, expands the output to make it large enough to hold the entire rotated image.
+ If false or omitted, make the output image the same size as the input image.
+ Note that the expand flag assumes rotation around the center and no translation.
+ center (sequence, optional): Optional center of rotation, (x, y). Origin is the upper left corner.
+ Default is the center of the image.
+ fill (sequence or number): Pixel fill value for the area outside the rotated
+ image. Default is ``0``. If given a number, the value is used for all bands.
+ resample (int, optional): deprecated argument and will be removed since v0.10.0.
+ Please use the ``interpolation`` parameter instead.
+
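+ Example (an illustrative sketch; the degree range and fill value below are arbitrary):
+ >>> rotate = RandomRotation(degrees=(-30, 30), expand=True, fill=0)
+ >>> out = rotate(torch.rand(3, 224, 224))
+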
+ .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters
+
+ """
+
+ def __init__(
+ self, degrees, interpolation=InterpolationMode.NEAREST, expand=False, center=None, fill=0, resample=None
+ ):
+ super().__init__()
+ if resample is not None:
+ warnings.warn(
+ "Argument resample is deprecated and will be removed since v0.10.0. Please, use interpolation instead"
+ )
+ interpolation = _interpolation_modes_from_int(resample)
+
+ # Backward compatibility with integer value
+ if isinstance(interpolation, int):
+ warnings.warn(
+ "Argument interpolation should be of type InterpolationMode instead of int. "
+ "Please, use InterpolationMode enum."
+ )
+ interpolation = _interpolation_modes_from_int(interpolation)
+
+ self.degrees = _setup_angle(degrees, name="degrees", req_sizes=(2,))
+
+ if center is not None:
+ _check_sequence_input(center, "center", req_sizes=(2,))
+
+ self.center = center
+
+ self.resample = self.interpolation = interpolation
+ self.expand = expand
+
+ if fill is None:
+ fill = 0
+ elif not isinstance(fill, (Sequence, numbers.Number)):
+ raise TypeError("Fill should be either a sequence or a number.")
+
+ self.fill = fill
+
+ @staticmethod
+ def get_params(degrees: List[float]) -> float:
+ """Get parameters for ``rotate`` for a random rotation.
+
+ Returns:
+ float: angle parameter to be passed to ``rotate`` for random rotation.
+ """
+ angle = float(torch.empty(1).uniform_(float(degrees[0]), float(degrees[1])).item())
+ return angle
+
+ def forward(self, img):
+ """
+ Args:
+ img (PIL Image or Tensor): Image to be rotated.
+
+ Returns:
+ PIL Image or Tensor: Rotated image.
+ """
+ fill = self.fill
+ if isinstance(img, Tensor):
+ if isinstance(fill, (int, float)):
+ fill = [float(fill)] * F.get_image_num_channels(img)
+ else:
+ fill = [float(f) for f in fill]
+ angle = self.get_params(self.degrees)
+
+ return F.rotate(img, angle, self.resample, self.expand, self.center, fill)
+
+ def __repr__(self):
+ interpolate_str = self.interpolation.value
+ format_string = self.__class__.__name__ + f"(degrees={self.degrees}"
+ format_string += f", interpolation={interpolate_str}"
+ format_string += f", expand={self.expand}"
+ if self.center is not None:
+ format_string += f", center={self.center}"
+ if self.fill is not None:
+ format_string += f", fill={self.fill}"
+ format_string += ")"
+ return format_string
+
+
+class RandomAffine(torch.nn.Module):
+ """Random affine transformation of the image keeping center invariant.
+ If the image is torch Tensor, it is expected
+ to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
+
+ Args:
+ degrees (sequence or number): Range of degrees to select from.
+ If degrees is a number instead of sequence like (min, max), the range of degrees
+ will be (-degrees, +degrees). Set to 0 to deactivate rotations.
+ translate (tuple, optional): tuple of maximum absolute fraction for horizontal
+ and vertical translations. For example translate=(a, b), then horizontal shift
+ is randomly sampled in the range -img_width * a < dx < img_width * a and vertical shift is
+ randomly sampled in the range -img_height * b < dy < img_height * b. Will not translate by default.
+ scale (tuple, optional): scaling factor interval, e.g (a, b), then scale is
+ randomly sampled from the range a <= scale <= b. Will keep original scale by default.
+ shear (sequence or number, optional): Range of degrees to select from.
+ If shear is a number, a shear parallel to the x axis in the range (-shear, +shear)
+ will be applied. Else if shear is a sequence of 2 values a shear parallel to the x axis in the
+ range (shear[0], shear[1]) will be applied. Else if shear is a sequence of 4 values,
+ a x-axis shear in (shear[0], shear[1]) and y-axis shear in (shear[2], shear[3]) will be applied.
+ Will not apply shear by default.
+ interpolation (InterpolationMode): Desired interpolation enum defined by
+ :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``.
+ If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
+ For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable.
+ fill (sequence or number): Pixel fill value for the area outside the transformed
+ image. Default is ``0``. If given a number, the value is used for all bands.
+ fillcolor (sequence or number, optional): deprecated argument and will be removed since v0.10.0.
+ Please use the ``fill`` parameter instead.
+ resample (int, optional): deprecated argument and will be removed since v0.10.0.
+ Please use the ``interpolation`` parameter instead.
+
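+ Example (an illustrative sketch; the parameter ranges below are arbitrary):
+ >>> affine = RandomAffine(degrees=10, translate=(0.1, 0.1), scale=(0.9, 1.1), shear=5)
+ >>> out = affine(torch.rand(3, 224, 224))
+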
+ .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters
+
+ """
+
+ def __init__(
+ self,
+ degrees,
+ translate=None,
+ scale=None,
+ shear=None,
+ interpolation=InterpolationMode.NEAREST,
+ fill=0,
+ fillcolor=None,
+ resample=None,
+ ):
+ super().__init__()
+ if resample is not None:
+ warnings.warn(
+ "Argument resample is deprecated and will be removed since v0.10.0. Please, use interpolation instead"
+ )
+ interpolation = _interpolation_modes_from_int(resample)
+
+ # Backward compatibility with integer value
+ if isinstance(interpolation, int):
+ warnings.warn(
+ "Argument interpolation should be of type InterpolationMode instead of int. "
+ "Please, use InterpolationMode enum."
+ )
+ interpolation = _interpolation_modes_from_int(interpolation)
+
+ if fillcolor is not None:
+ warnings.warn(
+ "Argument fillcolor is deprecated and will be removed since v0.10.0. Please, use fill instead"
+ )
+ fill = fillcolor
+
+ self.degrees = _setup_angle(degrees, name="degrees", req_sizes=(2,))
+
+ if translate is not None:
+ _check_sequence_input(translate, "translate", req_sizes=(2,))
+ for t in translate:
+ if not (0.0 <= t <= 1.0):
+ raise ValueError("translation values should be between 0 and 1")
+ self.translate = translate
+
+ if scale is not None:
+ _check_sequence_input(scale, "scale", req_sizes=(2,))
+ for s in scale:
+ if s <= 0:
+ raise ValueError("scale values should be positive")
+ self.scale = scale
+
+ if shear is not None:
+ self.shear = _setup_angle(shear, name="shear", req_sizes=(2, 4))
+ else:
+ self.shear = shear
+
+ self.resample = self.interpolation = interpolation
+
+ if fill is None:
+ fill = 0
+ elif not isinstance(fill, (Sequence, numbers.Number)):
+ raise TypeError("Fill should be either a sequence or a number.")
+
+ self.fillcolor = self.fill = fill
+
+ @staticmethod
+ def get_params(
+ degrees: List[float],
+ translate: Optional[List[float]],
+ scale_ranges: Optional[List[float]],
+ shears: Optional[List[float]],
+ img_size: List[int],
+ ) -> Tuple[float, Tuple[int, int], float, Tuple[float, float]]:
+ """Get parameters for affine transformation
+
+ Returns:
+ params to be passed to the affine transformation
+ """
+ angle = float(torch.empty(1).uniform_(float(degrees[0]), float(degrees[1])).item())
+ if translate is not None:
+ max_dx = float(translate[0] * img_size[0])
+ max_dy = float(translate[1] * img_size[1])
+ tx = int(round(torch.empty(1).uniform_(-max_dx, max_dx).item()))
+ ty = int(round(torch.empty(1).uniform_(-max_dy, max_dy).item()))
+ translations = (tx, ty)
+ else:
+ translations = (0, 0)
+
+ if scale_ranges is not None:
+ scale = float(torch.empty(1).uniform_(scale_ranges[0], scale_ranges[1]).item())
+ else:
+ scale = 1.0
+
+ shear_x = shear_y = 0.0
+ if shears is not None:
+ shear_x = float(torch.empty(1).uniform_(shears[0], shears[1]).item())
+ if len(shears) == 4:
+ shear_y = float(torch.empty(1).uniform_(shears[2], shears[3]).item())
+
+ shear = (shear_x, shear_y)
+
+ return angle, translations, scale, shear
+
+ def forward(self, img):
+ """
+ img (PIL Image or Tensor): Image to be transformed.
+
+ Returns:
+ PIL Image or Tensor: Affine transformed image.
+ """
+ fill = self.fill
+ if isinstance(img, Tensor):
+ if isinstance(fill, (int, float)):
+ fill = [float(fill)] * F.get_image_num_channels(img)
+ else:
+ fill = [float(f) for f in fill]
+
+ img_size = F.get_image_size(img)
+
+ ret = self.get_params(self.degrees, self.translate, self.scale, self.shear, img_size)
+
+ return F.affine(img, *ret, interpolation=self.interpolation, fill=fill)
+
+ def __repr__(self):
+ s = "{name}(degrees={degrees}"
+ if self.translate is not None:
+ s += ", translate={translate}"
+ if self.scale is not None:
+ s += ", scale={scale}"
+ if self.shear is not None:
+ s += ", shear={shear}"
+ if self.interpolation != InterpolationMode.NEAREST:
+ s += ", interpolation={interpolation}"
+ if self.fill != 0:
+ s += ", fill={fill}"
+ s += ")"
+ d = dict(self.__dict__)
+ d["interpolation"] = self.interpolation.value
+ return s.format(name=self.__class__.__name__, **d)
+
+
+class Grayscale(torch.nn.Module):
+ """Convert image to grayscale.
+ If the image is torch Tensor, it is expected
+ to have [..., 3, H, W] shape, where ... means an arbitrary number of leading dimensions
+
+ Args:
+ num_output_channels (int): (1 or 3) number of channels desired for output image
+
+ Returns:
+ PIL Image: Grayscale version of the input.
+
+ - If ``num_output_channels == 1`` : returned image is single channel
+ - If ``num_output_channels == 3`` : returned image is 3 channel with r == g == b
+
+ """
+
+ def __init__(self, num_output_channels=1):
+ super().__init__()
+ self.num_output_channels = num_output_channels
+
+ def forward(self, img):
+ """
+ Args:
+ img (PIL Image or Tensor): Image to be converted to grayscale.
+
+ Returns:
+ PIL Image or Tensor: Grayscaled image.
+ """
+ return F.rgb_to_grayscale(img, num_output_channels=self.num_output_channels)
+
+ def __repr__(self):
+ return self.__class__.__name__ + f"(num_output_channels={self.num_output_channels})"
+
+
+class RandomGrayscale(torch.nn.Module):
+ """Randomly convert image to grayscale with a probability of p (default 0.1).
+ If the image is torch Tensor, it is expected
+ to have [..., 3, H, W] shape, where ... means an arbitrary number of leading dimensions
+
+ Args:
+ p (float): probability that image should be converted to grayscale.
+
+ Returns:
+ PIL Image or Tensor: Grayscale version of the input image with probability p and unchanged
+ with probability (1-p).
+ - If input image is 1 channel: grayscale version is 1 channel
+ - If input image is 3 channel: grayscale version is 3 channel with r == g == b
+
+ """
+
+ def __init__(self, p=0.1):
+ super().__init__()
+ self.p = p
+
+ def forward(self, img):
+ """
+ Args:
+ img (PIL Image or Tensor): Image to be converted to grayscale.
+
+ Returns:
+ PIL Image or Tensor: Randomly grayscaled image.
+ """
+ num_output_channels = F.get_image_num_channels(img)
+ if torch.rand(1) < self.p:
+ return F.rgb_to_grayscale(img, num_output_channels=num_output_channels)
+ return img
+
+ def __repr__(self):
+ return self.__class__.__name__ + f"(p={self.p})"
+
+
+class RandomErasing(torch.nn.Module):
+ """Randomly selects a rectangle region in a torch Tensor image and erases its pixels.
+ This transform does not support PIL Image.
+ 'Random Erasing Data Augmentation' by Zhong et al. See https://arxiv.org/abs/1708.04896
+
+ Args:
+ p: probability that the random erasing operation will be performed.
+ scale: range of proportion of erased area against input image.
+ ratio: range of aspect ratio of erased area.
+ value: erasing value. Default is 0. If a single int, it is used to
+ erase all pixels. If a tuple of length 3, it is used to erase
+ R, G, B channels respectively.
+ If the string 'random', each pixel is erased with random values.
+ inplace: boolean to make this transform inplace. Default set to False.
+
+ Returns:
+ Erased Image.
+
+ Example:
+ >>> transform = transforms.Compose([
+ >>> transforms.RandomHorizontalFlip(),
+ >>> transforms.PILToTensor(),
+ >>> transforms.ConvertImageDtype(torch.float),
+ >>> transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
+ >>> transforms.RandomErasing(),
+ >>> ])
+ """
+
+ def __init__(self, p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=0, inplace=False):
+ super().__init__()
+ if not isinstance(value, (numbers.Number, str, tuple, list)):
+ raise TypeError("Argument value should be either a number or str or a sequence")
+ if isinstance(value, str) and value != "random":
+ raise ValueError("If value is str, it should be 'random'")
+ if not isinstance(scale, (tuple, list)):
+ raise TypeError("Scale should be a sequence")
+ if not isinstance(ratio, (tuple, list)):
+ raise TypeError("Ratio should be a sequence")
+ if (scale[0] > scale[1]) or (ratio[0] > ratio[1]):
+ warnings.warn("Scale and ratio should be of kind (min, max)")
+ if scale[0] < 0 or scale[1] > 1:
+ raise ValueError("Scale should be between 0 and 1")
+ if p < 0 or p > 1:
+ raise ValueError("Random erasing probability should be between 0 and 1")
+
+ self.p = p
+ self.scale = scale
+ self.ratio = ratio
+ self.value = value
+ self.inplace = inplace
+
+ @staticmethod
+ def get_params(
+ img: Tensor, scale: Tuple[float, float], ratio: Tuple[float, float], value: Optional[List[float]] = None
+ ) -> Tuple[int, int, int, int, Tensor]:
+ """Get parameters for ``erase`` for a random erasing.
+
+ Args:
+ img (Tensor): Tensor image to be erased.
+ scale (sequence): range of proportion of erased area against input image.
+ ratio (sequence): range of aspect ratio of erased area.
+ value (list, optional): erasing value. If None, it is interpreted as "random"
+ (erasing each pixel with random values). If ``len(value)`` is 1, it is interpreted as a number,
+ i.e. ``value[0]``.
+
+ Returns:
+ tuple: params (i, j, h, w, v) to be passed to ``erase`` for random erasing.
+ """
+ img_c, img_h, img_w = img.shape[-3], img.shape[-2], img.shape[-1]
+ area = img_h * img_w
+
+ log_ratio = torch.log(torch.tensor(ratio))
+ for _ in range(10):
+ erase_area = area * torch.empty(1).uniform_(scale[0], scale[1]).item()
+ aspect_ratio = torch.exp(torch.empty(1).uniform_(log_ratio[0], log_ratio[1])).item()
+
+ h = int(round(math.sqrt(erase_area * aspect_ratio)))
+ w = int(round(math.sqrt(erase_area / aspect_ratio)))
+ if not (h < img_h and w < img_w):
+ continue
+
+ if value is None:
+ v = torch.empty([img_c, h, w], dtype=torch.float32).normal_()
+ else:
+ v = torch.tensor(value)[:, None, None]
+
+ i = torch.randint(0, img_h - h + 1, size=(1,)).item()
+ j = torch.randint(0, img_w - w + 1, size=(1,)).item()
+ return i, j, h, w, v
+
+ # Return original image
+ return 0, 0, img_h, img_w, img
+
+ def forward(self, img):
+ """
+ Args:
+ img (Tensor): Tensor image to be erased.
+
+ Returns:
+ img (Tensor): Erased Tensor image.
+ """
+ if torch.rand(1) < self.p:
+
+ # cast self.value to script acceptable type
+ if isinstance(self.value, (int, float)):
+ value = [
+ self.value,
+ ]
+ elif isinstance(self.value, str):
+ value = None
+ elif isinstance(self.value, tuple):
+ value = list(self.value)
+ else:
+ value = self.value
+
+ if value is not None and not (len(value) in (1, img.shape[-3])):
+ raise ValueError(
+ "If value is a sequence, it should have either a single value or "
+ f"{img.shape[-3]} (number of input channels)"
+ )
+
+ x, y, h, w, v = self.get_params(img, scale=self.scale, ratio=self.ratio, value=value)
+ return F.erase(img, x, y, h, w, v, self.inplace)
+ return img
+
+ def __repr__(self):
+ s = f"(p={self.p}, "
+ s += f"scale={self.scale}, "
+ s += f"ratio={self.ratio}, "
+ s += f"value={self.value}, "
+ s += f"inplace={self.inplace})"
+ return self.__class__.__name__ + s
+
+
+class GaussianBlur(torch.nn.Module):
+ """Blurs image with randomly chosen Gaussian blur.
+ If the image is torch Tensor, it is expected
+ to have [..., C, H, W] shape, where ... means an arbitrary number of leading dimensions.
+
+ Args:
+ kernel_size (int or sequence): Size of the Gaussian kernel.
+ sigma (float or tuple of float (min, max)): Standard deviation to be used for
+ creating kernel to perform blurring. If float, sigma is fixed. If it is tuple
+ of float (min, max), sigma is chosen uniformly at random to lie in the
+ given range.
+
+ Returns:
+ PIL Image or Tensor: Gaussian blurred version of the input image.
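+
+ Example (an illustrative sketch; the kernel size and sigma range below are arbitrary):
+ >>> blur = GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5.0))
+ >>> out = blur(torch.rand(3, 224, 224))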
+
+ """
+
+ def __init__(self, kernel_size, sigma=(0.1, 2.0)):
+ super().__init__()
+ self.kernel_size = _setup_size(kernel_size, "Kernel size should be a tuple/list of two integers")
+ for ks in self.kernel_size:
+ if ks <= 0 or ks % 2 == 0:
+ raise ValueError("Kernel size value should be an odd and positive number.")
+
+ if isinstance(sigma, numbers.Number):
+ if sigma <= 0:
+ raise ValueError("If sigma is a single number, it must be positive.")
+ sigma = (sigma, sigma)
+ elif isinstance(sigma, Sequence) and len(sigma) == 2:
+ if not 0.0 < sigma[0] <= sigma[1]:
+ raise ValueError("sigma values should be positive and of the form (min, max).")
+ else:
+ raise ValueError("sigma should be a single number or a list/tuple with length 2.")
+
+ self.sigma = sigma
+
+ @staticmethod
+ def get_params(sigma_min: float, sigma_max: float) -> float:
+ """Choose sigma for random gaussian blurring.
+
+ Args:
+ sigma_min (float): Minimum standard deviation that can be chosen for blurring kernel.
+ sigma_max (float): Maximum standard deviation that can be chosen for blurring kernel.
+
+ Returns:
+ float: Standard deviation to be passed to calculate kernel for gaussian blurring.
+ """
+ return torch.empty(1).uniform_(sigma_min, sigma_max).item()
+
+ def forward(self, img: Tensor) -> Tensor:
+ """
+ Args:
+ img (PIL Image or Tensor): image to be blurred.
+
+ Returns:
+ PIL Image or Tensor: Gaussian blurred image
+ """
+ sigma = self.get_params(self.sigma[0], self.sigma[1])
+ return F.gaussian_blur(img, self.kernel_size, [sigma, sigma])
+
+ def __repr__(self):
+ s = f"(kernel_size={self.kernel_size}, "
+ s += f"sigma={self.sigma})"
+ return self.__class__.__name__ + s
+
+
+def _setup_size(size, error_msg):
+ if isinstance(size, numbers.Number):
+ return int(size), int(size)
+
+ if isinstance(size, Sequence) and len(size) == 1:
+ return size[0], size[0]
+
+ if len(size) != 2:
+ raise ValueError(error_msg)
+
+ return size
+
+
+def _check_sequence_input(x, name, req_sizes):
+ msg = req_sizes[0] if len(req_sizes) < 2 else " or ".join([str(s) for s in req_sizes])
+ if not isinstance(x, Sequence):
+ raise TypeError(f"{name} should be a sequence of length {msg}.")
+ if len(x) not in req_sizes:
+ raise ValueError(f"{name} should be sequence of length {msg}.")
+
+
+def _setup_angle(x, name, req_sizes=(2,)):
+ if isinstance(x, numbers.Number):
+ if x < 0:
+ raise ValueError(f"If {name} is a single number, it must be positive.")
+ x = [-x, x]
+ else:
+ _check_sequence_input(x, name, req_sizes)
+
+ return [float(d) for d in x]
+
+
+class RandomInvert(torch.nn.Module):
+ """Inverts the colors of the given image randomly with a given probability.
+ If img is a Tensor, it is expected to be in [..., 1 or 3, H, W] format,
+ where ... means it can have an arbitrary number of leading dimensions.
+ If img is PIL Image, it is expected to be in mode "L" or "RGB".
+
+ Args:
+ p (float): probability of the image being color inverted. Default value is 0.5
+ """
+
+ def __init__(self, p=0.5):
+ super().__init__()
+ self.p = p
+
+ def forward(self, img):
+ """
+ Args:
+ img (PIL Image or Tensor): Image to be inverted.
+
+ Returns:
+ PIL Image or Tensor: Randomly color inverted image.
+ """
+ if torch.rand(1).item() < self.p:
+ return F.invert(img)
+ return img
+
+ def __repr__(self):
+ return self.__class__.__name__ + f"(p={self.p})"
+
+
+class RandomPosterize(torch.nn.Module):
+ """Posterize the image randomly with a given probability by reducing the
+ number of bits for each color channel. If the image is torch Tensor, it should be of type torch.uint8,
+ and it is expected to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
+ If img is PIL Image, it is expected to be in mode "L" or "RGB".
+
+ Args:
+ bits (int): number of bits to keep for each channel (0-8)
+ p (float): probability of the image being posterized. Default value is 0.5
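+
+ Example (an illustrative sketch; the bit depth below is arbitrary, and a tensor
+ input must have dtype torch.uint8):
+ >>> posterize = RandomPosterize(bits=2, p=1.0)
+ >>> out = posterize(torch.randint(0, 256, (3, 224, 224), dtype=torch.uint8))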
+ """
+
+ def __init__(self, bits, p=0.5):
+ super().__init__()
+ self.bits = bits
+ self.p = p
+
+ def forward(self, img):
+ """
+ Args:
+ img (PIL Image or Tensor): Image to be posterized.
+
+ Returns:
+ PIL Image or Tensor: Randomly posterized image.
+ """
+ if torch.rand(1).item() < self.p:
+ return F.posterize(img, self.bits)
+ return img
+
+ def __repr__(self):
+ return self.__class__.__name__ + f"(bits={self.bits},p={self.p})"
+
+
+class RandomSolarize(torch.nn.Module):
+ """Solarize the image randomly with a given probability by inverting all pixel
+ values above a threshold. If img is a Tensor, it is expected to be in [..., 1 or 3, H, W] format,
+ where ... means it can have an arbitrary number of leading dimensions.
+ If img is PIL Image, it is expected to be in mode "L" or "RGB".
+
+ Args:
+ threshold (float): all pixels equal to or above this value are inverted.
+ p (float): probability of the image being solarized. Default value is 0.5
+ """
+
+ def __init__(self, threshold, p=0.5):
+ super().__init__()
+ self.threshold = threshold
+ self.p = p
+
+ def forward(self, img):
+ """
+ Args:
+ img (PIL Image or Tensor): Image to be solarized.
+
+ Returns:
+ PIL Image or Tensor: Randomly solarized image.
+ """
+ if torch.rand(1).item() < self.p:
+ return F.solarize(img, self.threshold)
+ return img
+
+ def __repr__(self):
+ return self.__class__.__name__ + f"(threshold={self.threshold},p={self.p})"
+
+
+class RandomAdjustSharpness(torch.nn.Module):
+ """Adjust the sharpness of the image randomly with a given probability. If the image is torch Tensor,
+ it is expected to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
+
+ Args:
+ sharpness_factor (float): How much to adjust the sharpness. Can be
+ any non negative number. 0 gives a blurred image, 1 gives the
+ original image while 2 increases the sharpness by a factor of 2.
+ p (float): probability of the image being sharpened. Default value is 0.5
+ """
+
+ def __init__(self, sharpness_factor, p=0.5):
+ super().__init__()
+ self.sharpness_factor = sharpness_factor
+ self.p = p
+
+ def forward(self, img):
+ """
+ Args:
+ img (PIL Image or Tensor): Image to be sharpened.
+
+ Returns:
+ PIL Image or Tensor: Randomly sharpened image.
+ """
+ if torch.rand(1).item() < self.p:
+ return F.adjust_sharpness(img, self.sharpness_factor)
+ return img
+
+ def __repr__(self):
+ return self.__class__.__name__ + f"(sharpness_factor={self.sharpness_factor},p={self.p})"
+
+
+class RandomAutocontrast(torch.nn.Module):
+ """Autocontrast the pixels of the given image randomly with a given probability.
+ If the image is torch Tensor, it is expected
+ to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
+ If img is PIL Image, it is expected to be in mode "L" or "RGB".
+
+ Args:
+ p (float): probability of the image being autocontrasted. Default value is 0.5
+ """
+
+ def __init__(self, p=0.5):
+ super().__init__()
+ self.p = p
+
+ def forward(self, img):
+ """
+ Args:
+ img (PIL Image or Tensor): Image to be autocontrasted.
+
+ Returns:
+ PIL Image or Tensor: Randomly autocontrasted image.
+ """
+ if torch.rand(1).item() < self.p:
+ return F.autocontrast(img)
+ return img
+
+ def __repr__(self):
+ return self.__class__.__name__ + f"(p={self.p})"
+
+
+class RandomEqualize(torch.nn.Module):
+ """Equalize the histogram of the given image randomly with a given probability.
+ If the image is torch Tensor, it is expected
+ to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
+ If img is PIL Image, it is expected to be in mode "P", "L" or "RGB".
+
+ Args:
+ p (float): probability of the image being equalized. Default value is 0.5
+ """
+
+ def __init__(self, p=0.5):
+ super().__init__()
+ self.p = p
+
+ def forward(self, img):
+ """
+ Args:
+ img (PIL Image or Tensor): Image to be equalized.
+
+ Returns:
+ PIL Image or Tensor: Randomly equalized image.
+ """
+ if torch.rand(1).item() < self.p:
+ return F.equalize(img)
+ return img
+
+ def __repr__(self):
+ return self.__class__.__name__ + f"(p={self.p})"
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms_origin.py
new file mode 100644
index 0000000000..10783c8e53
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms_origin.py
@@ -0,0 +1,1297 @@
+import torch
+import math
+import random
+from PIL import Image
+try:
+ import accimage
+except ImportError:
+ accimage = None
+import numpy as np
+import numbers
+import types
+from collections.abc import Sequence, Iterable
+import warnings
+
+from . import functional as F
+
+
+__all__ = ["Compose", "ToTensor", "ToPILImage", "Normalize", "Resize", "Scale", "CenterCrop", "Pad",
+ "Lambda", "RandomApply", "RandomChoice", "RandomOrder", "RandomCrop", "RandomHorizontalFlip",
+ "RandomVerticalFlip", "RandomResizedCrop", "RandomSizedCrop", "FiveCrop", "TenCrop", "LinearTransformation",
+ "ColorJitter", "RandomRotation", "RandomAffine", "Grayscale", "RandomGrayscale",
+ "RandomPerspective", "RandomErasing"]
+
+_pil_interpolation_to_str = {
+ Image.NEAREST: 'PIL.Image.NEAREST',
+ Image.BILINEAR: 'PIL.Image.BILINEAR',
+ Image.BICUBIC: 'PIL.Image.BICUBIC',
+ Image.LANCZOS: 'PIL.Image.LANCZOS',
+ Image.HAMMING: 'PIL.Image.HAMMING',
+ Image.BOX: 'PIL.Image.BOX',
+}
+
+
+def _get_image_size(img):
+ if F._is_pil_image(img):
+ return img.size
+ elif isinstance(img, torch.Tensor) and img.dim() > 2:
+ return img.shape[-2:][::-1]
+ else:
+ raise TypeError("Unexpected type {}".format(type(img)))
+
+
+class Compose(object):
+ """Composes several transforms together.
+
+ Args:
+ transforms (list of ``Transform`` objects): list of transforms to compose.
+
+ Example:
+ >>> transforms.Compose([
+ >>> transforms.CenterCrop(10),
+ >>> transforms.ToTensor(),
+ >>> ])
+ """
+
+ def __init__(self, transforms):
+ self.transforms = transforms
+
+ def __call__(self, img):
+ for t in self.transforms:
+ img = t(img)
+ return img
+
+ def __repr__(self):
+ format_string = self.__class__.__name__ + '('
+ for t in self.transforms:
+ format_string += '\n'
+ format_string += ' {0}'.format(t)
+ format_string += '\n)'
+ return format_string
+
+
+class ToTensor(object):
+ """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.
+
+ Converts a PIL Image or numpy.ndarray (H x W x C) in the range
+ [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0]
+ if the PIL Image belongs to one of the modes (L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK, 1)
+ or if the numpy.ndarray has dtype = np.uint8
+
+ In the other cases, tensors are returned without scaling.
+ """
+
+ def __call__(self, pic):
+ """
+ Args:
+ pic (PIL Image or numpy.ndarray): Image to be converted to tensor.
+
+ Returns:
+ Tensor: Converted image.
+ """
+ return F.to_tensor(pic)
+
+ def __repr__(self):
+ return self.__class__.__name__ + '()'
+
+
+class ToPILImage(object):
+ """Convert a tensor or an ndarray to PIL Image.
+
+ Converts a torch.*Tensor of shape C x H x W or a numpy ndarray of shape
+ H x W x C to a PIL Image while preserving the value range.
+
+ Args:
+ mode (`PIL.Image mode`_): color space and pixel depth of input data (optional).
+ If ``mode`` is ``None`` (default) there are some assumptions made about the input data:
+ - If the input has 4 channels, the ``mode`` is assumed to be ``RGBA``.
+ - If the input has 3 channels, the ``mode`` is assumed to be ``RGB``.
+ - If the input has 2 channels, the ``mode`` is assumed to be ``LA``.
+ - If the input has 1 channel, the ``mode`` is determined by the data type (i.e ``int``, ``float``,
+ ``short``).
+
+ .. _PIL.Image mode: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#concept-modes
+ """
+ def __init__(self, mode=None):
+ self.mode = mode
+
+ def __call__(self, pic):
+ """
+ Args:
+ pic (Tensor or numpy.ndarray): Image to be converted to PIL Image.
+
+ Returns:
+ PIL Image: Image converted to PIL Image.
+
+ """
+ return F.to_pil_image(pic, self.mode)
+
+ def __repr__(self):
+ format_string = self.__class__.__name__ + '('
+ if self.mode is not None:
+ format_string += 'mode={0}'.format(self.mode)
+ format_string += ')'
+ return format_string
+
+
+class Normalize(object):
+ """Normalize a tensor image with mean and standard deviation.
+ Given mean: ``(M1,...,Mn)`` and std: ``(S1,..,Sn)`` for ``n`` channels, this transform
+ will normalize each channel of the input ``torch.*Tensor`` i.e.
+ ``output[channel] = (input[channel] - mean[channel]) / std[channel]``
+
+ .. note::
+ This transform acts out of place, i.e., it does not mutate the input tensor.
+
+ Args:
+ mean (sequence): Sequence of means for each channel.
+ std (sequence): Sequence of standard deviations for each channel.
+ inplace(bool,optional): Bool to make this operation in-place.
+
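+ Example (an illustrative sketch; the mean/std below are the commonly used
+ ImageNet statistics, not values computed here):
+ >>> normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+ >>> out = normalize(torch.rand(3, 224, 224))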
+ """
+
+ def __init__(self, mean, std, inplace=False):
+ self.mean = mean
+ self.std = std
+ self.inplace = inplace
+
+ def __call__(self, tensor):
+ """
+ Args:
+ tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
+
+ Returns:
+ Tensor: Normalized Tensor image.
+ """
+ return F.normalize(tensor, self.mean, self.std, self.inplace)
+
+ def __repr__(self):
+ return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std)
+
+
+class Resize(object):
+ """Resize the input PIL Image to the given size.
+
+ Args:
+ size (sequence or int): Desired output size. If size is a sequence like
+ (h, w), output size will be matched to this. If size is an int,
+ smaller edge of the image will be matched to this number.
+ i.e., if height > width, then image will be rescaled to
+ (size * height / width, size)
+ interpolation (int, optional): Desired interpolation. Default is
+ ``PIL.Image.BILINEAR``
+ """
+
+ def __init__(self, size, interpolation=Image.BILINEAR):
+ assert isinstance(size, int) or (isinstance(size, Iterable) and len(size) == 2)
+ self.size = size
+ self.interpolation = interpolation
+
+ def __call__(self, img):
+ """
+ Args:
+ img (PIL Image): Image to be scaled.
+
+ Returns:
+ PIL Image: Rescaled image.
+ """
+ return F.resize(img, self.size, self.interpolation)
+
+ def __repr__(self):
+ interpolate_str = _pil_interpolation_to_str[self.interpolation]
+ return self.__class__.__name__ + '(size={0}, interpolation={1})'.format(self.size, interpolate_str)
+
+
+class Scale(Resize):
+ """
+ Note: This transform is deprecated in favor of Resize.
+ """
+ def __init__(self, *args, **kwargs):
+ warnings.warn("The use of the transforms.Scale transform is deprecated, " +
+ "please use transforms.Resize instead.")
+ super(Scale, self).__init__(*args, **kwargs)
+
+
+class CenterCrop(object):
+ """Crops the given PIL Image at the center.
+
+ Args:
+ size (sequence or int): Desired output size of the crop. If size is an
+ int instead of sequence like (h, w), a square crop (size, size) is
+ made.
+ """
+
+ def __init__(self, size):
+ if isinstance(size, numbers.Number):
+ self.size = (int(size), int(size))
+ else:
+ self.size = size
+
+ def __call__(self, img):
+ """
+ Args:
+ img (PIL Image): Image to be cropped.
+
+ Returns:
+ PIL Image: Cropped image.
+ """
+ return F.center_crop(img, self.size)
+
+ def __repr__(self):
+ return self.__class__.__name__ + '(size={0})'.format(self.size)
+
+
+class Pad(object):
+ """Pad the given PIL Image on all sides with the given "pad" value.
+
+ Args:
+ padding (int or tuple): Padding on each border. If a single int is provided this
+ is used to pad all borders. If tuple of length 2 is provided this is the padding
+ on left/right and top/bottom respectively. If a tuple of length 4 is provided
+ this is the padding for the left, top, right and bottom borders
+ respectively.
+ fill (int or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of
+ length 3, it is used to fill R, G, B channels respectively.
+ This value is only used when the padding_mode is constant
+ padding_mode (str): Type of padding. Should be: constant, edge, reflect or symmetric.
+ Default is constant.
+
+ - constant: pads with a constant value, this value is specified with fill
+
+ - edge: pads with the last value at the edge of the image
+
+ - reflect: pads with reflection of image without repeating the last value on the edge
+
+ For example, padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode
+ will result in [3, 2, 1, 2, 3, 4, 3, 2]
+
+ - symmetric: pads with reflection of image repeating the last value on the edge
+
+ For example, padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode
+ will result in [2, 1, 1, 2, 3, 4, 4, 3]
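+
+ Example (an illustrative sketch; the padding and image size below are arbitrary):
+ >>> pad = Pad(padding=(2, 4), padding_mode='reflect')
+ >>> out = pad(Image.new('RGB', (32, 32))) # result is 36 x 40 (w x h)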
+ """
+
+ def __init__(self, padding, fill=0, padding_mode='constant'):
+ assert isinstance(padding, (numbers.Number, tuple))
+ assert isinstance(fill, (numbers.Number, str, tuple))
+ assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric']
+ if isinstance(padding, Sequence) and len(padding) not in [2, 4]:
+ raise ValueError("Padding must be an int or a 2, or 4 element tuple, not a " +
+ "{} element tuple".format(len(padding)))
+
+ self.padding = padding
+ self.fill = fill
+ self.padding_mode = padding_mode
+
+ def __call__(self, img):
+ """
+ Args:
+ img (PIL Image): Image to be padded.
+
+ Returns:
+ PIL Image: Padded image.
+ """
+ return F.pad(img, self.padding, self.fill, self.padding_mode)
+
+ def __repr__(self):
+ return self.__class__.__name__ + '(padding={0}, fill={1}, padding_mode={2})'.\
+ format(self.padding, self.fill, self.padding_mode)
+
+
+class Lambda(object):
+ """Apply a user-defined lambda as a transform.
+
+ Args:
+ lambd (function): Lambda/function to be used for transform.
+ """
+
+ def __init__(self, lambd):
+ assert callable(lambd), repr(type(lambd).__name__) + " object is not callable"
+ self.lambd = lambd
+
+ def __call__(self, img):
+ return self.lambd(img)
+
+ def __repr__(self):
+ return self.__class__.__name__ + '()'
+
+
+class RandomTransforms(object):
+ """Base class for a list of transformations with randomness
+
+ Args:
+ transforms (list or tuple): list of transformations
+ """
+
+ def __init__(self, transforms):
+ assert isinstance(transforms, (list, tuple))
+ self.transforms = transforms
+
+ def __call__(self, *args, **kwargs):
+ raise NotImplementedError()
+
+ def __repr__(self):
+ format_string = self.__class__.__name__ + '('
+ for t in self.transforms:
+ format_string += '\n'
+ format_string += ' {0}'.format(t)
+ format_string += '\n)'
+ return format_string
+
+
+class RandomApply(RandomTransforms):
+ """Apply randomly a list of transformations with a given probability
+
+ Args:
+ transforms (list or tuple): list of transformations
+ p (float): probability
+ """
+
+ def __init__(self, transforms, p=0.5):
+ super(RandomApply, self).__init__(transforms)
+ self.p = p
+
+ def __call__(self, img):
+ if self.p < random.random():
+ return img
+ for t in self.transforms:
+ img = t(img)
+ return img
+
+ def __repr__(self):
+ format_string = self.__class__.__name__ + '('
+ format_string += '\n p={}'.format(self.p)
+ for t in self.transforms:
+ format_string += '\n'
+ format_string += ' {0}'.format(t)
+ format_string += '\n)'
+ return format_string
+
+
+class RandomOrder(RandomTransforms):
+ """Apply a list of transformations in a random order
+ """
+ def __call__(self, img):
+ order = list(range(len(self.transforms)))
+ random.shuffle(order)
+ for i in order:
+ img = self.transforms[i](img)
+ return img
+
+
+class RandomChoice(RandomTransforms):
+ """Apply single transformation randomly picked from a list
+ """
+ def __call__(self, img):
+ t = random.choice(self.transforms)
+ return t(img)
+
+
+class RandomCrop(object):
+ """Crop the given PIL Image at a random location.
+
+ Args:
+ size (sequence or int): Desired output size of the crop. If size is an
+ int instead of sequence like (h, w), a square crop (size, size) is
+ made.
+ padding (int or sequence, optional): Optional padding on each border
+ of the image. Default is None, i.e no padding. If a sequence of length
+ 4 is provided, it is used to pad left, top, right, bottom borders
+ respectively. If a sequence of length 2 is provided, it is used to
+ pad left/right, top/bottom borders, respectively.
+ pad_if_needed (boolean): It will pad the image if smaller than the
+ desired size to avoid raising an exception. Since cropping is done
+ after padding, the padding seems to be done at a random offset.
+ fill: Pixel fill value for constant fill. Default is 0. If a tuple of
+ length 3, it is used to fill R, G, B channels respectively.
+ This value is only used when the padding_mode is constant
+ padding_mode: Type of padding. Should be: constant, edge, reflect or symmetric. Default is constant.
+
+ - constant: pads with a constant value, this value is specified with fill
+
+ - edge: pads with the last value on the edge of the image
+
+ - reflect: pads with reflection of image (without repeating the last value on the edge)
+
+ padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode
+ will result in [3, 2, 1, 2, 3, 4, 3, 2]
+
+ - symmetric: pads with reflection of image (repeating the last value on the edge)
+
+ padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode
+ will result in [2, 1, 1, 2, 3, 4, 4, 3]
+
+ """
+
+ def __init__(self, size, padding=None, pad_if_needed=False, fill=0, padding_mode='constant'):
+ if isinstance(size, numbers.Number):
+ self.size = (int(size), int(size))
+ else:
+ self.size = size
+ self.padding = padding
+ self.pad_if_needed = pad_if_needed
+ self.fill = fill
+ self.padding_mode = padding_mode
+
+ @staticmethod
+ def get_params(img, output_size):
+ """Get parameters for ``crop`` for a random crop.
+
+ Args:
+ img (PIL Image): Image to be cropped.
+ output_size (tuple): Expected output size of the crop.
+
+ Returns:
+ tuple: params (i, j, h, w) to be passed to ``crop`` for random crop.
+ """
+ w, h = _get_image_size(img)
+ th, tw = output_size
+ if w == tw and h == th:
+ return 0, 0, h, w
+
+ i = random.randint(0, h - th)
+ j = random.randint(0, w - tw)
+ return i, j, th, tw
+
+ def __call__(self, img):
+ """
+ Args:
+ img (PIL Image): Image to be cropped.
+
+ Returns:
+ PIL Image: Cropped image.
+ """
+ if self.padding is not None:
+ img = F.pad(img, self.padding, self.fill, self.padding_mode)
+
+ # pad the width if needed
+ if self.pad_if_needed and img.size[0] < self.size[1]:
+ img = F.pad(img, (self.size[1] - img.size[0], 0), self.fill, self.padding_mode)
+ # pad the height if needed
+ if self.pad_if_needed and img.size[1] < self.size[0]:
+ img = F.pad(img, (0, self.size[0] - img.size[1]), self.fill, self.padding_mode)
+
+ i, j, h, w = self.get_params(img, self.size)
+
+ return F.crop(img, i, j, h, w)
+
+ def __repr__(self):
+ return self.__class__.__name__ + '(size={0}, padding={1})'.format(self.size, self.padding)
+
+
+class RandomHorizontalFlip(object):
+ """Horizontally flip the given PIL Image randomly with a given probability.
+
+ Args:
+ p (float): probability of the image being flipped. Default value is 0.5
+ """
+
+ def __init__(self, p=0.5):
+ self.p = p
+
+ def __call__(self, img):
+ """
+ Args:
+ img (PIL Image): Image to be flipped.
+
+ Returns:
+ PIL Image: Randomly flipped image.
+ """
+ if random.random() < self.p:
+ return F.hflip(img)
+ return img
+
+ def __repr__(self):
+ return self.__class__.__name__ + '(p={})'.format(self.p)
+
+
+class RandomVerticalFlip(object):
+ """Vertically flip the given PIL Image randomly with a given probability.
+
+ Args:
+ p (float): probability of the image being flipped. Default value is 0.5
+ """
+
+ def __init__(self, p=0.5):
+ self.p = p
+
+ def __call__(self, img):
+ """
+ Args:
+ img (PIL Image): Image to be flipped.
+
+ Returns:
+ PIL Image: Randomly flipped image.
+ """
+ if random.random() < self.p:
+ return F.vflip(img)
+ return img
+
+ def __repr__(self):
+ return self.__class__.__name__ + '(p={})'.format(self.p)
+
+
+class RandomPerspective(object):
+ """Performs a random perspective transformation of the given PIL Image with a given probability.
+
+ Args:
+ interpolation: Default is Image.BICUBIC.
+
+ p (float): probability of the image being perspectively transformed. Default value is 0.5
+
+ distortion_scale (float): controls the degree of distortion and ranges from 0 to 1. Default value is 0.5.
+
+ fill (3-tuple or int): RGB pixel fill value for the area outside the transformed image.
+ If int, the value is used for all channels. Default value is 0.
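+
+ Example (an illustrative sketch; the distortion scale and image size below are arbitrary):
+ >>> perspective = RandomPerspective(distortion_scale=0.6, p=1.0)
+ >>> out = perspective(Image.new('RGB', (320, 240)))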
+ """
+
+ def __init__(self, distortion_scale=0.5, p=0.5, interpolation=Image.BICUBIC, fill=0):
+ self.p = p
+ self.interpolation = interpolation
+ self.distortion_scale = distortion_scale
+ self.fill = fill
+
+ def __call__(self, img):
+ """
+ Args:
+ img (PIL Image): Image to be perspectively transformed.
+
+ Returns:
+ PIL Image: Randomly perspective transformed image.
+ """
+ if not F._is_pil_image(img):
+ raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+ if random.random() < self.p:
+ width, height = img.size
+ startpoints, endpoints = self.get_params(width, height, self.distortion_scale)
+ return F.perspective(img, startpoints, endpoints, self.interpolation, self.fill)
+ return img
+
+ @staticmethod
+ def get_params(width, height, distortion_scale):
+ """Get parameters for ``perspective`` for a random perspective transform.
+
+ Args:
+ width : width of the image.
+ height : height of the image.
+
+ Returns:
+ List containing [top-left, top-right, bottom-right, bottom-left] of the original image,
+ List containing [top-left, top-right, bottom-right, bottom-left] of the transformed image.
+ """
+ half_height = int(height / 2)
+ half_width = int(width / 2)
+ topleft = (random.randint(0, int(distortion_scale * half_width)),
+ random.randint(0, int(distortion_scale * half_height)))
+ topright = (random.randint(width - int(distortion_scale * half_width) - 1, width - 1),
+ random.randint(0, int(distortion_scale * half_height)))
+ botright = (random.randint(width - int(distortion_scale * half_width) - 1, width - 1),
+ random.randint(height - int(distortion_scale * half_height) - 1, height - 1))
+ botleft = (random.randint(0, int(distortion_scale * half_width)),
+ random.randint(height - int(distortion_scale * half_height) - 1, height - 1))
+ startpoints = [(0, 0), (width - 1, 0), (width - 1, height - 1), (0, height - 1)]
+ endpoints = [topleft, topright, botright, botleft]
+ return startpoints, endpoints
+
+ def __repr__(self):
+ return self.__class__.__name__ + '(p={})'.format(self.p)
+
+
+class RandomResizedCrop(object):
+ """Crop the given PIL Image to random size and aspect ratio.
+
+ A crop of random size (default: 0.08 to 1.0 of the original size) and a random
+ aspect ratio (default: 3/4 to 4/3 of the original aspect ratio) is made. This crop
+ is finally resized to the given size.
+ This is popularly used to train the Inception networks.
+
+ Args:
+ size: expected output size of each edge
+ scale: range of size of the origin size cropped
+ ratio: range of aspect ratio of the origin aspect ratio cropped
+ interpolation: Default: PIL.Image.BILINEAR
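+
+ Example (an illustrative sketch; the output size, scale range and input size below are arbitrary):
+ >>> crop = RandomResizedCrop(224, scale=(0.5, 1.0))
+ >>> out = crop(Image.new('RGB', (640, 480)))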
+ """
+
+ def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), interpolation=Image.BILINEAR):
+ if isinstance(size, (tuple, list)):
+ self.size = size
+ else:
+ self.size = (size, size)
+ if (scale[0] > scale[1]) or (ratio[0] > ratio[1]):
+ warnings.warn("range should be of kind (min, max)")
+
+ self.interpolation = interpolation
+ self.scale = scale
+ self.ratio = ratio
+
+ @staticmethod
+ def get_params(img, scale, ratio):
+ """Get parameters for ``crop`` for a random sized crop.
+
+ Args:
+ img (PIL Image): Image to be cropped.
+ scale (tuple): range of size of the origin size cropped
+ ratio (tuple): range of aspect ratio of the origin aspect ratio cropped
+
+ Returns:
+ tuple: params (i, j, h, w) to be passed to ``crop`` for a random
+ sized crop.
+ """
+ width, height = _get_image_size(img)
+ area = height * width
+
+ for _ in range(10):
+ target_area = random.uniform(*scale) * area
+ log_ratio = (math.log(ratio[0]), math.log(ratio[1]))
+ aspect_ratio = math.exp(random.uniform(*log_ratio))
+
+ w = int(round(math.sqrt(target_area * aspect_ratio)))
+ h = int(round(math.sqrt(target_area / aspect_ratio)))
+
+ if 0 < w <= width and 0 < h <= height:
+ i = random.randint(0, height - h)
+ j = random.randint(0, width - w)
+ return i, j, h, w
+
+ # Fallback to central crop
+ in_ratio = float(width) / float(height)
+ if (in_ratio < min(ratio)):
+ w = width
+ h = int(round(w / min(ratio)))
+ elif (in_ratio > max(ratio)):
+ h = height
+ w = int(round(h * max(ratio)))
+ else: # whole image
+ w = width
+ h = height
+ i = (height - h) // 2
+ j = (width - w) // 2
+ return i, j, h, w
+
+ def __call__(self, img):
+ """
+ Args:
+ img (PIL Image): Image to be cropped and resized.
+
+ Returns:
+ PIL Image: Randomly cropped and resized image.
+ """
+ i, j, h, w = self.get_params(img, self.scale, self.ratio)
+ return F.resized_crop(img, i, j, h, w, self.size, self.interpolation)
+
+ def __repr__(self):
+ interpolate_str = _pil_interpolation_to_str[self.interpolation]
+ format_string = self.__class__.__name__ + '(size={0}'.format(self.size)
+ format_string += ', scale={0}'.format(tuple(round(s, 4) for s in self.scale))
+ format_string += ', ratio={0}'.format(tuple(round(r, 4) for r in self.ratio))
+ format_string += ', interpolation={0})'.format(interpolate_str)
+ return format_string
+
+
+class RandomSizedCrop(RandomResizedCrop):
+ """
+ Note: This transform is deprecated in favor of RandomResizedCrop.
+ """
+ def __init__(self, *args, **kwargs):
+ warnings.warn("The use of the transforms.RandomSizedCrop transform is deprecated, " +
+ "please use transforms.RandomResizedCrop instead.")
+ super(RandomSizedCrop, self).__init__(*args, **kwargs)
+
+
+class FiveCrop(object):
+ """Crop the given PIL Image into four corners and the central crop
+
+ .. Note::
+ This transform returns a tuple of images and there may be a mismatch in the number of
+ inputs and targets your Dataset returns. See below for an example of how to deal with
+ this.
+
+ Args:
+ size (sequence or int): Desired output size of the crop. If size is an ``int``
+ instead of sequence like (h, w), a square crop of size (size, size) is made.
+
+ Example:
+ >>> transform = Compose([
+ >>> FiveCrop(size), # this is a list of PIL Images
+ >>> Lambda(lambda crops: torch.stack([ToTensor()(crop) for crop in crops])) # returns a 4D tensor
+ >>> ])
+ >>> #In your test loop you can do the following:
+ >>> input, target = batch # input is a 5d tensor, target is 2d
+ >>> bs, ncrops, c, h, w = input.size()
+ >>> result = model(input.view(-1, c, h, w)) # fuse batch size and ncrops
+ >>> result_avg = result.view(bs, ncrops, -1).mean(1) # avg over crops
+ """
+
+ def __init__(self, size):
+ self.size = size
+ if isinstance(size, numbers.Number):
+ self.size = (int(size), int(size))
+ else:
+ assert len(size) == 2, "Please provide only two dimensions (h, w) for size."
+ self.size = size
+
+ def __call__(self, img):
+ return F.five_crop(img, self.size)
+
+ def __repr__(self):
+ return self.__class__.__name__ + '(size={0})'.format(self.size)
+
+
+class TenCrop(object):
+ """Crop the given PIL Image into four corners and the central crop plus the flipped version of
+ these (horizontal flipping is used by default)
+
+ .. Note::
+ This transform returns a tuple of images and there may be a mismatch in the number of
+ inputs and targets your Dataset returns. See below for an example of how to deal with
+ this.
+
+ Args:
+ size (sequence or int): Desired output size of the crop. If size is an
+ int instead of sequence like (h, w), a square crop (size, size) is
+ made.
+ vertical_flip (bool): Use vertical flipping instead of horizontal
+
+ Example:
+ >>> transform = Compose([
+ >>> TenCrop(size), # this is a list of PIL Images
+ >>> Lambda(lambda crops: torch.stack([ToTensor()(crop) for crop in crops])) # returns a 4D tensor
+ >>> ])
+ >>> #In your test loop you can do the following:
+ >>> input, target = batch # input is a 5d tensor, target is 2d
+ >>> bs, ncrops, c, h, w = input.size()
+ >>> result = model(input.view(-1, c, h, w)) # fuse batch size and ncrops
+ >>> result_avg = result.view(bs, ncrops, -1).mean(1) # avg over crops
+ """
+
+ def __init__(self, size, vertical_flip=False):
+ self.size = size
+ if isinstance(size, numbers.Number):
+ self.size = (int(size), int(size))
+ else:
+ assert len(size) == 2, "Please provide only two dimensions (h, w) for size."
+ self.size = size
+ self.vertical_flip = vertical_flip
+
+ def __call__(self, img):
+ return F.ten_crop(img, self.size, self.vertical_flip)
+
+ def __repr__(self):
+ return self.__class__.__name__ + '(size={0}, vertical_flip={1})'.format(self.size, self.vertical_flip)
+
+
+class LinearTransformation(object):
+ """Transform a tensor image with a square transformation matrix and a mean_vector computed
+ offline.
+ Given transformation_matrix and mean_vector, will flatten the torch.*Tensor and
+ subtract mean_vector from it which is then followed by computing the dot
+ product with the transformation matrix and then reshaping the tensor to its
+ original shape.
+
+ Applications:
+ whitening transformation: Suppose X is a column vector zero-centered data.
+ Then compute the data covariance matrix [D x D] with torch.mm(X.t(), X),
+ perform SVD on this matrix and pass it as transformation_matrix.
+
+ Args:
+ transformation_matrix (Tensor): tensor [D x D], D = C x H x W
+ mean_vector (Tensor): tensor [D], D = C x H x W
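+
+ Example (minimal sketch using an identity matrix, i.e. no real whitening
+ statistics; D = 3 * 8 * 8 here is arbitrary):
+ >>> D = 3 * 8 * 8
+ >>> transform = LinearTransformation(torch.eye(D), torch.zeros(D))
+ >>> out = transform(torch.rand(3, 8, 8)) # same shape (3, 8, 8)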
+ """
+
+ def __init__(self, transformation_matrix, mean_vector):
+ if transformation_matrix.size(0) != transformation_matrix.size(1):
+ raise ValueError("transformation_matrix should be square. Got " +
+ "[{} x {}] rectangular matrix.".format(*transformation_matrix.size()))
+
+ if mean_vector.size(0) != transformation_matrix.size(0):
+ raise ValueError("mean_vector should have the same length {}".format(mean_vector.size(0)) +
+ " as any one of the dimensions of the transformation_matrix [{} x {}]"
+ .format(*transformation_matrix.size()))
+
+ self.transformation_matrix = transformation_matrix
+ self.mean_vector = mean_vector
+
+ def __call__(self, tensor):
+ """
+ Args:
+ tensor (Tensor): Tensor image of size (C, H, W) to be whitened.
+
+ Returns:
+ Tensor: Transformed image.
+ """
+ if tensor.size(0) * tensor.size(1) * tensor.size(2) != self.transformation_matrix.size(0):
+ raise ValueError("tensor and transformation matrix have incompatible shape." +
+ "[{} x {} x {}] != ".format(*tensor.size()) +
+ "{}".format(self.transformation_matrix.size(0)))
+ flat_tensor = tensor.view(1, -1) - self.mean_vector
+ transformed_tensor = torch.mm(flat_tensor, self.transformation_matrix)
+ tensor = transformed_tensor.view(tensor.size())
+ return tensor
+
+ def __repr__(self):
+ format_string = self.__class__.__name__ + '(transformation_matrix='
+ format_string += (str(self.transformation_matrix.tolist()) + ')')
+ format_string += (", (mean_vector=" + str(self.mean_vector.tolist()) + ')')
+ return format_string
+
+
+class ColorJitter(object):
+ """Randomly change the brightness, contrast and saturation of an image.
+
+ Args:
+ brightness (float or tuple of float (min, max)): How much to jitter brightness.
+ brightness_factor is chosen uniformly from [max(0, 1 - brightness), 1 + brightness]
+ or the given [min, max]. Should be non negative numbers.
+ contrast (float or tuple of float (min, max)): How much to jitter contrast.
+ contrast_factor is chosen uniformly from [max(0, 1 - contrast), 1 + contrast]
+ or the given [min, max]. Should be non negative numbers.
+ saturation (float or tuple of float (min, max)): How much to jitter saturation.
+ saturation_factor is chosen uniformly from [max(0, 1 - saturation), 1 + saturation]
+ or the given [min, max]. Should be non negative numbers.
+ hue (float or tuple of float (min, max)): How much to jitter hue.
+ hue_factor is chosen uniformly from [-hue, hue] or the given [min, max].
+ Should have 0 <= hue <= 0.5 or -0.5 <= min <= max <= 0.5.
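+
+ Example (illustrative usage; the jitter strengths below are arbitrary):
+ >>> jitter = ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1)
+ >>> out = jitter(img) # img is a PIL Image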
+ """
+ def __init__(self, brightness=0, contrast=0, saturation=0, hue=0):
+ self.brightness = self._check_input(brightness, 'brightness')
+ self.contrast = self._check_input(contrast, 'contrast')
+ self.saturation = self._check_input(saturation, 'saturation')
+ self.hue = self._check_input(hue, 'hue', center=0, bound=(-0.5, 0.5),
+ clip_first_on_zero=False)
+
+ def _check_input(self, value, name, center=1, bound=(0, float('inf')), clip_first_on_zero=True):
+ if isinstance(value, numbers.Number):
+ if value < 0:
+ raise ValueError("If {} is a single number, it must be non negative.".format(name))
+ value = [center - value, center + value]
+ if clip_first_on_zero:
+ value[0] = max(value[0], 0)
+ elif isinstance(value, (tuple, list)) and len(value) == 2:
+ if not bound[0] <= value[0] <= value[1] <= bound[1]:
+ raise ValueError("{} values should be between {}".format(name, bound))
+ else:
+ raise TypeError("{} should be a single number or a list/tuple with length 2.".format(name))
+
+ # if value is 0 or (1., 1.) for brightness/contrast/saturation
+ # or (0., 0.) for hue, do nothing
+ if value[0] == value[1] == center:
+ value = None
+ return value
+
+ @staticmethod
+ def get_params(brightness, contrast, saturation, hue):
+ """Get a randomized transform to be applied on image.
+
+ Arguments are same as that of __init__.
+
+ Returns:
+ Transform which randomly adjusts brightness, contrast and
+ saturation in a random order.
+ """
+ transforms = []
+
+ if brightness is not None:
+ brightness_factor = random.uniform(brightness[0], brightness[1])
+ transforms.append(Lambda(lambda img: F.adjust_brightness(img, brightness_factor)))
+
+ if contrast is not None:
+ contrast_factor = random.uniform(contrast[0], contrast[1])
+ transforms.append(Lambda(lambda img: F.adjust_contrast(img, contrast_factor)))
+
+ if saturation is not None:
+ saturation_factor = random.uniform(saturation[0], saturation[1])
+ transforms.append(Lambda(lambda img: F.adjust_saturation(img, saturation_factor)))
+
+ if hue is not None:
+ hue_factor = random.uniform(hue[0], hue[1])
+ transforms.append(Lambda(lambda img: F.adjust_hue(img, hue_factor)))
+
+ random.shuffle(transforms)
+ transform = Compose(transforms)
+
+ return transform
+
+ def __call__(self, img):
+ """
+ Args:
+ img (PIL Image): Input image.
+
+ Returns:
+ PIL Image: Color jittered image.
+ """
+ transform = self.get_params(self.brightness, self.contrast,
+ self.saturation, self.hue)
+ return transform(img)
+
+ def __repr__(self):
+ format_string = self.__class__.__name__ + '('
+ format_string += 'brightness={0}'.format(self.brightness)
+ format_string += ', contrast={0}'.format(self.contrast)
+ format_string += ', saturation={0}'.format(self.saturation)
+ format_string += ', hue={0})'.format(self.hue)
+ return format_string
+
+
+class RandomRotation(object):
+ """Rotate the image by angle.
+
+ Args:
+ degrees (sequence or float or int): Range of degrees to select from.
+ If degrees is a number instead of sequence like (min, max), the range of degrees
+ will be (-degrees, +degrees).
+ resample ({PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC}, optional):
+ An optional resampling filter. See `filters`_ for more information.
+ If omitted, or if the image has mode "1" or "P", it is set to PIL.Image.NEAREST.
+ expand (bool, optional): Optional expansion flag.
+ If true, expands the output to make it large enough to hold the entire rotated image.
+ If false or omitted, make the output image the same size as the input image.
+ Note that the expand flag assumes rotation around the center and no translation.
+ center (2-tuple, optional): Optional center of rotation.
+ Origin is the upper left corner.
+ Default is the center of the image.
+ fill (n-tuple or int or float): Pixel fill value for area outside the rotated
+ image. If int or float, the value is used for all bands.
+ Defaults to 0 for all bands. This option is only available for ``pillow>=5.2.0``.
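+
+ Example (illustrative usage; the degree range is arbitrary):
+ >>> rotate = RandomRotation(degrees=(-30, 30))
+ >>> out = rotate(img) # img is a PIL Image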
+
+ .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters
+
+ """
+
+ def __init__(self, degrees, resample=False, expand=False, center=None, fill=None):
+ if isinstance(degrees, numbers.Number):
+ if degrees < 0:
+ raise ValueError("If degrees is a single number, it must be positive.")
+ self.degrees = (-degrees, degrees)
+ else:
+ if len(degrees) != 2:
+ raise ValueError("If degrees is a sequence, it must be of len 2.")
+ self.degrees = degrees
+
+ self.resample = resample
+ self.expand = expand
+ self.center = center
+ self.fill = fill
+
+ @staticmethod
+ def get_params(degrees):
+ """Get parameters for ``rotate`` for a random rotation.
+
+ Returns:
+ sequence: params to be passed to ``rotate`` for random rotation.
+ """
+ angle = random.uniform(degrees[0], degrees[1])
+
+ return angle
+
+ def __call__(self, img):
+ """
+ Args:
+ img (PIL Image): Image to be rotated.
+
+ Returns:
+ PIL Image: Rotated image.
+ """
+
+ angle = self.get_params(self.degrees)
+
+ return F.rotate(img, angle, self.resample, self.expand, self.center, self.fill)
+
+ def __repr__(self):
+ format_string = self.__class__.__name__ + '(degrees={0}'.format(self.degrees)
+ format_string += ', resample={0}'.format(self.resample)
+ format_string += ', expand={0}'.format(self.expand)
+ if self.center is not None:
+ format_string += ', center={0}'.format(self.center)
+ format_string += ')'
+ return format_string
+
+
+class RandomAffine(object):
+ """Random affine transformation of the image keeping center invariant
+
+ Args:
+ degrees (sequence or float or int): Range of degrees to select from.
+ If degrees is a number instead of sequence like (min, max), the range of degrees
+ will be (-degrees, +degrees). Set to 0 to deactivate rotations.
+ translate (tuple, optional): tuple of maximum absolute fraction for horizontal
+ and vertical translations. For example translate=(a, b), then horizontal shift
+ is randomly sampled in the range -img_width * a < dx < img_width * a and vertical shift is
+ randomly sampled in the range -img_height * b < dy < img_height * b. Will not translate by default.
+ scale (tuple, optional): scaling factor interval, e.g (a, b), then scale is
+ randomly sampled from the range a <= scale <= b. Will keep original scale by default.
+ shear (sequence or float or int, optional): Range of degrees to select from.
+ If shear is a number, a shear parallel to the x axis in the range (-shear, +shear)
+ will be applied. Else if shear is a tuple or list of 2 values a shear parallel to the x axis in the
+ range (shear[0], shear[1]) will be applied. Else if shear is a tuple or list of 4 values,
+ a x-axis shear in (shear[0], shear[1]) and y-axis shear in (shear[2], shear[3]) will be applied.
+ Will not apply shear by default
+ resample ({PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC}, optional):
+ An optional resampling filter. See `filters`_ for more information.
+ If omitted, or if the image has mode "1" or "P", it is set to PIL.Image.NEAREST.
+ fillcolor (tuple or int): Optional fill color (tuple for RGB images, int for grayscale) for the area
+ outside the transform in the output image. (Pillow>=5.0.0)
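+
+ Example (illustrative usage; parameter values are arbitrary):
+ >>> affine = RandomAffine(degrees=10, translate=(0.1, 0.1), scale=(0.9, 1.1), shear=5)
+ >>> out = affine(img) # img is a PIL Image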
+
+ .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters
+
+ """
+
+ def __init__(self, degrees, translate=None, scale=None, shear=None, resample=False, fillcolor=0):
+ if isinstance(degrees, numbers.Number):
+ if degrees < 0:
+ raise ValueError("If degrees is a single number, it must be positive.")
+ self.degrees = (-degrees, degrees)
+ else:
+ assert isinstance(degrees, (tuple, list)) and len(degrees) == 2, \
+ "degrees should be a list or tuple and it must be of length 2."
+ self.degrees = degrees
+
+ if translate is not None:
+ assert isinstance(translate, (tuple, list)) and len(translate) == 2, \
+ "translate should be a list or tuple and it must be of length 2."
+ for t in translate:
+ if not (0.0 <= t <= 1.0):
+ raise ValueError("translation values should be between 0 and 1")
+ self.translate = translate
+
+ if scale is not None:
+ assert isinstance(scale, (tuple, list)) and len(scale) == 2, \
+ "scale should be a list or tuple and it must be of length 2."
+ for s in scale:
+ if s <= 0:
+ raise ValueError("scale values should be positive")
+ self.scale = scale
+
+ if shear is not None:
+ if isinstance(shear, numbers.Number):
+ if shear < 0:
+ raise ValueError("If shear is a single number, it must be positive.")
+ self.shear = (-shear, shear)
+ else:
+ assert isinstance(shear, (tuple, list)) and \
+ (len(shear) == 2 or len(shear) == 4), \
+ "shear should be a list or tuple and it must be of length 2 or 4."
+ # X-Axis shear with [min, max]
+ if len(shear) == 2:
+ self.shear = [shear[0], shear[1], 0., 0.]
+ elif len(shear) == 4:
+ self.shear = [s for s in shear]
+ else:
+ self.shear = shear
+
+ self.resample = resample
+ self.fillcolor = fillcolor
+
+ @staticmethod
+ def get_params(degrees, translate, scale_ranges, shears, img_size):
+ """Get parameters for affine transformation
+
+ Returns:
+ sequence: params to be passed to the affine transformation
+ """
+ angle = random.uniform(degrees[0], degrees[1])
+ if translate is not None:
+ max_dx = translate[0] * img_size[0]
+ max_dy = translate[1] * img_size[1]
+ translations = (np.round(random.uniform(-max_dx, max_dx)),
+ np.round(random.uniform(-max_dy, max_dy)))
+ else:
+ translations = (0, 0)
+
+ if scale_ranges is not None:
+ scale = random.uniform(scale_ranges[0], scale_ranges[1])
+ else:
+ scale = 1.0
+
+ if shears is not None:
+ if len(shears) == 2:
+ shear = [random.uniform(shears[0], shears[1]), 0.]
+ elif len(shears) == 4:
+ shear = [random.uniform(shears[0], shears[1]),
+ random.uniform(shears[2], shears[3])]
+ else:
+ shear = 0.0
+
+ return angle, translations, scale, shear
+
+ def __call__(self, img):
+ """
+ img (PIL Image): Image to be transformed.
+
+ Returns:
+ PIL Image: Affine transformed image.
+ """
+ ret = self.get_params(self.degrees, self.translate, self.scale, self.shear, img.size)
+ return F.affine(img, *ret, resample=self.resample, fillcolor=self.fillcolor)
+
+ def __repr__(self):
+ s = '{name}(degrees={degrees}'
+ if self.translate is not None:
+ s += ', translate={translate}'
+ if self.scale is not None:
+ s += ', scale={scale}'
+ if self.shear is not None:
+ s += ', shear={shear}'
+ if self.resample > 0:
+ s += ', resample={resample}'
+ if self.fillcolor != 0:
+ s += ', fillcolor={fillcolor}'
+ s += ')'
+ d = dict(self.__dict__)
+ d['resample'] = _pil_interpolation_to_str[d['resample']]
+ return s.format(name=self.__class__.__name__, **d)
+
+
+class Grayscale(object):
+ """Convert image to grayscale.
+
+ Args:
+ num_output_channels (int): (1 or 3) number of channels desired for output image
+
+ Returns:
+ PIL Image: Grayscale version of the input.
+ - If ``num_output_channels == 1`` : returned image is single channel
+ - If ``num_output_channels == 3`` : returned image is 3 channel with r == g == b
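+
+ Example (illustrative usage):
+ >>> to_gray = Grayscale(num_output_channels=3)
+ >>> out = to_gray(img) # img is a PIL Image; out has r == g == b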
+
+ """
+
+ def __init__(self, num_output_channels=1):
+ self.num_output_channels = num_output_channels
+
+ def __call__(self, img):
+ """
+ Args:
+ img (PIL Image): Image to be converted to grayscale.
+
+ Returns:
+ PIL Image: Randomly grayscaled image.
+ """
+ return F.to_grayscale(img, num_output_channels=self.num_output_channels)
+
+ def __repr__(self):
+ return self.__class__.__name__ + '(num_output_channels={0})'.format(self.num_output_channels)
+
+
+class RandomGrayscale(object):
+ """Randomly convert image to grayscale with a probability of p (default 0.1).
+
+ Args:
+ p (float): probability that image should be converted to grayscale.
+
+ Returns:
+ PIL Image: Grayscale version of the input image with probability p and unchanged
+ with probability (1-p).
+ - If input image is 1 channel: grayscale version is 1 channel
+ - If input image is 3 channel: grayscale version is 3 channel with r == g == b
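+
+ Example (illustrative usage):
+ >>> maybe_gray = RandomGrayscale(p=0.2)
+ >>> out = maybe_gray(img) # grayscaled with probability 0.2, otherwise unchanged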
+
+ """
+
+ def __init__(self, p=0.1):
+ self.p = p
+
+ def __call__(self, img):
+ """
+ Args:
+ img (PIL Image): Image to be converted to grayscale.
+
+ Returns:
+ PIL Image: Randomly grayscaled image.
+ """
+ num_output_channels = 1 if img.mode == 'L' else 3
+ if random.random() < self.p:
+ return F.to_grayscale(img, num_output_channels=num_output_channels)
+ return img
+
+ def __repr__(self):
+ return self.__class__.__name__ + '(p={0})'.format(self.p)
+
+
+class RandomErasing(object):
+ """ Randomly selects a rectangle region in an image and erases its pixels.
+ 'Random Erasing Data Augmentation' by Zhong et al. See https://arxiv.org/pdf/1708.04896.pdf
+
+ Args:
+ p: probability that the random erasing operation will be performed.
+ scale: range of proportion of erased area against input image.
+ ratio: range of aspect ratio of erased area.
+ value: erasing value. Default is 0. If a single int, it is used to
+ erase all pixels. If a tuple of length 3, it is used to erase
+ R, G, B channels respectively.
+ If a str of 'random', erasing each pixel with random values.
+ inplace: boolean to make this transform inplace. Default set to False.
+
+ Returns:
+ Erased Image.
+
+ # Examples:
+ >>> transform = transforms.Compose([
+ >>> transforms.RandomHorizontalFlip(),
+ >>> transforms.ToTensor(),
+ >>> transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
+ >>> transforms.RandomErasing(),
+ >>> ])
+ """
+
+ def __init__(self, p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=0, inplace=False):
+ assert isinstance(value, (numbers.Number, str, tuple, list))
+ if (scale[0] > scale[1]) or (ratio[0] > ratio[1]):
+ warnings.warn("range should be of kind (min, max)")
+ if scale[0] < 0 or scale[1] > 1:
+ raise ValueError("range of scale should be between 0 and 1")
+ if p < 0 or p > 1:
+ raise ValueError("range of random erasing probability should be between 0 and 1")
+
+ self.p = p
+ self.scale = scale
+ self.ratio = ratio
+ self.value = value
+ self.inplace = inplace
+
+ @staticmethod
+ def get_params(img, scale, ratio, value=0):
+ """Get parameters for ``erase`` for a random erasing.
+
+ Args:
+ img (Tensor): Tensor image of size (C, H, W) to be erased.
+ scale: range of proportion of erased area against input image.
+ ratio: range of aspect ratio of erased area.
+
+ Returns:
+ tuple: params (i, j, h, w, v) to be passed to ``erase`` for random erasing.
+ """
+ img_c, img_h, img_w = img.shape
+ area = img_h * img_w
+
+ for _ in range(10):
+ erase_area = random.uniform(scale[0], scale[1]) * area
+ aspect_ratio = random.uniform(ratio[0], ratio[1])
+
+ h = int(round(math.sqrt(erase_area * aspect_ratio)))
+ w = int(round(math.sqrt(erase_area / aspect_ratio)))
+
+ if h < img_h and w < img_w:
+ i = random.randint(0, img_h - h)
+ j = random.randint(0, img_w - w)
+ if isinstance(value, numbers.Number):
+ v = value
+ elif isinstance(value, torch._six.string_classes):
+ v = torch.empty([img_c, h, w], dtype=torch.float32).normal_()
+ elif isinstance(value, (list, tuple)):
+ v = torch.tensor(value, dtype=torch.float32).view(-1, 1, 1).expand(-1, h, w)
+ return i, j, h, w, v
+
+ # Return original image
+ return 0, 0, img_h, img_w, img
+
+ def __call__(self, img):
+ """
+ Args:
+ img (Tensor): Tensor image of size (C, H, W) to be erased.
+
+ Returns:
+ img (Tensor): Erased Tensor image.
+ """
+ if random.uniform(0, 1) < self.p:
+ x, y, h, w, v = self.get_params(img, scale=self.scale, ratio=self.ratio, value=self.value)
+ return F.erase(img, x, y, h, w, v, self.inplace)
+ return img
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils.py
new file mode 100644
index 0000000000..399dc3fcc5
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils.py
@@ -0,0 +1,309 @@
+import math
+import pathlib
+import warnings
+from typing import Union, Optional, List, Tuple, BinaryIO
+
+import numpy as np
+import torch
+from PIL import Image, ImageDraw, ImageFont, ImageColor
+
+__all__ = ["make_grid", "save_image", "draw_bounding_boxes", "draw_segmentation_masks"]
+
+
+@torch.no_grad()
+def make_grid(
+ tensor: Union[torch.Tensor, List[torch.Tensor]],
+ nrow: int = 8,
+ padding: int = 2,
+ normalize: bool = False,
+ value_range: Optional[Tuple[int, int]] = None,
+ scale_each: bool = False,
+ pad_value: int = 0,
+ **kwargs,
+) -> torch.Tensor:
+ """
+ Make a grid of images.
+
+ Args:
+ tensor (Tensor or list): 4D mini-batch Tensor of shape (B x C x H x W)
+ or a list of images all of the same size.
+ nrow (int, optional): Number of images displayed in each row of the grid.
+ The final grid size is ``(B / nrow, nrow)``. Default: ``8``.
+ padding (int, optional): amount of padding. Default: ``2``.
+ normalize (bool, optional): If True, shift the image to the range (0, 1),
+ by the min and max values specified by ``value_range``. Default: ``False``.
+ value_range (tuple, optional): tuple (min, max) where min and max are numbers,
+ then these numbers are used to normalize the image. By default, min and max
+ are computed from the tensor.
+ scale_each (bool, optional): If ``True``, scale each image in the batch of
+ images separately rather than the (min, max) over all images. Default: ``False``.
+ pad_value (float, optional): Value for the padded pixels. Default: ``0``.
+
+ Returns:
+ grid (Tensor): the tensor containing grid of images.
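+
+ Example (illustrative sketch; the batch of random images is arbitrary):
+ >>> images = torch.rand(16, 3, 64, 64)
+ >>> grid = make_grid(images, nrow=4, padding=2) # grid has shape (3, 266, 266)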
+ """
+ if not (torch.is_tensor(tensor) or (isinstance(tensor, list) and all(torch.is_tensor(t) for t in tensor))):
+ raise TypeError(f"tensor or list of tensors expected, got {type(tensor)}")
+
+ if "range" in kwargs.keys():
+ warning = "range will be deprecated, please use value_range instead."
+ warnings.warn(warning)
+ value_range = kwargs["range"]
+
+ # if list of tensors, convert to a 4D mini-batch Tensor
+ if isinstance(tensor, list):
+ tensor = torch.stack(tensor, dim=0)
+
+ if tensor.dim() == 2: # single image H x W
+ tensor = tensor.unsqueeze(0)
+ if tensor.dim() == 3: # single image
+ if tensor.size(0) == 1: # if single-channel, convert to 3-channel
+ tensor = torch.cat((tensor, tensor, tensor), 0)
+ tensor = tensor.unsqueeze(0)
+
+ if tensor.dim() == 4 and tensor.size(1) == 1: # single-channel images
+ tensor = torch.cat((tensor, tensor, tensor), 1)
+
+ if normalize is True:
+ tensor = tensor.clone() # avoid modifying tensor in-place
+ if value_range is not None:
+ assert isinstance(
+ value_range, tuple
+ ), "value_range has to be a tuple (min, max) if specified. min and max are numbers"
+
+ def norm_ip(img, low, high):
+ img.clamp_(min=low, max=high)
+ img.sub_(low).div_(max(high - low, 1e-5))
+
+ def norm_range(t, value_range):
+ if value_range is not None:
+ norm_ip(t, value_range[0], value_range[1])
+ else:
+ norm_ip(t, float(t.min()), float(t.max()))
+
+ if scale_each is True:
+ for t in tensor: # loop over mini-batch dimension
+ norm_range(t, value_range)
+ else:
+ norm_range(tensor, value_range)
+
+ if tensor.size(0) == 1:
+ return tensor.squeeze(0)
+
+ # make the mini-batch of images into a grid
+ nmaps = tensor.size(0)
+ xmaps = min(nrow, nmaps)
+ ymaps = int(math.ceil(float(nmaps) / xmaps))
+ height, width = int(tensor.size(2) + padding), int(tensor.size(3) + padding)
+ num_channels = tensor.size(1)
+ grid = tensor.new_full((num_channels, height * ymaps + padding, width * xmaps + padding), pad_value)
+ k = 0
+ for y in range(ymaps):
+ for x in range(xmaps):
+ if k >= nmaps:
+ break
+ # Tensor.copy_() is a valid method but seems to be missing from the stubs
+ # https://pytorch.org/docs/stable/tensors.html#torch.Tensor.copy_
+ grid.narrow(1, y * height + padding, height - padding).narrow( # type: ignore[attr-defined]
+ 2, x * width + padding, width - padding
+ ).copy_(tensor[k])
+ k = k + 1
+ return grid
+
+
+@torch.no_grad()
+def save_image(
+ tensor: Union[torch.Tensor, List[torch.Tensor]],
+ fp: Union[str, pathlib.Path, BinaryIO],
+ format: Optional[str] = None,
+ **kwargs,
+) -> None:
+ """
+ Save a given Tensor into an image file.
+
+ Args:
+ tensor (Tensor or list): Image to be saved. If given a mini-batch tensor,
+ saves the tensor as a grid of images by calling ``make_grid``.
+ fp (string or file object): A filename or a file object
+ format(Optional): If omitted, the format to use is determined from the filename extension.
+ If a file object was used instead of a filename, this parameter should always be used.
+ **kwargs: Other arguments are documented in ``make_grid``.
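+
+ Example (illustrative sketch; the tensor and filename are arbitrary):
+ >>> images = torch.rand(8, 3, 64, 64)
+ >>> save_image(images, "grid.png", nrow=4)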
+ """
+
+ grid = make_grid(tensor, **kwargs)
+ # Add 0.5 after unnormalizing to [0, 255] to round to nearest integer
+ ndarr = grid.mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).to("cpu", torch.uint8).numpy()
+ im = Image.fromarray(ndarr)
+ im.save(fp, format=format)
+
+
+@torch.no_grad()
+def draw_bounding_boxes(
+ image: torch.Tensor,
+ boxes: torch.Tensor,
+ labels: Optional[List[str]] = None,
+ colors: Optional[Union[List[Union[str, Tuple[int, int, int]]], str, Tuple[int, int, int]]] = None,
+ fill: Optional[bool] = False,
+ width: int = 1,
+ font: Optional[str] = None,
+ font_size: int = 10,
+) -> torch.Tensor:
+
+ """
+ Draws bounding boxes on a given image.
+ The values of the input image should be uint8 between 0 and 255.
+ If fill is True, the resulting Tensor should be saved as a PNG image.
+
+ Args:
+ image (Tensor): Tensor of shape (C x H x W) and dtype uint8.
+ boxes (Tensor): Tensor of size (N, 4) containing bounding boxes in (xmin, ymin, xmax, ymax) format. Note that
+ the boxes are absolute coordinates with respect to the image. In other words: `0 <= xmin < xmax < W` and
+ `0 <= ymin < ymax < H`.
+ labels (List[str]): List containing the labels of bounding boxes.
+ colors (color or list of colors, optional): List containing the colors
+ of the boxes or single color for all boxes. The color can be represented as
+ PIL strings e.g. "red" or "#FF00FF", or as RGB tuples e.g. ``(240, 10, 157)``.
+ fill (bool): If `True` fills the bounding box with specified color.
+ width (int): Width of bounding box.
+ font (str): A filename containing a TrueType font. If the file is not found in this filename, the loader may
+ also search in other directories, such as the `fonts/` directory on Windows or `/Library/Fonts/`,
+ `/System/Library/Fonts/` and `~/Library/Fonts/` on macOS.
+ font_size (int): The requested font size in points.
+
+ Returns:
+ img (Tensor[C, H, W]): Image Tensor of dtype uint8 with bounding boxes plotted.
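+
+ Example (illustrative sketch; the image, boxes and labels are arbitrary):
+ >>> image = torch.zeros(3, 100, 100, dtype=torch.uint8)
+ >>> boxes = torch.tensor([[10, 10, 50, 50], [60, 20, 90, 80]], dtype=torch.float)
+ >>> result = draw_bounding_boxes(image, boxes, labels=["cat", "dog"], colors=["red", "blue"], width=2)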
+ """
+
+ if not isinstance(image, torch.Tensor):
+ raise TypeError(f"Tensor expected, got {type(image)}")
+ elif image.dtype != torch.uint8:
+ raise ValueError(f"Tensor uint8 expected, got {image.dtype}")
+ elif image.dim() != 3:
+ raise ValueError("Pass individual images, not batches")
+ elif image.size(0) not in {1, 3}:
+ raise ValueError("Only grayscale and RGB images are supported")
+
+ if image.size(0) == 1:
+ image = torch.tile(image, (3, 1, 1))
+
+ ndarr = image.permute(1, 2, 0).numpy()
+ img_to_draw = Image.fromarray(ndarr)
+
+ img_boxes = boxes.to(torch.int64).tolist()
+
+ if fill:
+ draw = ImageDraw.Draw(img_to_draw, "RGBA")
+
+ else:
+ draw = ImageDraw.Draw(img_to_draw)
+
+ txt_font = ImageFont.load_default() if font is None else ImageFont.truetype(font=font, size=font_size)
+
+ for i, bbox in enumerate(img_boxes):
+ if colors is None:
+ color = None
+ elif isinstance(colors, list):
+ color = colors[i]
+ else:
+ color = colors
+
+ if fill:
+ if color is None:
+ fill_color = (255, 255, 255, 100)
+ elif isinstance(color, str):
+ # This will automatically raise Error if rgb cannot be parsed.
+ fill_color = ImageColor.getrgb(color) + (100,)
+ elif isinstance(color, tuple):
+ fill_color = color + (100,)
+ draw.rectangle(bbox, width=width, outline=color, fill=fill_color)
+ else:
+ draw.rectangle(bbox, width=width, outline=color)
+
+ if labels is not None:
+ margin = width + 1
+ draw.text((bbox[0] + margin, bbox[1] + margin), labels[i], fill=color, font=txt_font)
+
+ return torch.from_numpy(np.array(img_to_draw)).permute(2, 0, 1).to(dtype=torch.uint8)
+
+
+@torch.no_grad()
+def draw_segmentation_masks(
+ image: torch.Tensor,
+ masks: torch.Tensor,
+ alpha: float = 0.8,
+ colors: Optional[Union[List[Union[str, Tuple[int, int, int]]], str, Tuple[int, int, int]]] = None,
+) -> torch.Tensor:
+
+ """
+ Draws segmentation masks on given RGB image.
+ The values of the input image should be uint8 between 0 and 255.
+
+ Args:
+ image (Tensor): Tensor of shape (3, H, W) and dtype uint8.
+ masks (Tensor): Tensor of shape (num_masks, H, W) or (H, W) and dtype bool.
+ alpha (float): Float number between 0 and 1 denoting the transparency of the masks.
+ 0 means full transparency, 1 means no transparency.
+ colors (color or list of colors, optional): List containing the colors
+ of the masks or single color for all masks. The color can be represented as
+ PIL strings e.g. "red" or "#FF00FF", or as RGB tuples e.g. ``(240, 10, 157)``.
+ By default, random colors are generated for each mask.
+
+ Returns:
+ img (Tensor[C, H, W]): Image Tensor, with segmentation masks drawn on top.
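+
+ Example (illustrative sketch; the image and mask are arbitrary):
+ >>> image = torch.zeros(3, 64, 64, dtype=torch.uint8)
+ >>> masks = torch.zeros(64, 64, dtype=torch.bool)
+ >>> masks[16:48, 16:48] = True
+ >>> result = draw_segmentation_masks(image, masks, alpha=0.6, colors="blue")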
+ """
+
+ if not isinstance(image, torch.Tensor):
+ raise TypeError(f"The image must be a tensor, got {type(image)}")
+ elif image.dtype != torch.uint8:
+ raise ValueError(f"The image dtype must be uint8, got {image.dtype}")
+ elif image.dim() != 3:
+ raise ValueError("Pass individual images, not batches")
+ elif image.size()[0] != 3:
+ raise ValueError("Pass an RGB image. Other Image formats are not supported")
+ if masks.ndim == 2:
+ masks = masks[None, :, :]
+ if masks.ndim != 3:
+ raise ValueError("masks must be of shape (H, W) or (batch_size, H, W)")
+ if masks.dtype != torch.bool:
+ raise ValueError(f"The masks must be of dtype bool. Got {masks.dtype}")
+ if masks.shape[-2:] != image.shape[-2:]:
+ raise ValueError("The image and the masks must have the same height and width")
+
+ num_masks = masks.size()[0]
+ if colors is not None and num_masks > len(colors):
+ raise ValueError(f"There are more masks ({num_masks}) than colors ({len(colors)})")
+
+ if colors is None:
+ colors = _generate_color_palette(num_masks)
+
+ if not isinstance(colors, list):
+ colors = [colors]
+ if not isinstance(colors[0], (tuple, str)):
+ raise ValueError("colors must be a tuple or a string, or a list thereof")
+ if isinstance(colors[0], tuple) and len(colors[0]) != 3:
+ raise ValueError("It seems that you passed a tuple of colors instead of a list of colors")
+
+ out_dtype = torch.uint8
+
+ colors_ = []
+ for color in colors:
+ if isinstance(color, str):
+ color = ImageColor.getrgb(color)
+ colors_.append(torch.tensor(color, dtype=out_dtype))
+
+ img_to_draw = image.detach().clone()
+ # TODO: There might be a way to vectorize this
+ for mask, color in zip(masks, colors_):
+ img_to_draw[:, mask] = color[:, None]
+
+ out = image * (1 - alpha) + img_to_draw * alpha
+ return out.to(out_dtype)
+
+
+def _generate_color_palette(num_masks: int):
+ palette = torch.tensor([2 ** 25 - 1, 2 ** 15 - 1, 2 ** 21 - 1])
+ return [tuple((i * palette) % 255) for i in range(num_masks)]
+
+
+def _log_api_usage_once(obj: object) -> None:
+ torch._C._log_api_usage_once(f"{obj.__module__}.{obj.__class__.__name__}")
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils_origin.py
new file mode 100644
index 0000000000..1a773b3fd2
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils_origin.py
@@ -0,0 +1,109 @@
+import torch
+import math
+irange = range
+
+
+def make_grid(tensor, nrow=8, padding=2,
+ normalize=False, range=None, scale_each=False, pad_value=0):
+ """Make a grid of images.
+
+ Args:
+ tensor (Tensor or list): 4D mini-batch Tensor of shape (B x C x H x W)
+ or a list of images all of the same size.
+ nrow (int, optional): Number of images displayed in each row of the grid.
+ The final grid size is ``(B / nrow, nrow)``. Default: ``8``.
+ padding (int, optional): amount of padding. Default: ``2``.
+ normalize (bool, optional): If True, shift the image to the range (0, 1),
+ by the min and max values specified by :attr:`range`. Default: ``False``.
+ range (tuple, optional): tuple (min, max) where min and max are numbers,
+ then these numbers are used to normalize the image. By default, min and max
+ are computed from the tensor.
+ scale_each (bool, optional): If ``True``, scale each image in the batch of
+ images separately rather than the (min, max) over all images. Default: ``False``.
+ pad_value (float, optional): Value for the padded pixels. Default: ``0``.
+
+ Example:
+ See this notebook `here `_
+
+ """
+ if not (torch.is_tensor(tensor) or
+ (isinstance(tensor, list) and all(torch.is_tensor(t) for t in tensor))):
+ raise TypeError('tensor or list of tensors expected, got {}'.format(type(tensor)))
+
+ # if list of tensors, convert to a 4D mini-batch Tensor
+ if isinstance(tensor, list):
+ tensor = torch.stack(tensor, dim=0)
+
+ if tensor.dim() == 2: # single image H x W
+ tensor = tensor.unsqueeze(0)
+ if tensor.dim() == 3: # single image
+ if tensor.size(0) == 1: # if single-channel, convert to 3-channel
+ tensor = torch.cat((tensor, tensor, tensor), 0)
+ tensor = tensor.unsqueeze(0)
+
+ if tensor.dim() == 4 and tensor.size(1) == 1: # single-channel images
+ tensor = torch.cat((tensor, tensor, tensor), 1)
+
+ if normalize is True:
+ tensor = tensor.clone() # avoid modifying tensor in-place
+ if range is not None:
+ assert isinstance(range, tuple), \
+ "range has to be a tuple (min, max) if specified. min and max are numbers"
+
+ def norm_ip(img, min, max):
+ img.clamp_(min=min, max=max)
+ img.add_(-min).div_(max - min + 1e-5)
+
+ def norm_range(t, range):
+ if range is not None:
+ norm_ip(t, range[0], range[1])
+ else:
+ norm_ip(t, float(t.min()), float(t.max()))
+
+ if scale_each is True:
+ for t in tensor: # loop over mini-batch dimension
+ norm_range(t, range)
+ else:
+ norm_range(tensor, range)
+
+ if tensor.size(0) == 1:
+ return tensor.squeeze(0)
+
+ # make the mini-batch of images into a grid
+ nmaps = tensor.size(0)
+ xmaps = min(nrow, nmaps)
+ ymaps = int(math.ceil(float(nmaps) / xmaps))
+ height, width = int(tensor.size(2) + padding), int(tensor.size(3) + padding)
+ num_channels = tensor.size(1)
+ grid = tensor.new_full((num_channels, height * ymaps + padding, width * xmaps + padding), pad_value)
+ k = 0
+ for y in irange(ymaps):
+ for x in irange(xmaps):
+ if k >= nmaps:
+ break
+ grid.narrow(1, y * height + padding, height - padding)\
+ .narrow(2, x * width + padding, width - padding)\
+ .copy_(tensor[k])
+ k = k + 1
+ return grid
+
+
+def save_image(tensor, fp, nrow=8, padding=2,
+ normalize=False, range=None, scale_each=False, pad_value=0, format=None):
+ """Save a given Tensor into an image file.
+
+ Args:
+ tensor (Tensor or list): Image to be saved. If given a mini-batch tensor,
+ saves the tensor as a grid of images by calling ``make_grid``.
+ fp (string or file object): A filename or a file object
+ format(Optional): If omitted, the format to use is determined from the filename extension.
+ If a file object was used instead of a filename, this parameter should always be used.
+ **kwargs: Other arguments are documented in ``make_grid``.
+ """
+ from PIL import Image
+ grid = make_grid(tensor, nrow=nrow, padding=padding, pad_value=pad_value,
+ normalize=normalize, range=range, scale_each=scale_each)
+ # Add 0.5 after unnormalizing to [0, 255] to round to nearest integer
+ ndarr = grid.mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).to('cpu', torch.uint8).numpy()
+ im = Image.fromarray(ndarr)
+ im.save(fp, format=format)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/version.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/version.py
new file mode 100644
index 0000000000..146fc171ca
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/version.py
@@ -0,0 +1,5 @@
+__version__ = '0.6.0'
+git_version = '82fd1c85d7e42d93255ed01f763ca40d58f288e3'
+from torchvision.extension import _check_cuda_version
+if _check_cuda_version() > 0:
+ cuda = _check_cuda_version()
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/train.py b/PyTorch/contrib/cv/classification/SSDLite320/train.py
new file mode 100644
index 0000000000..ba2de2a72d
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/train.py
@@ -0,0 +1,281 @@
+r"""PyTorch Detection Training.
+
+To run in a multi-gpu environment, use the distributed launcher::
+
+python -m torch.distributed.launch --nproc_per_node=8 train.py --world-size 8 --dataset coco --model ssdlite320_mobilenet_v3_large --epochs 660 --aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr --lr 0.15 --batch-size 512 --weight-decay 0.00004 --data-augmentation ssdlite
+
+python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py --dataset coco --model ssdlite320_mobilenet_v3_large --epochs 660 --aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr --lr 0.15 --batch-size 24 --weight-decay 0.00004 --data-augmentation ssdlite
+
+ python -m torch.distributed.launch --nproc_per_node=$NGPU --use_env \
+ train.py ... --world-size $NGPU
+
+The default hyperparameters are tuned for training on 8 gpus and 2 images per gpu.
+ --lr 0.02 --batch-size 2 --world-size 8
+If you use a different number of GPUs, the learning rate should be changed to 0.02/8*$NGPU.
+
+On top of that, for training Faster/Mask R-CNN, the default hyperparameters are
+ --epochs 26 --lr-steps 16 22 --aspect-ratio-group-factor 3
+
+Also, if you train Keypoint R-CNN, the default hyperparameters are
+ --epochs 46 --lr-steps 36 43 --aspect-ratio-group-factor 3
+Because the number of images is smaller in the person keypoint subset of COCO,
+the number of epochs should be adapted so that we have the same number of iterations.
+
+Verified run 1 (py37):
+CUDA_VISIBLE_DEVICES=0,1,2,3,4,5 python train.py --dataset coco --model ssdlite320_mobilenet_v3_large --epochs 660 --aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr --lr 0.15 --batch-size 24 --weight-decay 0.00004 --data-augmentation ssdlite
+Not using distributed mode (verified on a single card)
+
+CUDA_VISIBLE_DEVICES=0,1,2,3,4,5 python train.py --test-only (verified on a single card)
+
+
+torchrun --nproc_per_node=8 train.py --dataset coco --model ssdlite320_mobilenet_v3_large --epochs 660 --aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr --lr 0.15 --batch-size 24\
+ --weight-decay 0.00004 --data-augmentation ssdlite
+
+"""
+import datetime
+import os
+import sys
+import time
+
+import presets
+import torch
+import torch.utils.data
+import torchvision
+import torchvision.models.detection
+import torchvision.models.detection.mask_rcnn
+import utils
+from coco_utils import get_coco, get_coco_kp
+from engine import train_one_epoch, evaluate
+from group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups
+
+from torch.utils.data import dataloader
+from apex import amp
+
+os.environ['MASTER_ADDR'] = '127.0.0.1'
+os.environ['MASTER_PORT'] = '29688'
+
+# for servers to immediately record the logs
+def flush_print(func):
+ def new_print(*args, **kwargs):
+ func(*args, **kwargs)
+ sys.stdout.flush()
+ return new_print
+print = flush_print(print)
+
+def get_dataset(name, image_set, transform, data_path):
+ paths = {"coco": (data_path, get_coco, 91), "coco_kp": (data_path, get_coco_kp, 2)}
+ p, ds_fn, num_classes = paths[name]
+
+ ds = ds_fn(p, image_set=image_set, transforms=transform)
+ return ds, num_classes
+
+def get_transform(train, data_augmentation):
+ return presets.DetectionPresetTrain(data_augmentation) if train else presets.DetectionPresetEval()
+
+
+def get_args_parser(add_help=True):
+ import argparse
+
+ parser = argparse.ArgumentParser(description="PyTorch Detection Training", add_help=add_help)
+
+ parser.add_argument("--data-path", default="", type=str, help="dataset path")#"/datasets01/COCO/022719/"
+ parser.add_argument("--dataset", default="coco", type=str, help="dataset name")
+ parser.add_argument("--model", default="ssdlite320_mobilenet_v3_large", type=str, help="model name")#"maskrcnn_resnet50_fpn"
+ parser.add_argument("--device", default="npu", type=str, help="device (Use cuda or cpu Default: cuda)")
+ parser.add_argument(
+ "-b", "--batch-size", default=128, type=int, help="images per gpu, the total batch size is $NGPU x batch_size"
+ )
+ parser.add_argument("--epochs", default=600, type=int, metavar="N", help="number of total epochs to run")
+ parser.add_argument(
+ "-j", "--workers", default=4, type=int, metavar="N", help="number of data loading workers (default: 4)" #4
+ )
+ parser.add_argument(
+ "--lr",
+ default=0.15,
+ type=float,
+ help="initial learning rate, 0.02 is the default value for training on 8 gpus and 2 images_per_gpu",
+ )
+ parser.add_argument("--momentum", default=0.9, type=float, metavar="M", help="momentum")
+ parser.add_argument(
+ "--wd",
+ "--weight-decay",
+ default=0.00004,
+ type=float,
+ metavar="W",
+ help="weight decay (default: 4e-5)",
+ dest="weight_decay",
+ )
+ parser.add_argument(
+ "--lr-scheduler", default="cosineannealinglr", type=str, help="name of lr scheduler (default: multisteplr)" #multisteplr
+ )
+ parser.add_argument(
+ "--lr-step-size", default=8, type=int, help="decrease lr every step-size epochs (multisteplr scheduler only)"
+ )
+ parser.add_argument(
+ "--lr-steps",
+ default=[16, 22],
+ nargs="+",
+ type=int,
+ help="decrease lr every step-size epochs (multisteplr scheduler only)",
+ )
+ parser.add_argument(
+ "--lr-gamma", default=0.1, type=float, help="decrease lr by a factor of lr-gamma (multisteplr scheduler only)"
+ )
+ parser.add_argument("--print-freq", default=1, type=int, help="print frequency")
+ parser.add_argument("--output-dir", default="./multigpu", type=str, help="path to save outputs")
+ # parser.add_argument("--resume", default="./ssdlite320_mobilenet_v3_large_coco-a79551df.pth",
+ # type=str, help="path of checkpoint")#model_128.pth
+ parser.add_argument("--resume", default="./multigpu/model_599.pth", type=str, help="path of checkpoint")
+
+ parser.add_argument("--start_epoch", default=0, type=int, help="start epoch")
+ parser.add_argument("--aspect-ratio-group-factor", default=3, type=int)
+ parser.add_argument("--rpn-score-thresh", default=None, type=float, help="rpn score threshold for faster-rcnn")
+ parser.add_argument(
+ "--trainable-backbone-layers", default=None, type=int, help="number of trainable layers of backbone"
+ )
+ parser.add_argument(
+ "--data-augmentation", default="ssdlite", type=str, help="data augmentation policy (default: hflip)" #hflip
+ )
+ parser.add_argument(
+ "--sync-bn",
+ dest="sync_bn",
+ help="Use sync batch norm",
+ action="store_true",
+ )
+ parser.add_argument(
+ "--test-only",
+ dest="test_only",
+ help="Only test the model",
+ action="store_true",
+ )
+ parser.add_argument(
+ "--pretrained",
+ dest="pretrained",
+ help="Use pre-trained models from the modelzoo",
+ action="store_true",
+ )
+
+ # distributed training parameters
+ parser.add_argument("--world-size", default=20, type=int, help="number of distributed processes")
+ parser.add_argument("--dist-url", default="env://", type=str, help="url used to set up distributed training")
+
+ # Mixed precision training parameters
+ parser.add_argument("--amp", action="store_true", help="Use torch.cuda.amp for mixed precision training")
+
+ return parser
+
+
+def main(args):
+ if args.output_dir:
+ utils.mkdir(args.output_dir)
+
+ utils.init_distributed_mode(args)
+ print(args)
+
+ device = torch.device(args.device)
+
+ # Data loading code
+ print("Loading data")
+
+ dataset, num_classes = get_dataset(
+ args.dataset, "train", get_transform(True, args.data_augmentation), args.data_path
+ )
+ dataset_test, _ = get_dataset(args.dataset, "val", get_transform(False, args.data_augmentation), args.data_path)
+
+ print("Creating data loaders")
+ if args.distributed:
+ train_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
+ test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test)
+ else:
+ train_sampler = torch.utils.data.RandomSampler(dataset)
+ test_sampler = torch.utils.data.SequentialSampler(dataset_test)
+
+ if args.aspect_ratio_group_factor >= 0:
+ group_ids = create_aspect_ratio_groups(dataset, k=args.aspect_ratio_group_factor)
+ train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)
+ else:
+ train_batch_sampler = torch.utils.data.BatchSampler(train_sampler, args.batch_size, drop_last=True)
+
+ data_loader = torch.utils.data.DataLoader(
+ dataset, batch_sampler=train_batch_sampler, num_workers=args.workers, collate_fn=utils.collate_fn
+ )
+
+ data_loader_test = torch.utils.data.DataLoader(
+ dataset_test, batch_size=1, sampler=test_sampler, num_workers=args.workers, collate_fn=utils.collate_fn
+ )
+
+ print("Creating model")
+ kwargs = {"trainable_backbone_layers": args.trainable_backbone_layers}
+
+ model = torchvision.models.detection.ssdlite320_mobilenet_v3_large(pretrained=False, **kwargs)
+
+ model.to(device)
+
+ params = [p for p in model.parameters() if p.requires_grad]
+ optimizer = torch.optim.SGD(params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
+
+ model, optimizer = amp.initialize(model, optimizer, opt_level="O1", loss_scale=128.0, combine_grad=True)
+
+ if args.distributed and args.sync_bn:
+ model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
+
+ model_without_ddp = model
+ if args.distributed:
+ model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
+ model_without_ddp = model.module
+
+ args.lr_scheduler = args.lr_scheduler.lower()
+ if args.lr_scheduler == "multisteplr":
+ lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)
+ elif args.lr_scheduler == "cosineannealinglr":
+ lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.epochs)
+ else:
+ raise RuntimeError(
+ f"Invalid lr scheduler '{args.lr_scheduler}'. Only MultiStepLR and CosineAnnealingLR are supported."
+ )
+
+ if args.resume:
+ print(args.resume)
+ checkpoint = torch.load(args.resume, map_location="cpu")
+ model_without_ddp.load_state_dict(checkpoint["model"])
+ optimizer.load_state_dict(checkpoint["optimizer"])
+ lr_scheduler.load_state_dict(checkpoint["lr_scheduler"])
+ args.start_epoch = checkpoint["epoch"] + 1
+ amp.load_state_dict(checkpoint['amp'])
+
+ if args.test_only:
+ evaluate(model_without_ddp, data_loader_test, device=device)
+ return
+
+ print("Start training",'args.distributed:',args.distributed)
+ start_time = time.time()
+ for epoch in range(args.start_epoch, args.epochs):
+ if args.distributed:
+ train_sampler.set_epoch(epoch)
+
+ train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq)
+ lr_scheduler.step()
+ if args.output_dir:
+ checkpoint = {
+ "model": model_without_ddp.state_dict(),
+ "optimizer": optimizer.state_dict(),
+ "lr_scheduler": lr_scheduler.state_dict(),
+ "args": args,
+ "epoch": epoch,
+ 'amp': amp.state_dict()
+ }
+ utils.save_on_master(checkpoint, os.path.join(args.output_dir, f"model_{epoch}.pth"))
+ utils.save_on_master(checkpoint, os.path.join(args.output_dir, "checkpoint.pth"))
+
+ # evaluate after every epoch
+ evaluate(model, data_loader_test, device=device)
+
+ total_time = time.time() - start_time
+ total_time_str = str(datetime.timedelta(seconds=int(total_time)))
+ print(f"Training time {total_time_str}")
+
+
+if __name__ == "__main__":
+ args = get_args_parser().parse_args()
+ main(args)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/transform_ssd.py b/PyTorch/contrib/cv/classification/SSDLite320/transform_ssd.py
new file mode 100644
index 0000000000..f0683a1a37
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/transform_ssd.py
@@ -0,0 +1,302 @@
+import math
+from typing import List, Tuple, Dict, Optional
+
+import torch
+import torchvision
+import numpy as np
+from torch import nn, Tensor
+from image_list import ImageList
+
+
+@torch.jit.unused
+def _get_shape_onnx(image: Tensor) -> Tensor:
+ from torch.onnx import operators
+
+ return operators.shape_as_tensor(image)[-2:]
+
+
+@torch.jit.unused
+def _fake_cast_onnx(v: Tensor) -> float:
+ # ONNX requires a tensor but here we fake its type for JIT.
+ return v
+
+
+def _resize_image_and_masks(
+ image: Tensor,
+ self_min_size: float,
+ self_max_size: float,
+ target: Optional[Dict[str, Tensor]] = None,
+ fixed_size: Optional[Tuple[int, int]] = None,
+) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
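+ """Resize ``image`` (and any instance masks in ``target``) either to ``fixed_size``
+ or so that its shorter side matches ``self_min_size`` without the longer side
+ exceeding ``self_max_size``.
+ """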
+ if torchvision._is_tracing():
+ im_shape = _get_shape_onnx(image)
+ else:
+ im_shape = torch.tensor(image.shape[-2:])
+
+ size: Optional[List[int]] = None
+ scale_factor: Optional[float] = None
+ recompute_scale_factor: Optional[bool] = None
+ if fixed_size is not None:
+ size = [fixed_size[1], fixed_size[0]]
+ else:
+ min_size = torch.min(im_shape).to(dtype=torch.float32)
+ max_size = torch.max(im_shape).to(dtype=torch.float32)
+ scale = torch.min(self_min_size / min_size, self_max_size / max_size)
+
+ if torchvision._is_tracing():
+ scale_factor = _fake_cast_onnx(scale)
+ else:
+ scale_factor = scale.item()
+ recompute_scale_factor = True
+
+ image = torch.nn.functional.interpolate(
+ image[None],
+ size=size,
+ scale_factor=scale_factor,
+ mode="bilinear",
+ recompute_scale_factor=recompute_scale_factor,
+ align_corners=False,
+ )[0]
+
+ if target is None:
+ return image, target
+
+ if "masks" in target:
+ mask = target["masks"]
+ mask = torch.nn.functional.interpolate(
+ mask[:, None].float(), size=size, scale_factor=scale_factor, recompute_scale_factor=recompute_scale_factor
+ )[:, 0].byte()
+ target["masks"] = mask
+ return image, target
+
+
+class GeneralizedRCNNTransform(nn.Module):
+ """
+ Performs input / target transformation before feeding the data to a GeneralizedRCNN
+ model.
+
+ The transformations it performs are:
+ - input normalization (mean subtraction and std division)
+ - input / target resizing to match min_size / max_size
+
+ It returns an ImageList for the inputs, and a List[Dict[Tensor]] for the targets.
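+
+ Example (minimal sketch; the sizes, statistics and device below are arbitrary):
+ >>> transform = GeneralizedRCNNTransform(
+ ... device=torch.device("cpu"), min_size=320, max_size=320,
+ ... image_mean=[0.485, 0.456, 0.406], image_std=[0.229, 0.224, 0.225])
+ >>> image_list, targets = transform([torch.rand(3, 480, 640)])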
+ """
+
+ def __init__(
+ self,
+ device: torch.device,
+ min_size: int,
+ max_size: int,
+ image_mean: List[float],
+ image_std: List[float],
+ size_divisible: int = 32,
+ fixed_size: Optional[Tuple[int, int]] = None,
+ ):
+ super().__init__()
+ if not isinstance(min_size, (list, tuple)):
+ min_size = (min_size,)
+ self.min_size = min_size
+ self.max_size = max_size
+ self.image_mean = image_mean
+ self.image_std = image_std
+ self.size_divisible = size_divisible
+ self.fixed_size = fixed_size
+ self.device = device
+
+ def forward(
+ self, images: List[Tensor], targets: Optional[List[Dict[str, Tensor]]] = None
+ ) -> Tuple[ImageList, Optional[List[Dict[str, Tensor]]]]:
+ images = [img for img in images]
+ if targets is not None:
+ # make a copy of targets to avoid modifying it in-place
+ # once torchscript supports dict comprehension
+ # this can be simplified as follows
+ # targets = [{k: v for k,v in t.items()} for t in targets]
+ targets_copy: List[Dict[str, Tensor]] = []
+ for t in targets:
+ data: Dict[str, Tensor] = {}
+ for k, v in t.items():
+ data[k] = v
+ targets_copy.append(data)
+ targets = targets_copy
+ for i in range(len(images)):
+ image = images[i]
+ target_index = targets[i] if targets is not None else None
+
+ if image.dim() != 3:
+ raise ValueError(f"images is expected to be a list of 3d tensors of shape [C, H, W], got {image.shape}")
+ image = self.normalize(image)
+ image, target_index = self.resize(image, target_index)
+
+ image = image.to(self.device)
+ images[i] = image
+ if targets is not None and target_index is not None:
+ targets[i] = target_index
+ image_sizes = [img.shape[-2:] for img in images]
+ images = self.batch_images(images, size_divisible=self.size_divisible)
+ image_sizes_list: List[Tuple[int, int]] = []
+ for image_size in image_sizes:
+ assert len(image_size) == 2
+ image_sizes_list.append((image_size[0], image_size[1]))
+
+ image_list = ImageList(images, image_sizes_list)
+ return image_list, targets
+
+ def normalize(self, image: Tensor) -> Tensor:
+ if not image.is_floating_point():
+ raise TypeError(
+ f"Expected input images to be of floating type (in range [0, 1]), "
+ f"but found type {image.dtype} instead"
+ )
+ dtype, device = image.dtype, image.device
+ mean = torch.as_tensor(self.image_mean, dtype=dtype, device=device)
+ std = torch.as_tensor(self.image_std, dtype=dtype, device=device)
+ return (image - mean[:, None, None]) / std[:, None, None]
+
+ def torch_choice(self, k: List[int]) -> int:
+ """
+ Implements `random.choice` via torch ops so it can be compiled with
+ TorchScript. Remove if https://github.com/pytorch/pytorch/issues/25803
+ is fixed.
+ """
+ index = int(torch.empty(1).uniform_(0.0, float(len(k))).item())
+ return k[index]
+
+ def resize(
+ self,
+ image: Tensor,
+ target: Optional[Dict[str, Tensor]] = None,
+ ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
+ h, w = image.shape[-2:]
+ if self.training:
+ size = float(self.torch_choice(self.min_size))
+ else:
+ # FIXME assume for now that testing uses the largest scale
+ size = float(self.min_size[-1])
+ image, target = _resize_image_and_masks(image, size, float(self.max_size), target, self.fixed_size)
+
+ if target is None:
+ return image, target
+
+ # padding values
+ # fix the number of ground-truth boxes per image
+ max_boxes = 20
+ classes = 0
+ target = fix_target(target, max_boxes, classes)
+
+ bbox = target["boxes"]
+ bbox = resize_boxes(bbox, (h, w), image.shape[-2:])
+ target["boxes"] = bbox
+
+ if "keypoints" in target:
+ keypoints = target["keypoints"]
+ keypoints = resize_keypoints(keypoints, (h, w), image.shape[-2:])
+ target["keypoints"] = keypoints
+ return image, target
+
+ # _onnx_batch_images() is an implementation of
+ # batch_images() that is supported by ONNX tracing.
+ @torch.jit.unused
+ def _onnx_batch_images(self, images: List[Tensor], size_divisible: int = 32) -> Tensor:
+ max_size = []
+ for i in range(images[0].dim()):
+ max_size_i = torch.max(torch.stack([img.shape[i] for img in images]).to(torch.float32)).to(torch.int64)
+ max_size.append(max_size_i)
+ stride = size_divisible
+ max_size[1] = (torch.ceil((max_size[1].to(torch.float32)) / stride) * stride).to(torch.int64)
+ max_size[2] = (torch.ceil((max_size[2].to(torch.float32)) / stride) * stride).to(torch.int64)
+ max_size = tuple(max_size)
+
+ # work around for
+ # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)
+ # which is not yet supported in onnx
+ padded_imgs = []
+ for img in images:
+ padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))]
+ padded_img = torch.nn.functional.pad(img, (0, padding[2], 0, padding[1], 0, padding[0]))
+ padded_imgs.append(padded_img)
+
+ return torch.stack(padded_imgs)
+
+ def max_by_axis(self, the_list: List[List[int]]) -> List[int]:
+ maxes = the_list[0]
+ for sublist in the_list[1:]:
+ for index, item in enumerate(sublist):
+ maxes[index] = max(maxes[index], item)
+ return maxes
+
+ def batch_images(self, images: List[Tensor], size_divisible: int = 32) -> Tensor:
+ if torchvision._is_tracing():
+ # batch_images() does not export well to ONNX
+ # call _onnx_batch_images() instead
+ return self._onnx_batch_images(images, size_divisible)
+
+ max_size = self.max_by_axis([list(img.shape) for img in images])
+ stride = float(size_divisible)
+ max_size = list(max_size)
+ max_size[1] = int(math.ceil(float(max_size[1]) / stride) * stride)
+ max_size[2] = int(math.ceil(float(max_size[2]) / stride) * stride)
+
+ batch_shape = [len(images)] + max_size
+ batched_imgs = images[0].new_full(batch_shape, 0)
+ for i in range(batched_imgs.shape[0]):
+ img = images[i]
+ batched_imgs[i, : img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)
+
+ return batched_imgs
+
+def resize_boxes(boxes: Tensor, original_size: List[int], new_size: List[int]) -> Tensor:
+ ratios = [
+ torch.tensor(s, dtype=torch.float32, device=boxes.device)
+ / torch.tensor(s_orig, dtype=torch.float32, device=boxes.device)
+ for s, s_orig in zip(new_size, original_size)
+ ]
+ ratio_height, ratio_width = ratios
+ xmin, ymin, xmax, ymax = boxes.unbind(1)
+
+ xmin = xmin * ratio_width
+ xmax = xmax * ratio_width
+ ymin = ymin * ratio_height
+ ymax = ymax * ratio_height
+ return torch.stack((xmin, ymin, xmax, ymax), dim=1)
+
+def fix_target(target, max_boxes, classes):
+ target_pad = []
+ boxes_num = target['boxes'].shape[0]
+ if boxes_num < max_boxes:
+ diff_num = max_boxes - boxes_num
+        # Pad boxes
+ np_boxes = target['boxes'].numpy()
+ np_boxes = np.concatenate((np_boxes, np.zeros([diff_num, 4])), axis=0)
+        target['boxes'] = torch.as_tensor(np_boxes, dtype=torch.float)
+        # Pad labels
+ padding_label = torch.zeros(diff_num) + classes
+ np_padding_label = padding_label.long().numpy()
+ np_labels = target['labels'].numpy()
+ np_labels = np.concatenate((np_labels, np_padding_label), axis=0)
+ target['labels'] = torch.from_numpy(np_labels)
+        # Pad masks
+ padding_mask = target['masks'][0].unsqueeze(0)
+ np_padding_mask = padding_mask.numpy()
+ np_masks = target['masks'].numpy()
+ np_masks = np.concatenate((np_masks, np.tile(np_padding_mask, (diff_num, 1, 1))), axis=0)
+ target['masks'] = torch.from_numpy(np_masks)
+        # Pad area
+ np_area = target['area'].numpy()
+ np_area = np.concatenate((np_area, np.zeros(diff_num)), axis=0)
+ target['area'] = torch.as_tensor(np_area, dtype=torch.float)
+        # Pad iscrowd
+ padding_iscrowd = torch.zeros(diff_num).long()
+ np_padding_iscrowd = padding_iscrowd.numpy()
+ np_iscrowd = target['iscrowd'].numpy()
+ np_iscrowd = np.concatenate((np_iscrowd, np_padding_iscrowd), axis=0)
+ target['iscrowd'] = torch.from_numpy(np_iscrowd)
+ else:
+ select_idx = torch.randperm(boxes_num)[:max_boxes]
+ target['boxes'] = target['boxes'][select_idx]
+ target['labels'] = target['labels'][select_idx]
+ target['masks'] = target['masks'][select_idx]
+ target['area'] = target['area'][select_idx]
+ target['iscrowd'] = target['iscrowd'][select_idx]
+ return target
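+
+# Hedged usage sketch (added for illustration; not part of the original model code):
+# fix_target pads or truncates the annotation tensors so that every image carries
+# exactly `max_boxes` entries, which keeps the target tensor shapes static.
+if __name__ == "__main__":
+    _demo_target = {
+        'boxes': torch.tensor([[0., 0., 10., 10.], [5., 5., 20., 20.]]),
+        'labels': torch.tensor([1, 2]),
+        'masks': torch.zeros(2, 8, 8, dtype=torch.uint8),
+        'area': torch.tensor([100., 225.]),
+        'iscrowd': torch.tensor([0, 0]),
+    }
+    _padded = fix_target(_demo_target, max_boxes=20, classes=0)
+    # every field now holds 20 entries: the extra boxes are zeros and the extra
+    # labels take the background index passed via `classes`
+    print({k: tuple(v.shape) for k, v in _padded.items()})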
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/transforms.py b/PyTorch/contrib/cv/classification/SSDLite320/transforms.py
new file mode 100644
index 0000000000..9415efb893
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/transforms.py
@@ -0,0 +1,286 @@
+from typing import List, Tuple, Dict, Optional
+
+import torch
+import torchvision
+from torch import nn, Tensor
+from torchvision.transforms import functional as F
+from torchvision.transforms import transforms as T
+
+
+def _flip_coco_person_keypoints(kps, width):
+ flip_inds = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
+ flipped_data = kps[:, flip_inds]
+ flipped_data[..., 0] = width - flipped_data[..., 0]
+ # Maintain COCO convention that if visibility == 0, then x, y = 0
+ inds = flipped_data[..., 2] == 0
+ flipped_data[inds] = 0
+ return flipped_data
+
+
+class Compose:
+ def __init__(self, transforms):
+ self.transforms = transforms
+
+ def __call__(self, image, target):
+ for t in self.transforms:
+ image, target = t(image, target)
+
+ return image, target
+
+
+class RandomHorizontalFlip(T.RandomHorizontalFlip):
+ def forward(
+ self, image: Tensor, target: Optional[Dict[str, Tensor]] = None
+ ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
+ if torch.rand(1) < self.p:
+ image = F.hflip(image)
+ if target is not None:
+ width, _ = F.get_image_size(image)
+ target["boxes"][:, [0, 2]] = width - target["boxes"][:, [2, 0]]
+ if "masks" in target:
+ target["masks"] = target["masks"].flip(-1)
+ if "keypoints" in target:
+ keypoints = target["keypoints"]
+ keypoints = _flip_coco_person_keypoints(keypoints, width)
+ target["keypoints"] = keypoints
+ return image, target
+
+
+class ToTensor(nn.Module):
+ def forward(
+ self, image: Tensor, target: Optional[Dict[str, Tensor]] = None
+ ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
+ image = F.pil_to_tensor(image)
+ image = F.convert_image_dtype(image)
+ return image, target
+
+
+class PILToTensor(nn.Module):
+ def forward(
+ self, image: Tensor, target: Optional[Dict[str, Tensor]] = None
+ ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
+ image = F.pil_to_tensor(image)
+ return image, target
+
+
+class ConvertImageDtype(nn.Module):
+ def __init__(self, dtype: torch.dtype) -> None:
+ super().__init__()
+ self.dtype = dtype
+
+ def forward(
+ self, image: Tensor, target: Optional[Dict[str, Tensor]] = None
+ ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
+ image = F.convert_image_dtype(image, self.dtype)
+ return image, target
+
+
+class RandomIoUCrop(nn.Module):
+ def __init__(
+ self,
+ min_scale: float = 0.3,
+ max_scale: float = 1.0,
+ min_aspect_ratio: float = 0.5,
+ max_aspect_ratio: float = 2.0,
+ sampler_options: Optional[List[float]] = None,
+ trials: int = 40,
+ ):
+ super().__init__()
+ # Configuration similar to https://github.com/weiliu89/caffe/blob/ssd/examples/ssd/ssd_coco.py#L89-L174
+ self.min_scale = min_scale
+ self.max_scale = max_scale
+ self.min_aspect_ratio = min_aspect_ratio
+ self.max_aspect_ratio = max_aspect_ratio
+ if sampler_options is None:
+ sampler_options = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0]
+ self.options = sampler_options
+ self.trials = trials
+
+ def forward(
+ self, image: Tensor, target: Optional[Dict[str, Tensor]] = None
+ ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
+ if target is None:
+ raise ValueError("The targets can't be None for this transform.")
+
+ if isinstance(image, torch.Tensor):
+ if image.ndimension() not in {2, 3}:
+ raise ValueError(f"image should be 2/3 dimensional. Got {image.ndimension()} dimensions.")
+ elif image.ndimension() == 2:
+ image = image.unsqueeze(0)
+
+ orig_w, orig_h = F.get_image_size(image)
+
+ while True:
+ # sample an option
+ idx = int(torch.randint(low=0, high=len(self.options), size=(1,)))
+ min_jaccard_overlap = self.options[idx]
+ if min_jaccard_overlap >= 1.0: # a value larger than 1 encodes the leave as-is option
+ return image, target
+
+ for _ in range(self.trials):
+ # check the aspect ratio limitations
+ r = self.min_scale + (self.max_scale - self.min_scale) * torch.rand(2)
+ new_w = int(orig_w * r[0])
+ new_h = int(orig_h * r[1])
+ aspect_ratio = new_w / new_h
+ if not (self.min_aspect_ratio <= aspect_ratio <= self.max_aspect_ratio):
+ continue
+
+ # check for 0 area crops
+ r = torch.rand(2)
+ left = int((orig_w - new_w) * r[0])
+ top = int((orig_h - new_h) * r[1])
+ right = left + new_w
+ bottom = top + new_h
+ if left == right or top == bottom:
+ continue
+
+ # check for any valid boxes with centers within the crop area
+ cx = 0.5 * (target["boxes"][:, 0] + target["boxes"][:, 2])
+ cy = 0.5 * (target["boxes"][:, 1] + target["boxes"][:, 3])
+ is_within_crop_area = (left < cx) & (cx < right) & (top < cy) & (cy < bottom)
+ if not is_within_crop_area.any():
+ continue
+
+ # check at least 1 box with jaccard limitations
+ boxes = target["boxes"][is_within_crop_area]
+ ious = torchvision.ops.boxes.box_iou(
+ boxes, torch.tensor([[left, top, right, bottom]], dtype=boxes.dtype, device=boxes.device)
+ )
+ if ious.max() < min_jaccard_overlap:
+ continue
+
+ # keep only valid boxes and perform cropping
+ target["boxes"] = boxes
+ target["labels"] = target["labels"][is_within_crop_area]
+ target["boxes"][:, 0::2] -= left
+ target["boxes"][:, 1::2] -= top
+ target["boxes"][:, 0::2].clamp_(min=0, max=new_w)
+ target["boxes"][:, 1::2].clamp_(min=0, max=new_h)
+ image = F.crop(image, top, left, new_h, new_w)
+
+ return image, target
+
+
+class RandomZoomOut(nn.Module):
+ def __init__(
+ self, fill: Optional[List[float]] = None, side_range: Tuple[float, float] = (1.0, 4.0), p: float = 0.5
+ ):
+ super().__init__()
+ if fill is None:
+ fill = [0.0, 0.0, 0.0]
+ self.fill = fill
+ self.side_range = side_range
+ if side_range[0] < 1.0 or side_range[0] > side_range[1]:
+ raise ValueError(f"Invalid canvas side range provided {side_range}.")
+ self.p = p
+
+ @torch.jit.unused
+ def _get_fill_value(self, is_pil):
+ # type: (bool) -> int
+ # We fake the type to make it work on JIT
+ return tuple(int(x) for x in self.fill) if is_pil else 0
+
+ def forward(
+ self, image: Tensor, target: Optional[Dict[str, Tensor]] = None
+ ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
+ if isinstance(image, torch.Tensor):
+ if image.ndimension() not in {2, 3}:
+ raise ValueError(f"image should be 2/3 dimensional. Got {image.ndimension()} dimensions.")
+ elif image.ndimension() == 2:
+ image = image.unsqueeze(0)
+
+ if torch.rand(1) < self.p:
+ return image, target
+
+ orig_w, orig_h = F.get_image_size(image)
+
+ r = self.side_range[0] + torch.rand(1) * (self.side_range[1] - self.side_range[0])
+ canvas_width = int(orig_w * r)
+ canvas_height = int(orig_h * r)
+
+ r = torch.rand(2)
+ left = int((canvas_width - orig_w) * r[0])
+ top = int((canvas_height - orig_h) * r[1])
+ right = canvas_width - (left + orig_w)
+ bottom = canvas_height - (top + orig_h)
+
+ if torch.jit.is_scripting():
+ fill = 0
+ else:
+ fill = self._get_fill_value(F._is_pil_image(image))
+
+ image = F.pad(image, [left, top, right, bottom], fill=fill)
+ if isinstance(image, torch.Tensor):
+ v = torch.tensor(self.fill, device=image.device, dtype=image.dtype).view(-1, 1, 1)
+ image[..., :top, :] = image[..., :, :left] = image[..., (top + orig_h) :, :] = image[
+ ..., :, (left + orig_w) :
+ ] = v
+
+ if target is not None:
+ target["boxes"][:, 0::2] += left
+ target["boxes"][:, 1::2] += top
+
+ return image, target
+
+
+class RandomPhotometricDistort(nn.Module):
+ def __init__(
+ self,
+        contrast: Tuple[float, float] = (0.5, 1.5),
+        saturation: Tuple[float, float] = (0.5, 1.5),
+        hue: Tuple[float, float] = (-0.05, 0.05),
+        brightness: Tuple[float, float] = (0.875, 1.125),
+ p: float = 0.5,
+ ):
+ super().__init__()
+ self._brightness = T.ColorJitter(brightness=brightness)
+ self._contrast = T.ColorJitter(contrast=contrast)
+ self._hue = T.ColorJitter(hue=hue)
+ self._saturation = T.ColorJitter(saturation=saturation)
+ self.p = p
+
+ def forward(
+ self, image: Tensor, target: Optional[Dict[str, Tensor]] = None
+ ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
+ if isinstance(image, torch.Tensor):
+ if image.ndimension() not in {2, 3}:
+ raise ValueError(f"image should be 2/3 dimensional. Got {image.ndimension()} dimensions.")
+ elif image.ndimension() == 2:
+ image = image.unsqueeze(0)
+
+ r = torch.rand(7)
+
+ if r[0] < self.p:
+ image = self._brightness(image)
+
+ contrast_before = r[1] < 0.5
+ if contrast_before:
+ if r[2] < self.p:
+ image = self._contrast(image)
+
+ if r[3] < self.p:
+ image = self._saturation(image)
+
+ if r[4] < self.p:
+ image = self._hue(image)
+
+ if not contrast_before:
+ if r[5] < self.p:
+ image = self._contrast(image)
+
+ if r[6] < self.p:
+ channels = F.get_image_num_channels(image)
+ permutation = torch.randperm(channels)
+
+ is_pil = F._is_pil_image(image)
+ if is_pil:
+ image = F.pil_to_tensor(image)
+ image = F.convert_image_dtype(image)
+ image = image[..., permutation, :, :]
+ if is_pil:
+ image = F.to_pil_image(image)
+
+ return image, target
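+
+
+# Hedged usage sketch (illustrative only; not part of the original file): each
+# transform above accepts and returns an (image, target) pair, so they can be
+# chained with Compose for detection-style augmentation.
+if __name__ == "__main__":
+    _tfm = Compose([RandomHorizontalFlip(p=1.0)])
+    _img = torch.zeros(3, 4, 6)  # C x H x W tensor image
+    _tgt = {"boxes": torch.tensor([[1.0, 1.0, 3.0, 3.0]]), "labels": torch.tensor([1])}
+    _img, _tgt = _tfm(_img, _tgt)
+    # the box x-coordinates are mirrored around the image width (6)
+    print(_tgt["boxes"])  # tensor([[3., 1., 5., 3.]])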
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/utils.py b/PyTorch/contrib/cv/classification/SSDLite320/utils.py
new file mode 100644
index 0000000000..00bb4bbce1
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/utils.py
@@ -0,0 +1,288 @@
+import datetime
+import errno
+import os
+import time
+from collections import defaultdict, deque
+
+import torch
+import torch.distributed as dist
+
+
+class SmoothedValue:
+ """Track a series of values and provide access to smoothed values over a
+ window or the global series average.
+ """
+
+ def __init__(self, window_size=20, fmt=None):
+ if fmt is None:
+ fmt = "{median:.4f} ({global_avg:.4f})"
+ self.deque = deque(maxlen=window_size)
+ self.total = 0.0
+ self.count = 0
+ self.fmt = fmt
+
+ def update(self, value, n=1):
+ self.deque.append(value)
+ self.count += n
+ self.total += value * n
+
+ def synchronize_between_processes(self):
+ """
+ Warning: does not synchronize the deque!
+ """
+ if not is_dist_avail_and_initialized():
+ return
+ t = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda")
+ dist.barrier()
+ dist.all_reduce(t)
+ t = t.tolist()
+ self.count = int(t[0])
+ self.total = t[1]
+
+ @property
+ def median(self):
+ d = torch.tensor(list(self.deque))
+ return d.median().item()
+
+ @property
+ def avg(self):
+ d = torch.tensor(list(self.deque), dtype=torch.float32)
+ return d.mean().item()
+
+ @property
+ def global_avg(self):
+ return self.total / self.count
+
+ @property
+ def max(self):
+ return max(self.deque)
+
+ @property
+ def value(self):
+ return self.deque[-1]
+
+ def __str__(self):
+ return self.fmt.format(
+ median=self.median, avg=self.avg, global_avg=self.global_avg, max=self.max, value=self.value
+ )
+
+
+def all_gather(data):
+ """
+ Run all_gather on arbitrary picklable data (not necessarily tensors)
+ Args:
+ data: any picklable object
+ Returns:
+ list[data]: list of data gathered from each rank
+ """
+ world_size = get_world_size()
+ if world_size == 1:
+ return [data]
+ data_list = [None] * world_size
+ dist.all_gather_object(data_list, data)
+ return data_list
+
+
+def reduce_dict(input_dict, average=True):
+ """
+ Args:
+ input_dict (dict): all the values will be reduced
+ average (bool): whether to do average or sum
+ Reduce the values in the dictionary from all processes so that all processes
+ have the averaged results. Returns a dict with the same fields as
+ input_dict, after reduction.
+ """
+ world_size = get_world_size()
+ if world_size < 2:
+ return input_dict
+    with torch.no_grad():  # torch.inference_mode() in newer PyTorch
+ names = []
+ values = []
+ # sort the keys so that they are consistent across processes
+ for k in sorted(input_dict.keys()):
+ names.append(k)
+ values.append(input_dict[k])
+ values = torch.stack(values, dim=0)
+ dist.all_reduce(values)
+ if average:
+ values /= world_size
+ reduced_dict = {k: v for k, v in zip(names, values)}
+ return reduced_dict
+
+
+class MetricLogger:
+ def __init__(self, delimiter="\t"):
+ self.meters = defaultdict(SmoothedValue)
+ self.delimiter = delimiter
+
+ def update(self, **kwargs):
+ for k, v in kwargs.items():
+ if isinstance(v, torch.Tensor):
+ v = v.item()
+ assert isinstance(v, (float, int))
+ self.meters[k].update(v)
+
+ def __getattr__(self, attr):
+ if attr in self.meters:
+ return self.meters[attr]
+ if attr in self.__dict__:
+ return self.__dict__[attr]
+ raise AttributeError(f"'{type(self).__name__}' object has no attribute '{attr}'")
+
+ def __str__(self):
+ loss_str = []
+ for name, meter in self.meters.items():
+ loss_str.append(f"{name}: {str(meter)}")
+ return self.delimiter.join(loss_str)
+
+ def synchronize_between_processes(self):
+ for meter in self.meters.values():
+ meter.synchronize_between_processes()
+
+ def add_meter(self, name, meter):
+ self.meters[name] = meter
+
+ def log_every(self, iterable, print_freq, header=None):
+ i = 0
+ if not header:
+ header = ""
+ start_time = time.time()
+ end = time.time()
+ iter_time = SmoothedValue(fmt="{avg:.4f}")
+ data_time = SmoothedValue(fmt="{avg:.4f}")
+ space_fmt = ":" + str(len(str(len(iterable)))) + "d"
+ if torch.cuda.is_available():
+ log_msg = self.delimiter.join(
+ [
+ header,
+ "[{0" + space_fmt + "}/{1}]",
+ "eta: {eta}",
+ "{meters}",
+ "time: {time}",
+ "data: {data}",
+ "max mem: {memory:.0f}",
+ ]
+ )
+ else:
+ log_msg = self.delimiter.join(
+ [header, "[{0" + space_fmt + "}/{1}]", "eta: {eta}", "{meters}", "time: {time}", "data: {data}"]
+ )
+ MB = 1024.0 * 1024.0
+ for obj in iterable:
+ data_time.update(time.time() - end)
+ yield obj
+ iter_time.update(time.time() - end)
+ if i % print_freq == 0 or i == len(iterable) - 1:
+ eta_seconds = iter_time.global_avg * (len(iterable) - i)
+ eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
+ if torch.cuda.is_available():
+ print(
+ log_msg.format(
+ i,
+ len(iterable),
+ eta=eta_string,
+ meters=str(self),
+ time=str(iter_time),
+ data=str(data_time),
+ memory=torch.cuda.max_memory_allocated() / MB,
+ )
+ )
+ else:
+ print(
+ log_msg.format(
+ i, len(iterable), eta=eta_string, meters=str(self), time=str(iter_time), data=str(data_time)
+ )
+ )
+ i += 1
+ end = time.time()
+ total_time = time.time() - start_time
+ total_time_str = str(datetime.timedelta(seconds=int(total_time)))
+ print(f"{header} Total time: {total_time_str} ({total_time / len(iterable):.4f} s / it)")
+
+
+def collate_fn(batch):
+ return tuple(zip(*batch))
+
+
+def mkdir(path):
+ try:
+ os.makedirs(path)
+ except OSError as e:
+ if e.errno != errno.EEXIST:
+ raise
+
+
+def setup_for_distributed(is_master):
+ """
+ This function disables printing when not in master process
+ """
+ import builtins as __builtin__
+
+ builtin_print = __builtin__.print
+
+ def print(*args, **kwargs):
+ force = kwargs.pop("force", False)
+ if is_master or force:
+ builtin_print(*args, **kwargs)
+
+ __builtin__.print = print
+
+
+def is_dist_avail_and_initialized():
+ if not dist.is_available():
+ return False
+ if not dist.is_initialized():
+ return False
+ return True
+
+
+def get_world_size():
+ if not is_dist_avail_and_initialized():
+ return 1
+ return dist.get_world_size()
+
+
+def get_rank():
+ if not is_dist_avail_and_initialized():
+ return 0
+ return dist.get_rank()
+
+
+def is_main_process():
+ return get_rank() == 0
+
+
+def save_on_master(*args, **kwargs):
+ if is_main_process():
+ torch.save(*args, **kwargs)
+
+
+def init_distributed_mode(args):
+ if "RANK" in os.environ and "WORLD_SIZE" in os.environ:
+ args.rank = int(os.environ["RANK"])
+ args.world_size = int(os.environ["WORLD_SIZE"])
+ args.gpu = int(os.environ["LOCAL_RANK"])
+ elif "SLURM_PROCID" in os.environ:
+ args.rank = int(os.environ["SLURM_PROCID"])
+ args.gpu = args.rank % torch.npu.device_count()
+ else:
+ print("Not using distributed mode")
+ args.distributed = False
+ return
+ # print("Not using distributed mode")
+ # args.distributed = False
+ # return
+
+ args.distributed = True
+
+ torch.npu.set_device(args.gpu)
+ args.dist_backend = "hccl"
+ print(f"| distributed init (rank {args.rank}): {args.dist_url}", flush=True)
+ torch.distributed.init_process_group(
+ backend=args.dist_backend,
+ # init_method=args.dist_url,
+ world_size=args.world_size,
+ rank=args.rank
+ )
+ torch.distributed.barrier()
+ setup_for_distributed(args.rank == 0)
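+
+
+# Hedged single-process sketch (illustrative; not part of the original file) of the
+# helpers above: SmoothedValue tracks a metric over a sliding window,
+# MetricLogger.log_every wraps an iterable with timing output, collate_fn regroups
+# (image, target) samples into parallel tuples, and reduce_dict is a no-op when the
+# world size is 1.
+if __name__ == "__main__":
+    meter = SmoothedValue(window_size=3, fmt="{median:.2f} ({global_avg:.2f})")
+    for v in [1.0, 2.0, 3.0, 4.0]:
+        meter.update(v)
+    print(meter)  # median of the last 3 values, global average over all 4
+
+    logger = MetricLogger(delimiter="  ")
+    for step in logger.log_every(range(4), print_freq=2, header="demo:"):
+        logger.update(loss=float(step))
+
+    batch = [("img0", {"labels": [1]}), ("img1", {"labels": [2]})]
+    images, targets = collate_fn(batch)
+    print(images, targets)  # ('img0', 'img1') ({'labels': [1]}, {'labels': [2]})
+
+    print(reduce_dict({"loss_cls": torch.tensor(0.5)}))  # unchanged on one process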
--
Gitee
From 54f87a70154fe68bb0668d0c9db9c7d204a04e57 Mon Sep 17 00:00:00 2001
From: Qw1kowa <1741235576@qq.com>
Date: Mon, 4 Jul 2022 19:38:01 +0800
Subject: [PATCH 2/8] =?UTF-8?q?SSDLite320=E9=A6=96=E6=AC=A1-8p=E7=B2=BE?=
=?UTF-8?q?=E5=BA=A6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../classification/SSDLite320/add_license.py | 50 ++++
.../cv/classification/SSDLite320/coco_eval.py | 33 +++
.../classification/SSDLite320/coco_utils.py | 33 +++
.../cv/classification/SSDLite320/engine.py | 33 +++
.../SSDLite320/extract_ops_by_step.py | 33 +++
.../SSDLite320/group_by_aspect_ratio.py | 33 +++
.../classification/SSDLite320/image_list.py | 33 +++
.../cv/classification/SSDLite320/jenkins.txt | 265 ++++++++++++++++++
.../cv/classification/SSDLite320/presets.py | 33 +++
.../{train_eval_1p.sh => train_eval_8p.sh} | 8 +-
.../torchvision/_internally_replaced_utils.py | 33 +++
.../torchvision/datasets/caltech.py | 33 +++
.../SSDLite320/torchvision/datasets/celeba.py | 33 +++
.../SSDLite320/torchvision/datasets/cifar.py | 33 +++
.../SSDLite320/torchvision/datasets/coco.py | 33 +++
.../torchvision/datasets/fakedata.py | 33 +++
.../SSDLite320/torchvision/datasets/flickr.py | 33 +++
.../SSDLite320/torchvision/datasets/folder.py | 33 +++
.../SSDLite320/torchvision/datasets/hmdb51.py | 33 +++
.../torchvision/datasets/imagenet.py | 33 +++
.../torchvision/datasets/kinetics.py | 33 +++
.../SSDLite320/torchvision/datasets/lsun.py | 33 +++
.../SSDLite320/torchvision/datasets/mnist.py | 33 +++
.../torchvision/datasets/omniglot.py | 33 +++
.../torchvision/datasets/phototour.py | 33 +++
.../datasets/samplers/clip_sampler.py | 33 +++
.../SSDLite320/torchvision/datasets/sbd.py | 33 +++
.../SSDLite320/torchvision/datasets/sbu.py | 33 +++
.../torchvision/datasets/semeion.py | 33 +++
.../SSDLite320/torchvision/datasets/stl10.py | 33 +++
.../SSDLite320/torchvision/datasets/svhn.py | 33 +++
.../SSDLite320/torchvision/datasets/ucf101.py | 33 +++
.../SSDLite320/torchvision/datasets/usps.py | 33 +++
.../SSDLite320/torchvision/datasets/utils.py | 33 +++
.../torchvision/datasets/video_utils.py | 33 +++
.../SSDLite320/torchvision/datasets/vision.py | 33 +++
.../SSDLite320/torchvision/datasets/voc.py | 33 +++
.../SSDLite320/torchvision/extension.py | 33 +++
.../SSDLite320/torchvision/io/_video_opt.py | 33 +++
.../SSDLite320/torchvision/io/video.py | 33 +++
.../SSDLite320/torchvision/models/_utils.py | 33 +++
.../torchvision/models/_utils_origin.py | 33 +++
.../SSDLite320/torchvision/models/alexnet.py | 33 +++
.../SSDLite320/torchvision/models/densenet.py | 33 +++
.../torchvision/models/detection/_utils.py | 33 +++
.../models/detection/_utils_origin.py | 33 +++
.../models/detection/anchor_utils.py | 33 +++
.../models/detection/anchor_utils_origin.py | 33 +++
.../models/detection/backbone_utils.py | 33 +++
.../models/detection/backbone_utils_origin.py | 33 +++
.../models/detection/faster_rcnn.py | 33 +++
.../models/detection/generalized_rcnn.py | 33 +++
.../models/detection/image_list.py | 33 +++
.../models/detection/keypoint_rcnn.py | 33 +++
.../torchvision/models/detection/mask_rcnn.py | 33 +++
.../torchvision/models/detection/roi_heads.py | 33 +++
.../torchvision/models/detection/rpn.py | 33 +++
.../torchvision/models/detection/ssd.py | 33 +++
.../models/detection/ssd_origin.py | 33 +++
.../torchvision/models/detection/ssdlite.py | 33 +++
.../torchvision/models/detection/transform.py | 33 +++
.../models/detection/transform_origin.py | 33 +++
.../torchvision/models/googlenet.py | 33 +++
.../torchvision/models/inception.py | 33 +++
.../SSDLite320/torchvision/models/mnasnet.py | 33 +++
.../torchvision/models/mobilenet.py | 33 +++
.../torchvision/models/mobilenet_origin.py | 33 +++
.../torchvision/models/mobilenetv2.py | 33 +++
.../torchvision/models/mobilenetv3.py | 33 +++
.../models/quantization/googlenet.py | 33 +++
.../models/quantization/inception.py | 33 +++
.../models/quantization/mobilenet.py | 33 +++
.../models/quantization/mobilenetv2.py | 33 +++
.../models/quantization/mobilenetv3.py | 33 +++
.../torchvision/models/quantization/resnet.py | 33 +++
.../models/quantization/shufflenetv2.py | 33 +++
.../torchvision/models/quantization/utils.py | 33 +++
.../SSDLite320/torchvision/models/resnet.py | 33 +++
.../torchvision/models/segmentation/_utils.py | 33 +++
.../models/segmentation/deeplabv3.py | 33 +++
.../torchvision/models/segmentation/fcn.py | 33 +++
.../models/segmentation/segmentation.py | 33 +++
.../torchvision/models/shufflenetv2.py | 33 +++
.../torchvision/models/squeezenet.py | 33 +++
.../SSDLite320/torchvision/models/utils.py | 33 +++
.../SSDLite320/torchvision/models/vgg.py | 33 +++
.../torchvision/models/video/resnet.py | 33 +++
.../torchvision/ops/_register_onnx_ops.py | 33 +++
.../SSDLite320/torchvision/ops/_utils.py | 33 +++
.../torchvision/ops/_utils_origin.py | 33 +++
.../SSDLite320/torchvision/ops/boxes.py | 33 +++
.../SSDLite320/torchvision/ops/deform_conv.py | 33 +++
.../ops/feature_pyramid_network.py | 33 +++
.../SSDLite320/torchvision/ops/misc.py | 33 +++
.../SSDLite320/torchvision/ops/misc_origin.py | 33 +++
.../torchvision/ops/new_empty_tensor.py | 33 +++
.../SSDLite320/torchvision/ops/poolers.py | 33 +++
.../torchvision/ops/ps_roi_align.py | 33 +++
.../SSDLite320/torchvision/ops/ps_roi_pool.py | 33 +++
.../SSDLite320/torchvision/ops/roi_align.py | 33 +++
.../SSDLite320/torchvision/ops/roi_pool.py | 33 +++
.../transforms/_functional_video.py | 33 +++
.../transforms/_transforms_video.py | 33 +++
.../torchvision/transforms/functional.py | 33 +++
.../transforms/functional_origin.py | 33 +++
.../torchvision/transforms/functional_pil.py | 33 +++
.../transforms/functional_tensor.py | 33 +++
.../transforms/functional_tensor_origin.py | 33 +++
.../torchvision/transforms/transforms.py | 33 +++
.../transforms/transforms_origin.py | 33 +++
.../SSDLite320/torchvision/utils.py | 33 +++
.../SSDLite320/torchvision/utils_origin.py | 33 +++
.../SSDLite320/torchvision/version.py | 33 +++
.../cv/classification/SSDLite320/train.py | 33 +++
.../SSDLite320/transform_ssd.py | 33 +++
.../classification/SSDLite320/transforms.py | 33 +++
.../cv/classification/SSDLite320/utils.py | 33 +++
117 files changed, 4081 insertions(+), 4 deletions(-)
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/add_license.py
create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/jenkins.txt
rename PyTorch/contrib/cv/classification/SSDLite320/test/{train_eval_1p.sh => train_eval_8p.sh} (95%)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/add_license.py b/PyTorch/contrib/cv/classification/SSDLite320/add_license.py
new file mode 100644
index 0000000000..f1bdbfc9b3
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/add_license.py
@@ -0,0 +1,50 @@
+# The license text below does not need to be modified
+lic = '''# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+'''
+file_list = []
+with open("jenkins.txt", encoding = "utf-8") as f:
+ for i in f.readlines():
+ if 'README.md' in i:
+ model_name = i.split('/')[4]
+ print('model_name:', model_name)
+
+ if 'license no exists' in i:
+ file_list.append(i.split(',')[0].split(model_name + '/')[1])
+
+for file in file_list:
+ print(file)
+ with open(file, 'r+') as filename:
+ filetext = filename.read()
+ filename.seek(0, 0)
+ filename.write(lic + '\n' + filetext)
\ No newline at end of file
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/coco_eval.py b/PyTorch/contrib/cv/classification/SSDLite320/coco_eval.py
index ec0709c5d9..54f686e269 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/coco_eval.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/coco_eval.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import copy
import io
from contextlib import redirect_stdout
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/coco_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/coco_utils.py
index a656602865..01eb6d4a8a 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/coco_utils.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/coco_utils.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import copy
import os
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/engine.py b/PyTorch/contrib/cv/classification/SSDLite320/engine.py
index 33f4897eb5..0944340903 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/engine.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/engine.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import math
import sys
import time
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/extract_ops_by_step.py b/PyTorch/contrib/cv/classification/SSDLite320/extract_ops_by_step.py
index 6a499a143f..c318666467 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/extract_ops_by_step.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/extract_ops_by_step.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import re
from collections import defaultdict
import argparse
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/group_by_aspect_ratio.py b/PyTorch/contrib/cv/classification/SSDLite320/group_by_aspect_ratio.py
index 1323849a6a..f858b994ce 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/group_by_aspect_ratio.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/group_by_aspect_ratio.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import bisect
import copy
import math
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/image_list.py b/PyTorch/contrib/cv/classification/SSDLite320/image_list.py
index 583866557e..960aefd75d 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/image_list.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/image_list.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from typing import List, Tuple
import torch
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/jenkins.txt b/PyTorch/contrib/cv/classification/SSDLite320/jenkins.txt
new file mode 100644
index 0000000000..9de6f1b1d4
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/jenkins.txt
@@ -0,0 +1,265 @@
+####################################################################
+# Start Modelzoo Network Test....
+####################################################################
+=================Modified files in this PR: =================
+PyTorch/contrib/cv/classification/SSDLite320/README.md
+PyTorch/contrib/cv/classification/SSDLite320/coco_eval.py
+PyTorch/contrib/cv/classification/SSDLite320/coco_utils.py
+PyTorch/contrib/cv/classification/SSDLite320/engine.py
+PyTorch/contrib/cv/classification/SSDLite320/extract_ops_by_step.py
+PyTorch/contrib/cv/classification/SSDLite320/group_by_aspect_ratio.py
+PyTorch/contrib/cv/classification/SSDLite320/image_list.py
+PyTorch/contrib/cv/classification/SSDLite320/presets.py
+PyTorch/contrib/cv/classification/SSDLite320/requirements.txt
+PyTorch/contrib/cv/classification/SSDLite320/test/env_npu.sh
+PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_1p.sh
+PyTorch/contrib/cv/classification/SSDLite320/test/train_full_1p.sh
+PyTorch/contrib/cv/classification/SSDLite320/test/train_full_8p.sh
+PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_1p.sh
+PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/__init__.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/_internally_replaced_utils.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/__init__.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/caltech.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/celeba.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/cifar.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/cityscapes.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/coco.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/fakedata.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/flickr.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/folder.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/hmdb51.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/imagenet.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/kinetics.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/lsun.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/mnist.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/omniglot.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/phototour.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/samplers/__init__.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/samplers/clip_sampler.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbd.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbu.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/semeion.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/stl10.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/svhn.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/ucf101.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/usps.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/utils.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/video_utils.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/vision.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/voc.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/extension.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/__init__.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/_video_opt.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/video.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/__init__.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils_origin.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/alexnet.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/densenet.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/__init__.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils_origin.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils_origin.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils_origin.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/faster_rcnn.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/generalized_rcnn.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/image_list.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/keypoint_rcnn.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/mask_rcnn.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/roi_heads.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/rpn.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd_origin.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssdlite.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform_origin.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/googlenet.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/inception.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mnasnet.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet_origin.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv2.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv3.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/__init__.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/googlenet.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/inception.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenet.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv2.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv3.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/resnet.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/shufflenetv2.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/utils.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/resnet.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/__init__.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/_utils.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/deeplabv3.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/fcn.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/segmentation.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/shufflenetv2.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/squeezenet.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/utils.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/vgg.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/video/__init__.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/video/resnet.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/__init__.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_register_onnx_ops.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils_origin.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/boxes.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/deform_conv.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/feature_pyramid_network.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc_origin.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/new_empty_tensor.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/poolers.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_align.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_pool.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_align.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_pool.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/__init__.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_functional_video.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_transforms_video.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_origin.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_pil.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor_origin.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms_origin.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils_origin.py
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/version.py
+PyTorch/contrib/cv/classification/SSDLite320/train.py
+PyTorch/contrib/cv/classification/SSDLite320/transform_ssd.py
+PyTorch/contrib/cv/classification/SSDLite320/transforms.py
+PyTorch/contrib/cv/classification/SSDLite320/utils.py
+=================Start to Check Type of File =================
+=================Start to Check License =================
+PyTorch/contrib/cv/classification/SSDLite320/coco_eval.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/coco_utils.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/engine.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/extract_ops_by_step.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/group_by_aspect_ratio.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/image_list.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/presets.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/_internally_replaced_utils.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/caltech.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/celeba.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/cifar.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/coco.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/fakedata.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/flickr.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/folder.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/hmdb51.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/imagenet.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/kinetics.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/lsun.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/mnist.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/omniglot.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/phototour.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/samplers/clip_sampler.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbd.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbu.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/semeion.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/stl10.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/svhn.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/ucf101.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/usps.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/utils.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/video_utils.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/vision.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/voc.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/extension.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/_video_opt.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/video.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils_origin.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/alexnet.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/densenet.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils_origin.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils_origin.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils_origin.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/faster_rcnn.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/generalized_rcnn.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/image_list.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/keypoint_rcnn.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/mask_rcnn.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/roi_heads.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/rpn.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd_origin.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssdlite.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform_origin.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/googlenet.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/inception.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mnasnet.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet_origin.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv2.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv3.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/googlenet.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/inception.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenet.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv2.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv3.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/resnet.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/shufflenetv2.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/utils.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/resnet.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/_utils.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/deeplabv3.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/fcn.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/segmentation.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/shufflenetv2.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/squeezenet.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/utils.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/vgg.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/video/resnet.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_register_onnx_ops.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils_origin.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/boxes.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/deform_conv.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/feature_pyramid_network.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc_origin.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/new_empty_tensor.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/poolers.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_align.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_pool.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_align.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_pool.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_functional_video.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_transforms_video.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_origin.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_pil.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor_origin.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms_origin.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils_origin.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/version.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/train.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/transform_ssd.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/transforms.py,The keyword license no exists in the file,please check and add it!
+PyTorch/contrib/cv/classification/SSDLite320/utils.py,The keyword license no exists in the file,please check and add it!
+License check failed, Please follow the guide to add License:
+https://gitee.com/ascend/modelzoo/blob/master/contrib/CONTRIBUTING.md
+=================Start to Check Size of File =================
+=================Start to Check Junk file =================
+=================Start to Check file of First Directory =================
+=================Start to Check Internal Link =================
+=================Start to Check Sensitive Information =================
+=================Start to Check Modelzoo Level =================
+=================Start to Check File&Keywords of Test Directory =================
+=================Start to Check core_binding&Device Id status =================
+check fail
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/presets.py b/PyTorch/contrib/cv/classification/SSDLite320/presets.py
index 88d8c697d2..b15ae0069f 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/presets.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/presets.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import torch
import transforms as T
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_1p.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_8p.sh
similarity index 95%
rename from PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_1p.sh
rename to PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_8p.sh
index 159f319279..03ad5ec6ce 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_1p.sh
+++ b/PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_8p.sh
@@ -1,7 +1,7 @@
################ Basic configuration parameters: review and modify for the model ##################
Network="SSDLite320"
# Number of NPU devices used for training
-export RANK_SIZE=1
+export RANK_SIZE=8
# Dataset path, keep empty, no modification needed
data_path="/opt/npu/dataset/coco/"
batch_size=24
@@ -56,7 +56,7 @@ python -m torch.distributed.launch --nproc_per_node=$RANK_SIZE --use_env train.p
--epochs 600\
--aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr\
--lr 0.15 --batch-size 24 --test-only\
- --weight-decay 0.00004 --data-augmentation ssdlite > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_eval_1p.log 2>&1 &
+ --weight-decay 0.00004 --data-augmentation ssdlite > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_eval_8p.log 2>&1 &
wait
@@ -68,13 +68,13 @@ e2e_time=$(( $end_time - $start_time ))
# Print results, no modification needed
echo "------------------ Final result ------------------"
# Output performance (FPS), review and modify for the model
-step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_eval_1p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}'`
+step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_eval_8p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}'`
FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${step_time}'}'`
# Print, no modification needed
echo "Final Performance images/sec : $FPS"
# Output training accuracy, review and modify for the model
-train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_eval_1p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}'`
+train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_eval_8p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}'`
# Print, no modification needed
echo "Final Train Accuracy : ${train_accuracy}"
echo "E2E Training Duration sec : $e2e_time"
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/_internally_replaced_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/_internally_replaced_utils.py
index d147997b0b..20bc303762 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/_internally_replaced_utils.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/_internally_replaced_utils.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import importlib.machinery
import os
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/caltech.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/caltech.py
index 09ec1c3d7f..f1970eabe6 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/caltech.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/caltech.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from PIL import Image
import os
import os.path
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/celeba.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/celeba.py
index 71af65ed11..10c91ebf3a 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/celeba.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/celeba.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from functools import partial
import torch
import os
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/cifar.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/cifar.py
index 127c085cfb..033ecd038c 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/cifar.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/cifar.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from PIL import Image
import os
import os.path
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/coco.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/coco.py
index 9dd3c7adf8..2417b970d2 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/coco.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/coco.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from .vision import VisionDataset
from PIL import Image
import os
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/fakedata.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/fakedata.py
index f079c1a92d..3dfa957ac4 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/fakedata.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/fakedata.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import torch
from .vision import VisionDataset
from .. import transforms
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/flickr.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/flickr.py
index 77cd430705..00f98166f3 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/flickr.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/flickr.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from collections import defaultdict
from PIL import Image
from html.parser import HTMLParser
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/folder.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/folder.py
index 16d092b716..02c87cfc84 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/folder.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/folder.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from .vision import VisionDataset
from PIL import Image
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/hmdb51.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/hmdb51.py
index 3b826bfa9a..044bc8be9e 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/hmdb51.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/hmdb51.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import glob
import os
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/imagenet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/imagenet.py
index a45ff3cd44..2f72916c55 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/imagenet.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/imagenet.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import warnings
from contextlib import contextmanager
import os
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/kinetics.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/kinetics.py
index 07db91cc19..75253b3818 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/kinetics.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/kinetics.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from .utils import list_dir
from .folder import make_dataset
from .video_utils import VideoClips
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/lsun.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/lsun.py
index fc67f8f024..33abf6af51 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/lsun.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/lsun.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from .vision import VisionDataset
from PIL import Image
import os
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/mnist.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/mnist.py
index 74bc0c16aa..931d50bfe8 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/mnist.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/mnist.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from .vision import VisionDataset
import warnings
from PIL import Image
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/omniglot.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/omniglot.py
index dd86128488..4849e8bc6f 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/omniglot.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/omniglot.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from PIL import Image
from os.path import join
import os
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/phototour.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/phototour.py
index 47591e3db8..13ab9645d1 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/phototour.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/phototour.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import os
import numpy as np
from PIL import Image
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/samplers/clip_sampler.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/samplers/clip_sampler.py
index 2432a6d20d..71cfcd34dc 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/samplers/clip_sampler.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/samplers/clip_sampler.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import math
import torch
from torch.utils.data import Sampler
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbd.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbd.py
index c4713f7257..60a3c1f801 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbd.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbd.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import os
import shutil
from .vision import VisionDataset
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbu.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbu.py
index 70cb68344b..5e70cfaa08 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbu.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbu.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from PIL import Image
from .utils import download_url, check_integrity
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/semeion.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/semeion.py
index 12c92c4a35..096a32c021 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/semeion.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/semeion.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from PIL import Image
import os
import os.path
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/stl10.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/stl10.py
index 6bec45afe2..c0e224ab6b 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/stl10.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/stl10.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from PIL import Image
import os
import os.path
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/svhn.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/svhn.py
index d96d0f3f43..2e702323dc 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/svhn.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/svhn.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from .vision import VisionDataset
from PIL import Image
import os
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/ucf101.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/ucf101.py
index 43d8124bd4..646c2bba77 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/ucf101.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/ucf101.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import glob
import os
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/usps.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/usps.py
index 06f1fd0596..f71278b9c1 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/usps.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/usps.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from PIL import Image
import os
import numpy as np
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/utils.py
index 6689eef649..99ac7f6d93 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/utils.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/utils.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import os
import os.path
import hashlib
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/video_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/video_utils.py
index 5c9244e545..7351b8fd5c 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/video_utils.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/video_utils.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import bisect
import math
from fractions import Fraction
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/vision.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/vision.py
index 7ee5a84dfc..654cc594cb 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/vision.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/vision.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import os
import torch
import torch.utils.data as data
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/voc.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/voc.py
index 2be53c4fcc..8eca53904e 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/voc.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/voc.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import os
import tarfile
import collections
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/extension.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/extension.py
index db3356aa67..35f844dae4 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/extension.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/extension.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
_HAS_OPS = False
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/_video_opt.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/_video_opt.py
index da37c66cfa..b0879e66cc 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/_video_opt.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/_video_opt.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import importlib
import math
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/video.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/video.py
index 40d1cfeed8..eb6f76a895 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/video.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/video.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import gc
import math
import re
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils.py
index f4e1cd8450..36b321d7e4 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from collections import OrderedDict
from typing import Dict, Optional
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils_origin.py
index 291041d7b5..d4f91ad495 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils_origin.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils_origin.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from collections import OrderedDict
import torch
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/alexnet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/alexnet.py
index a0126312d1..4ecc35fd9e 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/alexnet.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/alexnet.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import torch
import torch.nn as nn
from .utils import load_state_dict_from_url
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/densenet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/densenet.py
index 822dde0925..efb484b239 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/densenet.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/densenet.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import re
import torch
import torch.nn as nn
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils.py
index ce70d93be0..464944d009 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import math
from collections import OrderedDict
from typing import List, Tuple
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils_origin.py
index c48576328d..eaf9df1144 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils_origin.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils_origin.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import math
import torch
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils.py
index 1d6298eabe..52df4ec852 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import math
from typing import List, Optional
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils_origin.py
index bac7cb6c74..63323b23e7 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils_origin.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils_origin.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import math
from typing import List, Optional
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils.py
index 54fdc4c05c..9c59d10756 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import warnings
from typing import Callable, Dict, Optional, List, Union
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils_origin.py
index f5335c451d..483804669b 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils_origin.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils_origin.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from collections import OrderedDict
from torch import nn
from torchvision.ops.feature_pyramid_network import FeaturePyramidNetwork, LastLevelMaxPool
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/faster_rcnn.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/faster_rcnn.py
index 92366352b9..74fa7708f0 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/faster_rcnn.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/faster_rcnn.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from collections import OrderedDict
import torch
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/generalized_rcnn.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/generalized_rcnn.py
index 50a25fb4f9..7d570e0903 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/generalized_rcnn.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/generalized_rcnn.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
"""
Implements the Generalized R-CNN framework
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/image_list.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/image_list.py
index 583866557e..960aefd75d 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/image_list.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/image_list.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from typing import List, Tuple
import torch
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/keypoint_rcnn.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/keypoint_rcnn.py
index aeee558ca2..469ff0ca89 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/keypoint_rcnn.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/keypoint_rcnn.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import torch
from torch import nn
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/mask_rcnn.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/mask_rcnn.py
index a8a980fa3c..b188fceb56 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/mask_rcnn.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/mask_rcnn.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from collections import OrderedDict
import torch
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/roi_heads.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/roi_heads.py
index fd1334fbc2..678d2be236 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/roi_heads.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/roi_heads.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import torch
import torchvision
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/rpn.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/rpn.py
index 381bc77084..dd1ae5bbdb 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/rpn.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/rpn.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import torch
from torch.nn import functional as F
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd.py
index 91a5ae5cda..47d102ced3 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import warnings
from collections import OrderedDict
from typing import Any, Dict, List, Optional, Tuple
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd_origin.py
index be30bb54c4..69ced9e7dd 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd_origin.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd_origin.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import warnings
from collections import OrderedDict
from typing import Any, Dict, List, Optional, Tuple
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssdlite.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssdlite.py
index 652d3afe4d..76baeced45 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssdlite.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssdlite.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import warnings
from collections import OrderedDict
from functools import partial
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform.py
index af9d13414d..9922ebd87e 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import math
from typing import List, Tuple, Dict, Optional
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform_origin.py
index ffbe2279b7..d345738091 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform_origin.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform_origin.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import random
import math
import torch
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/googlenet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/googlenet.py
index 4b1cb28003..77233d0ffd 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/googlenet.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/googlenet.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import warnings
from collections import namedtuple
import torch
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/inception.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/inception.py
index e4c5430c31..0e739bab41 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/inception.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/inception.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from collections import namedtuple
import warnings
import torch
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mnasnet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mnasnet.py
index 59677427f1..70635ec63d 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mnasnet.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mnasnet.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import math
import warnings
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet.py
index 4108305d3f..a272fdc740 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from .mobilenetv2 import MobileNetV2, mobilenet_v2, __all__ as mv2_all
from .mobilenetv3 import MobileNetV3, mobilenet_v3_large, mobilenet_v3_small, __all__ as mv3_all
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet_origin.py
index e4c3069a60..e33cc5dd29 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet_origin.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet_origin.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from torch import nn
from .utils import load_state_dict_from_url
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv2.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv2.py
index 1a470953df..90023f33e2 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv2.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv2.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import warnings
from typing import Callable, Any, Optional, List
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv3.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv3.py
index e6a2bbbfbe..fa94b3256e 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv3.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv3.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import warnings
from functools import partial
from typing import Any, Callable, List, Optional, Sequence
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/googlenet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/googlenet.py
index d01534bc70..22d9065d54 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/googlenet.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/googlenet.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import warnings
import torch
import torch.nn as nn
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/inception.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/inception.py
index f452de0281..76409c0d64 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/inception.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/inception.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import warnings
from collections import namedtuple
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenet.py
index 8f2c42db64..14ac30395d 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenet.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenet.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from .mobilenetv2 import QuantizableMobileNetV2, mobilenet_v2, __all__ as mv2_all
from .mobilenetv3 import QuantizableMobileNetV3, mobilenet_v3_large, __all__ as mv3_all
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv2.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv2.py
index faa63e73be..6d7e33c366 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv2.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv2.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from typing import Any
from torch import Tensor
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv3.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv3.py
index 948b72ead7..91c3232232 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv3.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv3.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from typing import Any, List, Optional
import torch
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/resnet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/resnet.py
index 5fd3c03929..0ced2fc858 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/resnet.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/resnet.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import torch
from torchvision.models.resnet import Bottleneck, BasicBlock, ResNet, model_urls
import torch.nn as nn
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/shufflenetv2.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/shufflenetv2.py
index a2030ca5ec..58e3cfd915 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/shufflenetv2.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/shufflenetv2.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import torch
import torch.nn as nn
from torchvision.models.utils import load_state_dict_from_url
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/utils.py
index bf23c9a933..df49a023c3 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/utils.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/utils.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import torch
from torch import nn
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/resnet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/resnet.py
index 797f459f5c..ff355c68df 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/resnet.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/resnet.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import torch
import torch.nn as nn
from .utils import load_state_dict_from_url
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/_utils.py
index c5a7ae99e4..9d79016c8d 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/_utils.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/_utils.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from collections import OrderedDict
import torch
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/deeplabv3.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/deeplabv3.py
index ae652cd7d2..8721897082 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/deeplabv3.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/deeplabv3.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import torch
from torch import nn
from torch.nn import functional as F
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/fcn.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/fcn.py
index 4d7701cc4e..d55085c26a 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/fcn.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/fcn.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from torch import nn
from ._utils import _SimpleSegmentationModel
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/segmentation.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/segmentation.py
index 15df4d8ae3..49a5a21900 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/segmentation.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/segmentation.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from .._utils import IntermediateLayerGetter
from ..utils import load_state_dict_from_url
from .. import resnet
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/shufflenetv2.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/shufflenetv2.py
index 14f9521886..31091b58bf 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/shufflenetv2.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/shufflenetv2.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import torch
import torch.nn as nn
from .utils import load_state_dict_from_url
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/squeezenet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/squeezenet.py
index 964f3ec66d..1ce19a5334 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/squeezenet.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/squeezenet.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import torch
import torch.nn as nn
import torch.nn.init as init
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/utils.py
index 638ef07cd8..8452307d73 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/utils.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/utils.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
try:
from torch.hub import load_state_dict_from_url
except ImportError:
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/vgg.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/vgg.py
index dba534f651..ac4b6f7ec8 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/vgg.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/vgg.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import torch
import torch.nn as nn
from .utils import load_state_dict_from_url
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/video/resnet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/video/resnet.py
index a9e59a149c..ed3e394379 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/video/resnet.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/video/resnet.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import torch
import torch.nn as nn
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_register_onnx_ops.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_register_onnx_ops.py
index d9d9c5c094..a3a0cf4d29 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_register_onnx_ops.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_register_onnx_ops.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import sys
import torch
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils.py
index 3a07c747f5..672f2aab4a 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from typing import List, Optional, Tuple, Union
import torch
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils_origin.py
index 714022f042..fcc0613017 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils_origin.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils_origin.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import torch
from torch import Tensor
from torch.jit.annotations import List
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/boxes.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/boxes.py
index ac0dba1fe7..0239781515 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/boxes.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/boxes.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import torch
from torch.jit.annotations import Tuple
from torch import Tensor
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/deform_conv.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/deform_conv.py
index c948b16419..56995bc2ba 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/deform_conv.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/deform_conv.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import math
import torch
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/feature_pyramid_network.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/feature_pyramid_network.py
index 09e79cc7ef..9ab0db7994 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/feature_pyramid_network.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/feature_pyramid_network.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from collections import OrderedDict
import torch
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc.py
index caf0d999f7..b35a940fb4 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
"""
helper class that supports empty tensors on some nn functions.
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc_origin.py
index 65e150700a..ae80603532 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc_origin.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc_origin.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from collections import OrderedDict
from torch.jit.annotations import Optional, List
from torch import Tensor
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/new_empty_tensor.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/new_empty_tensor.py
index 74455a98c4..01a0e15300 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/new_empty_tensor.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/new_empty_tensor.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import torch
from torch.jit.annotations import List
from torch import Tensor
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/poolers.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/poolers.py
index b94a9eb405..2d529d8b72 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/poolers.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/poolers.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import torch
import torch.nn.functional as F
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_align.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_align.py
index c0c761b72c..03e70a3bbc 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_align.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_align.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import torch
from torch import nn, Tensor
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_pool.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_pool.py
index 710f2cb019..dbdda1ceb8 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_pool.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_pool.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import torch
from torch import nn, Tensor
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_align.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_align.py
index 14224d8a83..bd126efac9 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_align.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_align.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import torch
from torch import nn, Tensor
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_pool.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_pool.py
index 10232f16b4..9c53abf2d8 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_pool.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_pool.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import torch
from torch import nn, Tensor
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_functional_video.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_functional_video.py
index 06c3071690..84a9babb57 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_functional_video.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_functional_video.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import torch
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_transforms_video.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_transforms_video.py
index aa1a4b0531..f43db9a696 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_transforms_video.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_transforms_video.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
#!/usr/bin/env python3
import numbers
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional.py
index bd5b170626..d866860a90 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import math
import numbers
import warnings
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_origin.py
index 7ce1fb6ab3..8b2728c85a 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_origin.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_origin.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import torch
import math
from PIL import Image, ImageOps, ImageEnhance, __version__ as PILLOW_VERSION
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_pil.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_pil.py
index fdaf5f7de1..92c396d174 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_pil.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_pil.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import numbers
from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor.py
index 09ae726931..af0d34038d 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import warnings
from typing import Optional, Tuple, List
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor_origin.py
index b81deed6d4..8672ebe10b 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor_origin.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor_origin.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import torch
import torchvision.transforms.functional as F
from torch import Tensor
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms.py
index a409ff3cbb..2450e32509 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import math
import numbers
import random
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms_origin.py
index 10783c8e53..0d90426edd 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms_origin.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms_origin.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import torch
import math
import random
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils.py
index 399dc3fcc5..f11b6b7459 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import math
import pathlib
import warnings
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils_origin.py
index 1a773b3fd2..e8edd62e3a 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils_origin.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils_origin.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import torch
import math
irange = range
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/version.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/version.py
index 146fc171ca..2a0e78d5c9 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/version.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/version.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
__version__ = '0.6.0'
git_version = '82fd1c85d7e42d93255ed01f763ca40d58f288e3'
from torchvision.extension import _check_cuda_version
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/train.py b/PyTorch/contrib/cv/classification/SSDLite320/train.py
index ba2de2a72d..34b1b5c74b 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/train.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/train.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
r"""PyTorch Detection Training.
To run in a multi-gpu environment, use the distributed launcher::
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/transform_ssd.py b/PyTorch/contrib/cv/classification/SSDLite320/transform_ssd.py
index f0683a1a37..0091d84ab5 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/transform_ssd.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/transform_ssd.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import math
from typing import List, Tuple, Dict, Optional
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/transforms.py b/PyTorch/contrib/cv/classification/SSDLite320/transforms.py
index 9415efb893..2cb72322d8 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/transforms.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/transforms.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
from typing import List, Tuple, Dict, Optional
import torch
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/utils.py b/PyTorch/contrib/cv/classification/SSDLite320/utils.py
index 00bb4bbce1..0b32356460 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/utils.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/utils.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# ==========================================================================
+
import datetime
import errno
import os
--
Gitee
From 2f51c6bc3aa0347c1c9df6481589d6226a67613b Mon Sep 17 00:00:00 2001
From: Qw1kowa <1741235576@qq.com>
Date: Mon, 4 Jul 2022 19:41:31 +0800
Subject: [PATCH 3/8] =?UTF-8?q?SSDLite320=E9=A6=96=E6=AC=A1=E6=8F=90?=
=?UTF-8?q?=E4=BA=A4&=E5=AE=8C=E5=96=848p=E7=B2=BE=E5=BA=A6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../contrib/cv/classification/SSDLite320/README.md | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/README.md b/PyTorch/contrib/cv/classification/SSDLite320/README.md
index b53891e1c8..b68150270b 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/README.md
+++ b/PyTorch/contrib/cv/classification/SSDLite320/README.md
@@ -35,7 +35,7 @@ bash test/train_full_8p.sh --data_path=/opt/npu/dataset/coco/
# 1p train full
bash test/train_full_1p.sh --data_path=/opt/npu/dataset/coco/
-# 1p eval
+# 8p eval
bash test/train_eval_8p.sh --data_path=/opt/npu/dataset/coco/ --model_path=/eval_model.pth
```
@@ -43,19 +43,19 @@ bash test/train_eval_8p.sh --data_path=/opt/npu/dataset/coco/ --model_path=/eval
Log Path:
-- train_perf_1p.log # performance test log for 1p training
-- train_perf_8p.log # performance test log for 8p training
+- train_perf_1p.log # performance test log for 1p training
+- train_perf_8p.log # performance test log for 8p training
- train_full_1p.log # performance and accuracy log for full 1p training
- train_full_8p.log # performance and accuracy log for full 8p training
-- train_eval_1p.log # 1p validation-set accuracy evaluation log
+- train_eval_1p.log # 8p validation-set accuracy evaluation log
## SSDlite Training Results
| top1 acc | FPS | Epochs | AMP_Type | Device |
| :------: | :---: | :----: | :------: | :----: |
| - | 10.8 | 1 | O1 | 1p Npu |
-| ? | 100.8 | 660 | O1 | 8p Npu |
-| - | 54.7 | 1 | - | 1p Gpu |
-| 20.4 | 387.2 | 660 | - | 8p Gpu |
+| 20.5 | 100.8 | 600 | O1 | 8p Npu |
+| - | 54.7 | 1 | O1 | 1p Gpu |
+| 20.4 | 387.2 | 600 | O1 | 8p Gpu |
Note: the upstream repository's model reaches 21.3 (660 epochs)
\ No newline at end of file
--
Gitee
From a7b76024f2e791cc44c0887ea2ef3a0eb7a1fd0c Mon Sep 17 00:00:00 2001
From: qingy735 <2429841116@qq.com>
Date: Tue, 9 Aug 2022 20:58:47 +0800
Subject: [PATCH 4/8] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E4=B8=AA=E5=88=AB?=
=?UTF-8?q?=E4=BB=A3=E7=A0=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../cv/classification/SSDLite320/test/train_eval_8p.sh | 4 ++--
.../cv/classification/SSDLite320/test/train_full_1p.sh | 4 ++--
.../cv/classification/SSDLite320/test/train_full_8p.sh | 4 ++--
.../classification/SSDLite320/test/train_performance_1p.sh | 4 ++--
.../classification/SSDLite320/test/train_performance_8p.sh | 4 ++--
PyTorch/contrib/cv/classification/SSDLite320/train.py | 5 +++--
6 files changed, 13 insertions(+), 12 deletions(-)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_8p.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_8p.sh
index 03ad5ec6ce..b966ae9e24 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_8p.sh
+++ b/PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_8p.sh
@@ -68,13 +68,13 @@ e2e_time=$(( $end_time - $start_time ))
# Print the results; no modification needed
echo "------------------ Final result ------------------"
# Output performance (FPS); review and adjust for the model
-step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_eval_8p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}'
+step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_eval_8p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}'`
FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${step_time}'}'`
# Print; no modification needed
echo "Final Performance images/sec : $FPS"
# Output training accuracy; review and adjust for the model
-train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_eval_8p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}'
+train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_eval_8p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}'`
# Print; no modification needed
echo "Final Train Accuracy : ${train_accuracy}"
echo "E2E Training Duration sec : $e2e_time"
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/train_full_1p.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/train_full_1p.sh
index 7dafc83788..c2605ca73c 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/test/train_full_1p.sh
+++ b/PyTorch/contrib/cv/classification/SSDLite320/test/train_full_1p.sh
@@ -66,13 +66,13 @@ e2e_time=$(( $end_time - $start_time ))
# Print the results; no modification needed
echo "------------------ Final result ------------------"
# Output performance (FPS); review and adjust for the model
-step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_full_1p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}'
+step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_full_1p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}'`
FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${step_time}'}'`
# Print; no modification needed
echo "Final Performance images/sec : $FPS"
# Output training accuracy; review and adjust for the model
-train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_full_1p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}'
+train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_full_1p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}'`
# Print; no modification needed
echo "Final Train Accuracy : ${train_accuracy}"
echo "E2E Training Duration sec : $e2e_time"
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/train_full_8p.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/train_full_8p.sh
index 2dc85c74a4..c421a3cc63 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/test/train_full_8p.sh
+++ b/PyTorch/contrib/cv/classification/SSDLite320/test/train_full_8p.sh
@@ -66,13 +66,13 @@ e2e_time=$(( $end_time - $start_time ))
# Print the results; no modification needed
echo "------------------ Final result ------------------"
# Output performance (FPS); review and adjust for the model
-step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_full_8p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}'
+step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_full_8p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}'`
FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*8/'${step_time}'}'`
# Print; no modification needed
echo "Final Performance images/sec : $FPS"
# Output training accuracy; review and adjust for the model
-train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_full_8p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}'
+train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_full_8p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}'`
# Print; no modification needed
echo "Final Train Accuracy : ${train_accuracy}"
echo "E2E Training Duration sec : $e2e_time"
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_1p.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_1p.sh
index 35d9a5c924..44612007f4 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_1p.sh
+++ b/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_1p.sh
@@ -66,13 +66,13 @@ e2e_time=$(( $end_time - $start_time ))
# Print the results; no modification needed
echo "------------------ Final result ------------------"
# Output performance (FPS); review and adjust for the model
-step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_1p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}'
+step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_1p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}'`
FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${step_time}'}'`
# Print; no modification needed
echo "Final Performance images/sec : $FPS"
# Output training accuracy; review and adjust for the model
-train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_1p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}'
+train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_1p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}'`
# Print; no modification needed
echo "Final Train Accuracy : ${train_accuracy}"
echo "E2E Training Duration sec : $e2e_time"
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh
index 90656c3937..0b495404ae 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh
+++ b/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh
@@ -66,13 +66,13 @@ e2e_time=$(( $end_time - $start_time ))
# Print the results; no modification needed
echo "------------------ Final result ------------------"
# Output performance (FPS); review and adjust for the model
-step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_8p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}'
+step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_8p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}'`
FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${step_time}'}'`
# Print; no modification needed
echo "Final Performance images/sec : $FPS"
# Output training accuracy; review and adjust for the model
-train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_8p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}'
+train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_8p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}'`
# Print; no modification needed
echo "Final Train Accuracy : ${train_accuracy}"
echo "E2E Training Duration sec : $e2e_time"
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/train.py b/PyTorch/contrib/cv/classification/SSDLite320/train.py
index 34b1b5c74b..51d90e1407 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/train.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/train.py
@@ -155,11 +155,12 @@ def get_args_parser(add_help=True):
parser.add_argument(
"--lr-gamma", default=0.1, type=float, help="decrease lr by a factor of lr-gamma (multisteplr scheduler only)"
)
- parser.add_argument("--print-freq", default=1, type=int, help="print frequency")
+ parser.add_argument("--print-freq", default=20, type=int, help="print frequency")
parser.add_argument("--output-dir", default="./multigpu", type=str, help="path to save outputs")
# parser.add_argument("--resume", default="./ssdlite320_mobilenet_v3_large_coco-a79551df.pth",
# type=str, help="path of checkpoint")#model_128.pth
- parser.add_argument("--resume", default="./multigpu/model_599.pth", type=str, help="path of checkpoint")
+ parser.add_argument("--resume", default="", type=str, help="path of checkpoint")
+ # ./multigpu/model_599.pth
parser.add_argument("--start_epoch", default=0, type=int, help="start epoch")
parser.add_argument("--aspect-ratio-group-factor", default=3, type=int)
--
Gitee
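With `--resume` now defaulting to an empty string, training starts from scratch unless a checkpoint path is passed explicitly, and `--print-freq 20` reduces per-iteration logging to every 20 steps. The checkpoints themselves are plain `torch.save` dictionaries (the training loop in the later patches stores `model`, `optimizer`, `lr_scheduler`, `args`, `epoch` and `amp`), so a resume path could be restored roughly as sketched below. The helper name and the `map_location` choice are assumptions; only the flag and the dictionary keys come from the patch.

```python
import torch

def maybe_resume(args, model_without_ddp, optimizer, lr_scheduler, amp=None):
    """Hypothetical helper: restore training state when --resume is given."""
    if not args.resume:                 # default is now "", i.e. start fresh
        return args.start_epoch
    checkpoint = torch.load(args.resume, map_location="cpu")  # assumption: load to CPU first
    model_without_ddp.load_state_dict(checkpoint["model"])
    optimizer.load_state_dict(checkpoint["optimizer"])
    lr_scheduler.load_state_dict(checkpoint["lr_scheduler"])
    if amp is not None and "amp" in checkpoint:
        amp.load_state_dict(checkpoint["amp"])   # Apex AMP scaling state
    return checkpoint["epoch"] + 1      # continue from the following epoch
```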
From 0c6c3d8ec5641576b84c71458a568080d72253c2 Mon Sep 17 00:00:00 2001
From: qingy735 <2429841116@qq.com>
Date: Wed, 10 Aug 2022 00:47:08 +0800
Subject: [PATCH 5/8] =?UTF-8?q?SSDLite320=20=E5=BE=AE=E6=94=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
PyTorch/contrib/cv/classification/SSDLite320/train.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/train.py b/PyTorch/contrib/cv/classification/SSDLite320/train.py
index 51d90e1407..7fc71d633e 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/train.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/train.py
@@ -284,7 +284,7 @@ def main(args):
print("Start training",'args.distributed:',args.distributed)
start_time = time.time()
- for epoch in range(0, 1):
+ for epoch in range(args.start_epoch, args.epochs):
if args.distributed:
train_sampler.set_epoch(epoch)
--
Gitee
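This one-line change is what restores multi-epoch training: the debugging loop `range(0, 1)` always ran a single epoch regardless of `--epochs`, whereas `range(args.start_epoch, args.epochs)` honours both the configured schedule and a resumed start epoch. A trivial illustration with made-up values:

```python
# Illustrative values only: 600 epochs matches the README table, start_epoch
# defaults to 0. The point is simply the size of the two ranges.
epochs, start_epoch = 600, 0
assert len(range(0, 1)) == 1                   # old loop: always one epoch
assert len(range(start_epoch, epochs)) == 600  # fixed loop: full schedule
```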
From c02cbc0db17d44761032553659eb8887bfc22cb8 Mon Sep 17 00:00:00 2001
From: qingy735 <2429841116@qq.com>
Date: Wed, 10 Aug 2022 13:43:53 +0800
Subject: [PATCH 6/8] =?UTF-8?q?SSDLite320=20=E5=BE=AE=E8=B0=83=E8=AE=AD?=
=?UTF-8?q?=E7=BB=83=E8=84=9A=E6=9C=AC?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../SSDLite320/test/train_performance_1p.sh | 2 +-
.../SSDLite320/test/train_performance_8p.sh | 2 +-
.../cv/classification/SSDLite320/train.py | 64 +++++++++++++------
3 files changed, 47 insertions(+), 21 deletions(-)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_1p.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_1p.sh
index 44612007f4..4e473dbc4f 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_1p.sh
+++ b/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_1p.sh
@@ -53,7 +53,7 @@ python -m torch.distributed.launch --nproc_per_node=$RANK_SIZE --use_env train.p
--model ssdlite320_mobilenet_v3_large\
--epochs 2\
--aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr\
- --lr 0.15 --batch-size 24\
+ --lr 0.15 --batch-size 24 --train_only\
--weight-decay 0.00004 --data-augmentation ssdlite > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_1p.log 2>&1 &
wait
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh
index 0b495404ae..03758c34ac 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh
+++ b/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh
@@ -53,7 +53,7 @@ python -m torch.distributed.launch --nproc_per_node=$RANK_SIZE --use_env train.p
--model ssdlite320_mobilenet_v3_large\
--epochs 2\
--aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr\
- --lr 0.15 --batch-size 24\
+ --lr 0.15 --batch-size 24 --train_only\
--weight-decay 0.00004 --data-augmentation ssdlite --world-size 8 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_8p.log 2>&1 &
wait
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/train.py b/PyTorch/contrib/cv/classification/SSDLite320/train.py
index 7fc71d633e..e4e3294820 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/train.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/train.py
@@ -183,6 +183,12 @@ def get_args_parser(add_help=True):
help="Only test the model",
action="store_true",
)
+ parser.add_argument(
+ "--train-only",
+ dest="train_only",
+ help="Only train the model",
+ action="store_true",
+ )
parser.add_argument(
"--pretrained",
dest="pretrained",
@@ -284,26 +290,46 @@ def main(args):
print("Start training",'args.distributed:',args.distributed)
start_time = time.time()
- for epoch in range(args.start_epoch, args.epochs):
- if args.distributed:
- train_sampler.set_epoch(epoch)
+
+ if args.train_only:
+ for epoch in range(0, 1):
+ if args.distributed:
+ train_sampler.set_epoch(epoch)
+
+ train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq)
+ lr_scheduler.step()
+ if args.output_dir:
+ checkpoint = {
+ "model": model_without_ddp.state_dict(),
+ "optimizer": optimizer.state_dict(),
+ "lr_scheduler": lr_scheduler.state_dict(),
+ "args": args,
+ "epoch": epoch,
+ 'amp': amp.state_dict()
+ }
+ utils.save_on_master(checkpoint, os.path.join(args.output_dir, f"model_{epoch}.pth"))
+ utils.save_on_master(checkpoint, os.path.join(args.output_dir, "checkpoint.pth"))
+ else:
+ for epoch in range(args.start_epoch, args.epochs):
+ if args.distributed:
+ train_sampler.set_epoch(epoch)
- train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq)
- lr_scheduler.step()
- if args.output_dir:
- checkpoint = {
- "model": model_without_ddp.state_dict(),
- "optimizer": optimizer.state_dict(),
- "lr_scheduler": lr_scheduler.state_dict(),
- "args": args,
- "epoch": epoch,
- 'amp': amp.state_dict()
- }
- utils.save_on_master(checkpoint, os.path.join(args.output_dir, f"model_{epoch}.pth"))
- utils.save_on_master(checkpoint, os.path.join(args.output_dir, "checkpoint.pth"))
-
- # evaluate after every epoch
- evaluate(model, data_loader_test, device=device)
+ train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq)
+ lr_scheduler.step()
+ if args.output_dir:
+ checkpoint = {
+ "model": model_without_ddp.state_dict(),
+ "optimizer": optimizer.state_dict(),
+ "lr_scheduler": lr_scheduler.state_dict(),
+ "args": args,
+ "epoch": epoch,
+ 'amp': amp.state_dict()
+ }
+ utils.save_on_master(checkpoint, os.path.join(args.output_dir, f"model_{epoch}.pth"))
+ utils.save_on_master(checkpoint, os.path.join(args.output_dir, "checkpoint.pth"))
+
+ # evaluate after every epoch
+ evaluate(model, data_loader_test, device=device)
total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
--
Gitee
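The `--train-only` branch added above duplicates the entire epoch loop just to pin the range to a single epoch and drop the per-epoch `evaluate` call. A non-normative sketch of an equivalent, non-duplicated structure follows; every name in it (`train_one_epoch`, `evaluate`, `utils.save_on_master`, `amp`, the checkpoint keys) is taken from the hunk itself, and only the two gating variables are new.

```python
# Behaviourally equivalent sketch of the branch above, without the duplication:
# --train-only runs exactly one epoch (epoch 0) and skips evaluation; otherwise
# the loop covers [start_epoch, epochs) and evaluates after every epoch.
first_epoch = 0 if args.train_only else args.start_epoch
last_epoch = 1 if args.train_only else args.epochs
for epoch in range(first_epoch, last_epoch):
    if args.distributed:
        train_sampler.set_epoch(epoch)
    train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq)
    lr_scheduler.step()
    if args.output_dir:
        checkpoint = {
            "model": model_without_ddp.state_dict(),
            "optimizer": optimizer.state_dict(),
            "lr_scheduler": lr_scheduler.state_dict(),
            "args": args,
            "epoch": epoch,
            "amp": amp.state_dict(),
        }
        utils.save_on_master(checkpoint, os.path.join(args.output_dir, f"model_{epoch}.pth"))
        utils.save_on_master(checkpoint, os.path.join(args.output_dir, "checkpoint.pth"))
    if not args.train_only:
        evaluate(model, data_loader_test, device=device)  # evaluate after every epoch
```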
From f08199bcb95ef5962d0f739a1baf6f1cf3b43be8 Mon Sep 17 00:00:00 2001
From: qingy735 <2429841116@qq.com>
Date: Fri, 19 Aug 2022 23:52:29 +0800
Subject: [PATCH 7/8] =?UTF-8?q?=E4=BF=AE=E6=94=B98pFPS=E8=AE=A1=E7=AE=97?=
=?UTF-8?q?=E8=84=9A=E6=9C=AC?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../contrib/cv/classification/SSDLite320/test/train_eval_8p.sh | 2 +-
.../cv/classification/SSDLite320/test/train_performance_8p.sh | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_8p.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_8p.sh
index b966ae9e24..55618eae01 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_8p.sh
+++ b/PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_8p.sh
@@ -69,7 +69,7 @@ e2e_time=$(( $end_time - $start_time ))
echo "------------------ Final result ------------------"
# Output performance (FPS); review and adjust for the model
step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_eval_8p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}'`
-FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${step_time}'}'`
+FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*8/'${step_time}'}'`
# Print; no modification needed
echo "Final Performance images/sec : $FPS"
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh
index 03758c34ac..18bacd4a80 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh
+++ b/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh
@@ -67,7 +67,7 @@ e2e_time=$(( $end_time - $start_time ))
echo "------------------ Final result ------------------"
# Output performance (FPS); review and adjust for the model
step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_8p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}'`
-FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${step_time}'}'`
+FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*8/'${step_time}'}'`
# Print; no modification needed
echo "Final Performance images/sec : $FPS"
--
Gitee
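Both hunks above correct the aggregate-throughput formula: one distributed step processes `batch_size` images on each of the 8 devices, so the reported FPS has to be multiplied by the device count. With illustrative numbers (batch size 24 as in the scripts, a made-up step time):

```python
# Illustrative arithmetic behind the 8p FPS fix; step_time is a made-up value.
batch_size, world_size, step_time = 24, 8, 1.9   # images/device, devices, seconds/step
fps_old = batch_size / step_time                 # old formula: counts one device only
fps_new = world_size * batch_size / step_time    # fixed formula for 8 devices
print(f"per-device {fps_old:.2f} img/s -> aggregate {fps_new:.2f} img/s")
```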
From 82b7a727e5e9c247dd36605c873f0e9832bb61f4 Mon Sep 17 00:00:00 2001
From: qingy735 <2429841116@qq.com>
Date: Mon, 26 Dec 2022 11:37:59 +0000
Subject: [PATCH 8/8] update
PyTorch/contrib/cv/classification/SSDLite320/README.md.
Signed-off-by: qingy735 <2429841116@qq.com>
---
.../cv/classification/SSDLite320/README.md | 47 +++++++++++++++++--
1 file changed, 43 insertions(+), 4 deletions(-)
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/README.md b/PyTorch/contrib/cv/classification/SSDLite320/README.md
index b68150270b..8632248e19 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/README.md
+++ b/PyTorch/contrib/cv/classification/SSDLite320/README.md
@@ -1,8 +1,29 @@
# SSDLite320
-An implementation of the SSDlite320 model on the COCO2017 dataset, adapted mainly from the [pytorch/vision/tree/main/references/detection](https://github.com/pytorch/vision/tree/main/references/detection) source code
+# Overview
-## Environment Setup
+## Brief Description
+SSDLite320
+
+- Reference implementation:
+
+ ```
+ url=https://github.com/pytorch/vision/tree/main/references/detection
+ ```
+
+- To obtain the code via Git:
+
+ ```
+  git clone {url}        # clone the repository code
+  cd {code_path}         # change into the model code directory; if the repository contains only this model, this step is unnecessary
+ ```
+
+- Alternatively, click "Download Now" to download the source package.
+
+
+# Preparing the Training Environment
+
+## Preparing the Environment
- Install PyTorch and the mixed-precision training tool Apex
- Install the dependencies: `pip install -r requirements.txt`
@@ -18,7 +39,13 @@ cp -rf torchvision ~/archiconda3/envs/xxx/lib/python3.7/site-packages/torchvisio
Note: `~/archiconda3/envs/xxx/lib/python3.7/site-packages` is the package installation path of the `xxx` environment
-## Training
+## Preparing the Dataset
+Training uses the COCO2017 dataset
+
+
+# Starting Training
+
+## Training the Model
During the training stage, the scripts call `train.py` for training
@@ -49,6 +76,7 @@ Log Path:
- train_full_8p.log # performance and accuracy log for full 8p training
- train_eval_1p.log # 8p validation-set accuracy evaluation log
+# Training Results
## SSDlite Training Results
| top1 acc | FPS | Epochs | AMP_Type | Device |
@@ -58,4 +86,15 @@ Log Path:
| - | 54.7 | 1 | O1 | 1p Gpu |
| 20.4 | 387.2 | 600 | O1 | 8p Gpu |
-Note: the upstream repository's model reaches 21.3 (660 epochs)
\ No newline at end of file
+Note: the upstream repository's model reaches 21.3 (660 epochs)
+
+# Release Notes
+
+## Changes
+
+2022.08.19: Initial release
+2022.12.26: Updated to the new README template
+
+## Known Issues
+
+None.
\ No newline at end of file
--
Gitee