From 5add2f3f44199b96fa98b74d9c91924730daafa4 Mon Sep 17 00:00:00 2001 From: Qw1kowa <1741235576@qq.com> Date: Thu, 30 Jun 2022 21:39:56 +0800 Subject: [PATCH 1/8] =?UTF-8?q?SSDLite320=E9=A6=96=E6=AC=A1=E6=8F=90?= =?UTF-8?q?=E4=BA=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../cv/classification/SSDLite320/README.md | 61 + .../cv/classification/SSDLite320/coco_eval.py | 191 ++ .../classification/SSDLite320/coco_utils.py | 249 ++ .../cv/classification/SSDLite320/engine.py | 214 ++ .../SSDLite320/extract_ops_by_step.py | 60 + .../SSDLite320/group_by_aspect_ratio.py | 196 ++ .../classification/SSDLite320/image_list.py | 25 + .../cv/classification/SSDLite320/presets.py | 47 + .../SSDLite320/requirements.txt | 5 + .../classification/SSDLite320/test/env_npu.sh | 75 + .../SSDLite320/test/train_eval_1p.sh | 103 + .../SSDLite320/test/train_full_1p.sh | 101 + .../SSDLite320/test/train_full_8p.sh | 101 + .../SSDLite320/test/train_performance_1p.sh | 101 + .../SSDLite320/test/train_performance_8p.sh | 101 + .../SSDLite320/torchvision/__init__.py | 74 + .../torchvision/_internally_replaced_utils.py | 52 + .../torchvision/datasets/__init__.py | 34 + .../torchvision/datasets/caltech.py | 206 ++ .../SSDLite320/torchvision/datasets/celeba.py | 158 ++ .../SSDLite320/torchvision/datasets/cifar.py | 162 ++ .../torchvision/datasets/cityscapes.py | 207 ++ .../SSDLite320/torchvision/datasets/coco.py | 123 + .../torchvision/datasets/fakedata.py | 58 + .../SSDLite320/torchvision/datasets/flickr.py | 154 ++ .../SSDLite320/torchvision/datasets/folder.py | 207 ++ .../SSDLite320/torchvision/datasets/hmdb51.py | 130 ++ .../torchvision/datasets/imagenet.py | 218 ++ .../torchvision/datasets/kinetics.py | 79 + .../SSDLite320/torchvision/datasets/lsun.py | 152 ++ .../SSDLite320/torchvision/datasets/mnist.py | 485 ++++ .../torchvision/datasets/omniglot.py | 91 + .../torchvision/datasets/phototour.py | 209 ++ .../torchvision/datasets/samplers/__init__.py | 3 + .../datasets/samplers/clip_sampler.py | 174 ++ .../SSDLite320/torchvision/datasets/sbd.py | 124 + .../SSDLite320/torchvision/datasets/sbu.py | 107 + .../torchvision/datasets/semeion.py | 84 + .../SSDLite320/torchvision/datasets/stl10.py | 176 ++ .../SSDLite320/torchvision/datasets/svhn.py | 114 + .../SSDLite320/torchvision/datasets/ucf101.py | 107 + .../SSDLite320/torchvision/datasets/usps.py | 84 + .../SSDLite320/torchvision/datasets/utils.py | 282 +++ .../torchvision/datasets/video_utils.py | 367 +++ .../SSDLite320/torchvision/datasets/vision.py | 80 + .../SSDLite320/torchvision/datasets/voc.py | 242 ++ .../SSDLite320/torchvision/extension.py | 58 + .../SSDLite320/torchvision/io/__init__.py | 34 + .../SSDLite320/torchvision/io/_video_opt.py | 551 +++++ .../SSDLite320/torchvision/io/video.py | 349 +++ .../SSDLite320/torchvision/models/__init__.py | 14 + .../SSDLite320/torchvision/models/_utils.py | 83 + .../torchvision/models/_utils_origin.py | 67 + .../SSDLite320/torchvision/models/alexnet.py | 65 + .../SSDLite320/torchvision/models/densenet.py | 279 +++ .../torchvision/models/detection/__init__.py | 5 + .../torchvision/models/detection/_utils.py | 406 ++++ .../models/detection/_utils_origin.py | 348 +++ .../models/detection/anchor_utils.py | 279 +++ .../models/detection/anchor_utils_origin.py | 270 +++ .../models/detection/backbone_utils.py | 212 ++ .../models/detection/backbone_utils_origin.py | 63 + .../models/detection/faster_rcnn.py | 355 +++ .../models/detection/generalized_rcnn.py | 84 + 
.../models/detection/image_list.py | 25 + .../models/detection/keypoint_rcnn.py | 330 +++ .../torchvision/models/detection/mask_rcnn.py | 323 +++ .../torchvision/models/detection/roi_heads.py | 870 +++++++ .../torchvision/models/detection/rpn.py | 501 ++++ .../torchvision/models/detection/ssd.py | 638 ++++++ .../models/detection/ssd_origin.py | 629 +++++ .../torchvision/models/detection/ssdlite.py | 274 +++ .../torchvision/models/detection/transform.py | 302 +++ .../models/detection/transform_origin.py | 226 ++ .../torchvision/models/googlenet.py | 290 +++ .../torchvision/models/inception.py | 432 ++++ .../SSDLite320/torchvision/models/mnasnet.py | 258 +++ .../torchvision/models/mobilenet.py | 4 + .../torchvision/models/mobilenet_origin.py | 177 ++ .../torchvision/models/mobilenetv2.py | 211 ++ .../torchvision/models/mobilenetv3.py | 333 +++ .../models/quantization/__init__.py | 5 + .../models/quantization/googlenet.py | 166 ++ .../models/quantization/inception.py | 222 ++ .../models/quantization/mobilenet.py | 4 + .../models/quantization/mobilenetv2.py | 102 + .../models/quantization/mobilenetv3.py | 171 ++ .../torchvision/models/quantization/resnet.py | 174 ++ .../models/quantization/shufflenetv2.py | 154 ++ .../torchvision/models/quantization/utils.py | 40 + .../SSDLite320/torchvision/models/resnet.py | 353 +++ .../models/segmentation/__init__.py | 3 + .../torchvision/models/segmentation/_utils.py | 34 + .../models/segmentation/deeplabv3.py | 94 + .../torchvision/models/segmentation/fcn.py | 36 + .../models/segmentation/segmentation.py | 106 + .../torchvision/models/shufflenetv2.py | 208 ++ .../torchvision/models/squeezenet.py | 137 ++ .../SSDLite320/torchvision/models/utils.py | 4 + .../SSDLite320/torchvision/models/vgg.py | 183 ++ .../torchvision/models/video/__init__.py | 1 + .../torchvision/models/video/resnet.py | 341 +++ .../SSDLite320/torchvision/ops/__init__.py | 20 + .../torchvision/ops/_register_onnx_ops.py | 51 + .../SSDLite320/torchvision/ops/_utils.py | 63 + .../torchvision/ops/_utils_origin.py | 38 + .../SSDLite320/torchvision/ops/boxes.py | 237 ++ .../SSDLite320/torchvision/ops/deform_conv.py | 139 ++ .../ops/feature_pyramid_network.py | 193 ++ .../SSDLite320/torchvision/ops/misc.py | 168 ++ .../SSDLite320/torchvision/ops/misc_origin.py | 153 ++ .../torchvision/ops/new_empty_tensor.py | 16 + .../SSDLite320/torchvision/ops/poolers.py | 232 ++ .../torchvision/ops/ps_roi_align.py | 68 + .../SSDLite320/torchvision/ops/ps_roi_pool.py | 59 + .../SSDLite320/torchvision/ops/roi_align.py | 69 + .../SSDLite320/torchvision/ops/roi_pool.py | 57 + .../torchvision/transforms/__init__.py | 1 + .../transforms/_functional_video.py | 101 + .../transforms/_transforms_video.py | 173 ++ .../torchvision/transforms/functional.py | 1392 ++++++++++++ .../transforms/functional_origin.py | 906 ++++++++ .../torchvision/transforms/functional_pil.py | 399 ++++ .../transforms/functional_tensor.py | 987 ++++++++ .../transforms/functional_tensor_origin.py | 238 ++ .../torchvision/transforms/transforms.py | 2016 +++++++++++++++++ .../transforms/transforms_origin.py | 1297 +++++++++++ .../SSDLite320/torchvision/utils.py | 309 +++ .../SSDLite320/torchvision/utils_origin.py | 109 + .../SSDLite320/torchvision/version.py | 5 + .../cv/classification/SSDLite320/train.py | 281 +++ .../SSDLite320/transform_ssd.py | 302 +++ .../classification/SSDLite320/transforms.py | 286 +++ .../cv/classification/SSDLite320/utils.py | 288 +++ 134 files changed, 28909 insertions(+) create mode 100644 
PyTorch/contrib/cv/classification/SSDLite320/README.md create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/coco_eval.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/coco_utils.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/engine.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/extract_ops_by_step.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/group_by_aspect_ratio.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/image_list.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/presets.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/requirements.txt create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/test/env_npu.sh create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_1p.sh create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/test/train_full_1p.sh create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/test/train_full_8p.sh create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_1p.sh create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/__init__.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/_internally_replaced_utils.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/__init__.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/caltech.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/celeba.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/cifar.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/cityscapes.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/coco.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/fakedata.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/flickr.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/folder.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/hmdb51.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/imagenet.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/kinetics.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/lsun.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/mnist.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/omniglot.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/phototour.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/samplers/__init__.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/samplers/clip_sampler.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbd.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbu.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/semeion.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/stl10.py create mode 100644 
PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/svhn.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/ucf101.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/usps.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/utils.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/video_utils.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/vision.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/voc.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/extension.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/__init__.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/_video_opt.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/video.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/__init__.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils_origin.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/alexnet.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/densenet.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/__init__.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils_origin.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils_origin.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils_origin.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/faster_rcnn.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/generalized_rcnn.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/image_list.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/keypoint_rcnn.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/mask_rcnn.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/roi_heads.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/rpn.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd_origin.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssdlite.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform_origin.py create mode 100644 
PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/googlenet.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/inception.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mnasnet.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet_origin.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv2.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv3.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/__init__.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/googlenet.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/inception.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenet.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv2.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv3.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/resnet.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/shufflenetv2.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/utils.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/resnet.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/__init__.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/_utils.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/deeplabv3.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/fcn.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/segmentation.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/shufflenetv2.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/squeezenet.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/utils.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/vgg.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/video/__init__.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/video/resnet.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/__init__.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_register_onnx_ops.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils_origin.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/boxes.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/deform_conv.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/feature_pyramid_network.py create mode 100644 
PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc_origin.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/new_empty_tensor.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/poolers.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_align.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_pool.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_align.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_pool.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/__init__.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_functional_video.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_transforms_video.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_origin.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_pil.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor_origin.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms_origin.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils_origin.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/torchvision/version.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/train.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/transform_ssd.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/transforms.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/utils.py diff --git a/PyTorch/contrib/cv/classification/SSDLite320/README.md b/PyTorch/contrib/cv/classification/SSDLite320/README.md new file mode 100644 index 0000000000..b53891e1c8 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/README.md @@ -0,0 +1,61 @@ +# SSDLite320 + +SSDlite320 模型在 COCO2017 数据集上的实现,主要修改自 [pytorch/vision/tree/main/references/detection]([github.com](https://github.com/pytorch/vision/tree/main/references/detection)) 源码 + +## 环境准备 + +- 安装 Pytorch 和混合精度训练工具 Apex +- 安装依赖 `pip install -r requirements.txt` +- 下载 COCO2017 数据集 + +## torchvision环境替换 + +将当前路径下torchvision目录替换Ancona环境中的torchvision(固定shape及适配torch1.5),具体操作如下: + +```shell +cp -rf torchvision ~/archiconda3/envs/xxx/lib/python3.7/site-packages/torchvision +``` + +注:`~/archiconda3/envs/xxx/lib/python3.7/site-packages`为`xxx`环境下模块下载地址 + +## 训练 + +训练阶段,脚本调用 `train.py` 进行训练 + +```bash +# 1p train perf +bash test/train_performance_1p.sh --data_path=/opt/npu/dataset/coco/ + +# 8p train perf +bash test/train_performance_8p.sh --data_path=/opt/npu/dataset/coco/ + +# 8p train full +bash test/train_full_8p.sh --data_path=/opt/npu/dataset/coco/ + +# 1p train full +bash test/train_full_1p.sh 
--data_path=/opt/npu/dataset/coco/ + +# 1p eval +bash test/train_eval_8p.sh --data_path=/opt/npu/dataset/coco/ --model_path=/eval_model.pth +``` + +注: 可以通过修改 `--data_path` 来指定数据集文件夹的位置,例如,你的数据集地址为:`/opt/npu/dataset/coco/`, 可设置 `--data_path=/opt/npu/dataset/coco/` + +Log Path: + +- train_perf_1p.log # 1p 训练下性能测试日志 +- train_perf_8p.log # 8p 训练下性能测试日志 +- train_full_1p.log # 1p 完整训练下性能和精度测试日志 +- train_full_8p.log # 8p 完整训练下性能和精度测试日志 +- train_eval_1p.log # 1p 测试模型验证集精度日志 + +## SSDlite 训练结果 + +| top1 acc | FPS | Epochs | AMP_Type | Device | +| :------: | :---: | :----: | :------: | :----: | +| - | 10.8 | 1 | O1 | 1p Npu | +| ? | 100.8 | 660 | O1 | 8p Npu | +| - | 54.7 | 1 | - | 1p Gpu | +| 20.4 | 387.2 | 660 | - | 8p Gpu | + +注:源仓库模型测试为 21.3 (660 epochs) \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/SSDLite320/coco_eval.py b/PyTorch/contrib/cv/classification/SSDLite320/coco_eval.py new file mode 100644 index 0000000000..ec0709c5d9 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/coco_eval.py @@ -0,0 +1,191 @@ +import copy +import io +from contextlib import redirect_stdout + +import numpy as np +import pycocotools.mask as mask_util +import torch +import utils +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval + + +class CocoEvaluator: + def __init__(self, coco_gt, iou_types): + assert isinstance(iou_types, (list, tuple)) + coco_gt = copy.deepcopy(coco_gt) + self.coco_gt = coco_gt + + self.iou_types = iou_types + self.coco_eval = {} + for iou_type in iou_types: + self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type) + + self.img_ids = [] + self.eval_imgs = {k: [] for k in iou_types} + + def update(self, predictions): + img_ids = list(np.unique(list(predictions.keys()))) + self.img_ids.extend(img_ids) + + for iou_type in self.iou_types: + results = self.prepare(predictions, iou_type) + with redirect_stdout(io.StringIO()): + coco_dt = COCO.loadRes(self.coco_gt, results) if results else COCO() + coco_eval = self.coco_eval[iou_type] + + coco_eval.cocoDt = coco_dt + coco_eval.params.imgIds = list(img_ids) + img_ids, eval_imgs = evaluate(coco_eval) + + self.eval_imgs[iou_type].append(eval_imgs) + + def synchronize_between_processes(self): + for iou_type in self.iou_types: + self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2) + create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type]) + + def accumulate(self): + for coco_eval in self.coco_eval.values(): + coco_eval.accumulate() + + def summarize(self): + for iou_type, coco_eval in self.coco_eval.items(): + print(f"IoU metric: {iou_type}") + coco_eval.summarize() + + def prepare(self, predictions, iou_type): + if iou_type == "bbox": + return self.prepare_for_coco_detection(predictions) + if iou_type == "segm": + return self.prepare_for_coco_segmentation(predictions) + if iou_type == "keypoints": + return self.prepare_for_coco_keypoint(predictions) + raise ValueError(f"Unknown iou type {iou_type}") + + def prepare_for_coco_detection(self, predictions): + coco_results = [] + for original_id, prediction in predictions.items(): + if len(prediction) == 0: + continue + + boxes = prediction["boxes"] + boxes = convert_to_xywh(boxes).tolist() + scores = prediction["scores"].tolist() + labels = prediction["labels"].tolist() + + coco_results.extend( + [ + { + "image_id": original_id, + "category_id": labels[k], + "bbox": box, + "score": scores[k], + } + for k, box in enumerate(boxes) + ] + ) + return coco_results + + def 
prepare_for_coco_segmentation(self, predictions): + coco_results = [] + for original_id, prediction in predictions.items(): + if len(prediction) == 0: + continue + + scores = prediction["scores"] + labels = prediction["labels"] + masks = prediction["masks"] + + masks = masks > 0.5 + + scores = prediction["scores"].tolist() + labels = prediction["labels"].tolist() + + rles = [ + mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0] for mask in masks + ] + for rle in rles: + rle["counts"] = rle["counts"].decode("utf-8") + + coco_results.extend( + [ + { + "image_id": original_id, + "category_id": labels[k], + "segmentation": rle, + "score": scores[k], + } + for k, rle in enumerate(rles) + ] + ) + return coco_results + + def prepare_for_coco_keypoint(self, predictions): + coco_results = [] + for original_id, prediction in predictions.items(): + if len(prediction) == 0: + continue + + boxes = prediction["boxes"] + boxes = convert_to_xywh(boxes).tolist() + scores = prediction["scores"].tolist() + labels = prediction["labels"].tolist() + keypoints = prediction["keypoints"] + keypoints = keypoints.flatten(start_dim=1).tolist() + + coco_results.extend( + [ + { + "image_id": original_id, + "category_id": labels[k], + "keypoints": keypoint, + "score": scores[k], + } + for k, keypoint in enumerate(keypoints) + ] + ) + return coco_results + + +def convert_to_xywh(boxes): + xmin, ymin, xmax, ymax = boxes.unbind(1) + return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1) + + +def merge(img_ids, eval_imgs): + all_img_ids = utils.all_gather(img_ids) + all_eval_imgs = utils.all_gather(eval_imgs) + + merged_img_ids = [] + for p in all_img_ids: + merged_img_ids.extend(p) + + merged_eval_imgs = [] + for p in all_eval_imgs: + merged_eval_imgs.append(p) + + merged_img_ids = np.array(merged_img_ids) + merged_eval_imgs = np.concatenate(merged_eval_imgs, 2) + + # keep only unique (and in sorted order) images + merged_img_ids, idx = np.unique(merged_img_ids, return_index=True) + merged_eval_imgs = merged_eval_imgs[..., idx] + + return merged_img_ids, merged_eval_imgs + + +def create_common_coco_eval(coco_eval, img_ids, eval_imgs): + img_ids, eval_imgs = merge(img_ids, eval_imgs) + img_ids = list(img_ids) + eval_imgs = list(eval_imgs.flatten()) + + coco_eval.evalImgs = eval_imgs + coco_eval.params.imgIds = img_ids + coco_eval._paramsEval = copy.deepcopy(coco_eval.params) + + +def evaluate(imgs): + with redirect_stdout(io.StringIO()): + imgs.evaluate() + return imgs.params.imgIds, np.asarray(imgs.evalImgs).reshape(-1, len(imgs.params.areaRng), len(imgs.params.imgIds)) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/coco_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/coco_utils.py new file mode 100644 index 0000000000..a656602865 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/coco_utils.py @@ -0,0 +1,249 @@ +import copy +import os + +import torch +import torch.utils.data +import torchvision +import transforms as T +from pycocotools import mask as coco_mask +from pycocotools.coco import COCO + + +class FilterAndRemapCocoCategories: + def __init__(self, categories, remap=True): + self.categories = categories + self.remap = remap + + def __call__(self, image, target): + anno = target["annotations"] + anno = [obj for obj in anno if obj["category_id"] in self.categories] + if not self.remap: + target["annotations"] = anno + return image, target + anno = copy.deepcopy(anno) + for obj in anno: + obj["category_id"] = 
self.categories.index(obj["category_id"]) + target["annotations"] = anno + return image, target + + +def convert_coco_poly_to_mask(segmentations, height, width): + masks = [] + for polygons in segmentations: + rles = coco_mask.frPyObjects(polygons, height, width) + mask = coco_mask.decode(rles) + if len(mask.shape) < 3: + mask = mask[..., None] + mask = torch.as_tensor(mask, dtype=torch.uint8) + mask = mask.any(dim=2) + masks.append(mask) + if masks: + masks = torch.stack(masks, dim=0) + else: + masks = torch.zeros((0, height, width), dtype=torch.uint8) + return masks + + +class ConvertCocoPolysToMask: + def __call__(self, image, target): + w, h = image.size + + image_id = target["image_id"] + image_id = torch.tensor([image_id]) + + anno = target["annotations"] + + anno = [obj for obj in anno if obj["iscrowd"] == 0] + + boxes = [obj["bbox"] for obj in anno] + # guard against no boxes via resizing + boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4) + boxes[:, 2:] += boxes[:, :2] + boxes[:, 0::2].clamp_(min=0, max=w) + boxes[:, 1::2].clamp_(min=0, max=h) + + classes = [obj["category_id"] for obj in anno] + classes = torch.tensor(classes, dtype=torch.int64) + + segmentations = [obj["segmentation"] for obj in anno] + masks = convert_coco_poly_to_mask(segmentations, h, w) + + keypoints = None + if anno and "keypoints" in anno[0]: + keypoints = [obj["keypoints"] for obj in anno] + keypoints = torch.as_tensor(keypoints, dtype=torch.float32) + num_keypoints = keypoints.shape[0] + if num_keypoints: + keypoints = keypoints.view(num_keypoints, -1, 3) + + keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0]) + boxes = boxes[keep] + classes = classes[keep] + masks = masks[keep] + if keypoints is not None: + keypoints = keypoints[keep] + + target = {} + target["boxes"] = boxes + target["labels"] = classes + target["masks"] = masks + target["image_id"] = image_id + if keypoints is not None: + target["keypoints"] = keypoints + + # for conversion to coco api + area = torch.tensor([obj["area"] for obj in anno]) + iscrowd = torch.tensor([obj["iscrowd"] for obj in anno]) + target["area"] = area + target["iscrowd"] = iscrowd + + return image, target + + +def _coco_remove_images_without_annotations(dataset, cat_list=None): + def _has_only_empty_bbox(anno): + return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno) + + def _count_visible_keypoints(anno): + return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno) + + min_keypoints_per_image = 10 + + def _has_valid_annotation(anno): + # if it's empty, there is no annotation + if len(anno) == 0: + return False + # if all boxes have close to zero area, there is no annotation + if _has_only_empty_bbox(anno): + return False + # keypoints task have a slight different critera for considering + # if an annotation is valid + if "keypoints" not in anno[0]: + return True + # for keypoint detection tasks, only consider valid images those + # containing at least min_keypoints_per_image + if _count_visible_keypoints(anno) >= min_keypoints_per_image: + return True + return False + + assert isinstance(dataset, torchvision.datasets.CocoDetection) + ids = [] + for ds_idx, img_id in enumerate(dataset.ids): + ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None) + anno = dataset.coco.loadAnns(ann_ids) + if cat_list: + anno = [obj for obj in anno if obj["category_id"] in cat_list] + if _has_valid_annotation(anno): + ids.append(ds_idx) + + dataset = torch.utils.data.Subset(dataset, ids) + return dataset + + +def 
convert_to_coco_api(ds): + coco_ds = COCO() + # annotation IDs need to start at 1, not 0, see torchvision issue #1530 + ann_id = 1 + dataset = {"images": [], "categories": [], "annotations": []} + categories = set() + for img_idx in range(len(ds)): + # find better way to get target + # targets = ds.get_annotations(img_idx) + img, targets = ds[img_idx] + image_id = targets["image_id"].item() + img_dict = {} + img_dict["id"] = image_id + img_dict["height"] = img.shape[-2] + img_dict["width"] = img.shape[-1] + dataset["images"].append(img_dict) + bboxes = targets["boxes"] + bboxes[:, 2:] -= bboxes[:, :2] + bboxes = bboxes.tolist() + labels = targets["labels"].tolist() + areas = targets["area"].tolist() + iscrowd = targets["iscrowd"].tolist() + if "masks" in targets: + masks = targets["masks"] + # make masks Fortran contiguous for coco_mask + masks = masks.permute(0, 2, 1).contiguous().permute(0, 2, 1) + if "keypoints" in targets: + keypoints = targets["keypoints"] + keypoints = keypoints.reshape(keypoints.shape[0], -1).tolist() + num_objs = len(bboxes) + for i in range(num_objs): + ann = {} + ann["image_id"] = image_id + ann["bbox"] = bboxes[i] + ann["category_id"] = labels[i] + categories.add(labels[i]) + ann["area"] = areas[i] + ann["iscrowd"] = iscrowd[i] + ann["id"] = ann_id + if "masks" in targets: + ann["segmentation"] = coco_mask.encode(masks[i].numpy()) + if "keypoints" in targets: + ann["keypoints"] = keypoints[i] + ann["num_keypoints"] = sum(k != 0 for k in keypoints[i][2::3]) + dataset["annotations"].append(ann) + ann_id += 1 + dataset["categories"] = [{"id": i} for i in sorted(categories)] + coco_ds.dataset = dataset + coco_ds.createIndex() + return coco_ds + + +def get_coco_api_from_dataset(dataset): + for _ in range(10): + if isinstance(dataset, torchvision.datasets.CocoDetection): + break + if isinstance(dataset, torch.utils.data.Subset): + dataset = dataset.dataset + if isinstance(dataset, torchvision.datasets.CocoDetection): + return dataset.coco + return convert_to_coco_api(dataset) + + +class CocoDetection(torchvision.datasets.CocoDetection): + def __init__(self, img_folder, ann_file, transforms): + super().__init__(img_folder, ann_file) + self._transforms = transforms + + def __getitem__(self, idx): + img, target = super().__getitem__(idx) + image_id = self.ids[idx] + target = dict(image_id=image_id, annotations=target) + if self._transforms is not None: + img, target = self._transforms(img, target) + return img, target + + +def get_coco(root, image_set, transforms, mode="instances"): + anno_file_template = "{}_{}2017.json" + PATHS = { + "train": ("train2017", os.path.join("annotations", anno_file_template.format(mode, "train"))), + "val": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))), + # "train": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))) + } + + t = [ConvertCocoPolysToMask()] + + if transforms is not None: + t.append(transforms) + transforms = T.Compose(t) + + img_folder, ann_file = PATHS[image_set] + img_folder = os.path.join(root, img_folder) + ann_file = os.path.join(root, ann_file) + + dataset = CocoDetection(img_folder, ann_file, transforms=transforms) + + if image_set == "train": + dataset = _coco_remove_images_without_annotations(dataset) + + # dataset = torch.utils.data.Subset(dataset, [i for i in range(500)]) + + return dataset + + +def get_coco_kp(root, image_set, transforms): + return get_coco(root, image_set, transforms, mode="person_keypoints") diff --git 
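The pieces above are meant to be combined by the training entry point: `get_coco` builds the wrapped `CocoDetection` dataset, `get_coco_api_from_dataset` recovers a pycocotools `COCO` object, and `CocoEvaluator` from `coco_eval.py` consumes per-image predictions. A minimal sketch of that wiring follows; the data path, batch size and collate helper are illustrative assumptions rather than code copied from `train.py`.

```python
# Hypothetical wiring of the COCO helpers above; data_path, batch size and the
# collate function are illustrative assumptions, not values taken from train.py.
import torch

import presets
from coco_eval import CocoEvaluator
from coco_utils import get_coco, get_coco_api_from_dataset


def detection_collate(batch):
    # Targets are per-image dicts of varying length, so keep images and
    # targets as tuples instead of stacking them into a single tensor.
    return tuple(zip(*batch))


dataset_val = get_coco("/opt/npu/dataset/coco/", "val", presets.DetectionPresetEval())
loader = torch.utils.data.DataLoader(
    dataset_val, batch_size=1, shuffle=False, num_workers=4, collate_fn=detection_collate
)

coco_gt = get_coco_api_from_dataset(loader.dataset)     # pycocotools COCO ground truth
evaluator = CocoEvaluator(coco_gt, iou_types=["bbox"])
# Per batch: evaluator.update({image_id: prediction_dict, ...}),
# then synchronize_between_processes(), accumulate() and summarize().
```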
a/PyTorch/contrib/cv/classification/SSDLite320/engine.py b/PyTorch/contrib/cv/classification/SSDLite320/engine.py new file mode 100644 index 0000000000..33f4897eb5 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/engine.py @@ -0,0 +1,214 @@ +import math +import sys +import time +import datetime +import pdb +import torch +import torch.nn.functional as F +import numpy as np +import torchvision.models.detection.mask_rcnn +import utils +from coco_eval import CocoEvaluator +from coco_utils import get_coco_api_from_dataset +from transform_ssd import GeneralizedRCNNTransform +import os +from apex import amp + +def get_ops(images, targets): + # 提取算子 + with torch.autograd.profiler.profile(record_shapes=True, use_cuda=True) as prof: + loss_dict = model(images, targets) + losses = sum(loss for loss in loss_dict.values()) + # reduce losses over all GPUs for logging purposes + loss_dict_reduced = utils.reduce_dict(loss_dict) + losses_reduced = sum(loss for loss in loss_dict_reduced.values()) + loss_value = losses_reduced.item() + if not math.isfinite(loss_value): + print(f"Loss is {loss_value}, stopping training") + print(loss_dict_reduced) + sys.exit(1) + optimizer.zero_grad() + losses.backward() + optimizer.step() + if lr_scheduler is not None: + lr_scheduler.step() + print(prof.table(row_limit=200000)) + + +def fix_input_target(images, targets): + ''' + 固定input和targets + ''' + # 固定input image + batch_shape = (3, 1024, 1024) + # 填充值 + pad_value = 0 + # 固定ground_box数量 + max_boxes = 20 + classes = 91 + + # images = list(image.to(device) for image in images) + # len(images): 24 bs + # iages[0].shape: torch.Size([3, 207, 281]) + images_pad = [] + for image in images: + image = image.to(device) + padding_size = [0, batch_shape[-1] - image.shape[-1], + 0, batch_shape[-2] - image.shape[-2]] + padding_size = [0, batch_shape[-1] - image.shape[-1], + 0, batch_shape[-2] - image.shape[-2]] + image = F.pad(image, padding_size, value=pad_value) + images_pad.append(image) + images = images_pad + # targets = [{k: v.to(device) for k, v in t.items()} for t in targets] + targets_pad = [] + for target in targets: + boxes_num = target['boxes'].shape[0] + if boxes_num < max_boxes: + diff_num = max_boxes - boxes_num + # box对齐 + target['boxes'] = torch.cat([target['boxes'], torch.zeros([diff_num, 4])], dim=0) + # label对齐 + padding_label = np.zeros(diff_num) + classes + target['labels'] = torch.cat([target['labels'], torch.from_numpy(padding_label).long()], dim=0) + # mask对齐 + # padding_mask = torch.zeros(diff_num, target['masks'].shape[1], target['masks'].shape[2]) + padding_mask = target['masks'][0].unsqueeze(0) + target['masks'] = torch.cat([target['masks'], padding_mask], dim=0) + # area对齐 + padding_area = torch.zeros(diff_num) + target['area'] = torch.cat([target['area'], padding_area], dim=0) + # iscrowd对齐 + padding_iscrowd = torch.zeros(diff_num) + target['iscrowd'] = torch.cat([target['iscrowd'], padding_iscrowd.long()], dim=0) + else: + select_idx = torch.randperm(boxes_num)[:max_boxes] + target['boxes'] = target['boxes'][select_idx] + target['labels'] = target['labels'][select_idx] + target['masks'] = target['masks'][select_idx] + target['area'] = target['area'][select_idx] + target['iscrowd'] = target['iscrowd'][select_idx] + target['boxes'] = target['boxes'].to(device) + target['labels'] = target['labels'].to(device) + target['masks'] = target['masks'].to(device) + target['image_id'] = target['image_id'].to(device) + target['area'] = target['area'].to(device) + target['iscrowd'] = 
target['iscrowd'].to(device) + return images, targets + + +def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq): + model.train() + metric_logger = utils.MetricLogger(delimiter=" ") + metric_logger.add_meter("lr", utils.SmoothedValue(window_size=1, fmt="{value:.6f}")) + header = f"Epoch: [{epoch}]" + + lr_scheduler = None + + for images, targets in metric_logger.log_every(data_loader, print_freq, header): + ''' + 调用ssd原生transform + ''' + size = [320, 320] + image_mean = [0.485, 0.456, 0.406] + image_std = [0.229, 0.224, 0.225] + transform_ssd = GeneralizedRCNNTransform( + device, min(size), max(size), image_mean, image_std, size_divisible=1, fixed_size=size + ) + images, targets = transform_ssd(images, targets) + + targets = [{k: v.to(device) for k, v in t.items()} for t in targets] + + # torch.npu.global_step_inc() + loss_dict = model(images, targets) + losses = sum(loss for loss in loss_dict.values()) + # reduce losses over all GPUs for logging purposes + loss_dict_reduced = utils.reduce_dict(loss_dict) + losses_reduced = sum(loss for loss in loss_dict_reduced.values()) + loss_value = losses_reduced.item() + if not math.isfinite(loss_value): + print(f"Loss is {loss_value}, stopping training") + print(loss_dict_reduced) + sys.exit(1) + optimizer.zero_grad() + with amp.scale_loss(losses, optimizer) as scaled_loss: + scaled_loss.backward() + optimizer.step() + + if lr_scheduler is not None: + lr_scheduler.step() + + metric_logger.update(loss=losses_reduced, **loss_dict_reduced) + metric_logger.update(lr=optimizer.param_groups[0]["lr"]) + + return metric_logger + + +def _get_iou_types(model): + model_without_ddp = model + if isinstance(model, torch.nn.parallel.DistributedDataParallel): + model_without_ddp = model.module + iou_types = ["bbox"] + if isinstance(model_without_ddp, torchvision.models.detection.MaskRCNN): + iou_types.append("segm") + if isinstance(model_without_ddp, torchvision.models.detection.KeypointRCNN): + iou_types.append("keypoints") + return iou_types + + +# @torch.inference_mode() +def evaluate(model, data_loader, device): + n_threads = torch.get_num_threads() + # FIXME remove this and make paste_masks_in_image run on the GPU + torch.set_num_threads(1) + cpu_device = torch.device("cpu") + model.eval() + metric_logger = utils.MetricLogger(delimiter=" ") + header = "Test:" + + coco = get_coco_api_from_dataset(data_loader.dataset) + iou_types = _get_iou_types(model) + coco_evaluator = CocoEvaluator(coco, iou_types) + + for images, targets in metric_logger.log_every(data_loader, 1, header): + # get the original image sizes + original_image_sizes: List[Tuple[int, int]] = [] + for img in images: + val = img.shape[-2:] + assert len(val) == 2 + original_image_sizes.append((val[0], val[1])) + ''' + 调用ssd原生transform + ''' + size = [320, 320] + image_mean = [0.485, 0.456, 0.406] + image_std = [0.229, 0.224, 0.225] + transform_ssd = GeneralizedRCNNTransform( + device, min(size), max(size), image_mean, image_std, size_divisible=1, fixed_size=size + ) + images, _ = transform_ssd(images, None) + if torch.npu.is_available(): + torch.npu.synchronize() + model_time = time.time() + + outputs = model(images, original_image_sizes) + + outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs] + model_time = time.time() - model_time + + res = {target["image_id"].item(): output for target, output in zip(targets, outputs)} + evaluator_time = time.time() + coco_evaluator.update(res) + evaluator_time = time.time() - evaluator_time + 
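`fix_input_target` and the fixed-size `GeneralizedRCNNTransform` calls above serve the same purpose: keeping every step's tensor shapes constant so the NPU does not recompile operators for each new image size or box count. A stripped-down sketch of the same idea is shown below; the 320x320 canvas, the 20-box cap and the padding label 91 mirror the constants used above but are otherwise illustrative.

```python
# Sketch: pad every sample to constant shapes so each step sees identical
# tensor sizes (illustrative constants mirroring the code above).
import torch
import torch.nn.functional as F


def pad_to_static(image, boxes, labels, canvas=(320, 320), max_boxes=20, pad_label=91):
    _, h, w = image.shape
    # Pad the image on the right/bottom up to the fixed canvas size.
    image = F.pad(image, [0, canvas[1] - w, 0, canvas[0] - h], value=0.0)
    n = boxes.shape[0]
    if n < max_boxes:
        # Pad boxes/labels so every sample carries exactly max_boxes entries.
        boxes = torch.cat([boxes, boxes.new_zeros(max_boxes - n, 4)])
        labels = torch.cat([labels, labels.new_full((max_boxes - n,), pad_label)])
    else:
        # Otherwise randomly keep max_boxes of them, as fix_input_target does.
        keep = torch.randperm(n)[:max_boxes]
        boxes, labels = boxes[keep], labels[keep]
    return image, boxes, labels


img, boxes, labels = pad_to_static(torch.rand(3, 300, 280),
                                   torch.tensor([[10.0, 10.0, 50.0, 60.0]]),
                                   torch.tensor([5]))
print(img.shape, boxes.shape, labels.shape)  # (3, 320, 320), (20, 4), (20,)
```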
metric_logger.update(model_time=model_time, evaluator_time=evaluator_time) + + # gather the stats from all processes + metric_logger.synchronize_between_processes() + print("Averaged stats:", metric_logger) + coco_evaluator.synchronize_between_processes() + + # accumulate predictions from all images + coco_evaluator.accumulate() + coco_evaluator.summarize() + torch.set_num_threads(n_threads) + return coco_evaluator diff --git a/PyTorch/contrib/cv/classification/SSDLite320/extract_ops_by_step.py b/PyTorch/contrib/cv/classification/SSDLite320/extract_ops_by_step.py new file mode 100644 index 0000000000..6a499a143f --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/extract_ops_by_step.py @@ -0,0 +1,60 @@ +import re +from collections import defaultdict +import argparse + +def dump_file(ops, file_path): + with open(file_path, 'w') as f: + for op in ops: + f.write(op) + f.write('\n') + +def parse_profiler(profiler_file): + ops_shapes = defaultdict(set) + ops_shapes_first_step = defaultdict(set) + ops_shapes_other_steps = defaultdict(set) + with open(profiler_file, 'r') as f: + lines = f.readlines() + step = 0 + for line in lines: + if re.findall(r'^Name.*Input Shapes$', line.strip()): + step += 1 + continue + if step == 0: + continue + + if -1 == line.find('[[') and -1 == line.find('[]'): + continue + + line_fields = [field.strip() for field in line.strip().split(' ') if field != ''] + ops_shapes[line_fields[0]].add(line_fields[-1]) + if step == 1: + ops_shapes_first_step[line_fields[0]].add(line_fields[-1]) + else: + ops_shapes_other_steps[line_fields[0]].add(line_fields[-1]) + + all_ops = [k for k, v in ops_shapes.items()] + + dynamic_ops = list() + for op_name, shape_set in ops_shapes_other_steps.items(): + if op_name not in ops_shapes_first_step.keys(): + dynamic_ops.append(op_name) + else: + if len(shape_set - ops_shapes_first_step[op_name]) > 0: + dynamic_ops.append(op_name) + return all_ops, dynamic_ops + +def extract_ops(profiler_file): + all_ops, dynamic_ops = parse_profiler(profiler_file) + + print('all_ops:', all_ops) + print('dynamic_ops', dynamic_ops) + + dump_file(all_ops, 'all_ops.txt') + dump_file(dynamic_ops, 'dynamic_ops.txt') + +if __name__ == '__main__': + parser = argparse.ArgumentParser('extract ops') + parser.add_argument('--profiler_file', default='', type=str, metavar='PATH') + + args = parser.parse_args() + extract_ops(args.profiler_file) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/group_by_aspect_ratio.py b/PyTorch/contrib/cv/classification/SSDLite320/group_by_aspect_ratio.py new file mode 100644 index 0000000000..1323849a6a --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/group_by_aspect_ratio.py @@ -0,0 +1,196 @@ +import bisect +import copy +import math +from collections import defaultdict +from itertools import repeat, chain + +import numpy as np +import torch +import torch.utils.data +import torchvision +from PIL import Image +from torch.utils.data.sampler import BatchSampler, Sampler +from torch.utils.model_zoo import tqdm + + +def _repeat_to_at_least(iterable, n): + repeat_times = math.ceil(n / len(iterable)) + repeated = chain.from_iterable(repeat(iterable, repeat_times)) + return list(repeated) + + +class GroupedBatchSampler(BatchSampler): + """ + Wraps another sampler to yield a mini-batch of indices. + It enforces that the batch only contain elements from the same group. + It also tries to provide mini-batches which follows an ordering which is + as close as possible to the ordering from the original sampler. 
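`extract_ops_by_step.py` above parses a plain-text dump of `torch.autograd.profiler` tables (one table per step, each headed by a `Name ... Input Shapes` row, as produced by `get_ops` in `engine.py`) and flags operators whose input shapes change after the first step. Below is a hedged sketch of producing such a dump with a toy model and feeding it back; the dump file name and the toy module are assumptions for illustration.

```python
# Sketch: generate a per-step profiler dump and extract dynamic-shape ops from it.
# The toy model and the dump file name are illustrative assumptions.
import torch

from extract_ops_by_step import extract_ops

model = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3), torch.nn.ReLU())

with open("profiler_dump.txt", "w") as f:
    for step in range(2):
        # Change the input height between steps so at least one op shows up as dynamic.
        x = torch.rand(1, 3, 64 + 16 * step, 64)
        with torch.autograd.profiler.profile(record_shapes=True) as prof:
            model(x)
        f.write(prof.table(row_limit=200000))  # header row carries the "Input Shapes" column
        f.write("\n")

extract_ops("profiler_dump.txt")  # writes all_ops.txt and dynamic_ops.txt
```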
+ Args: + sampler (Sampler): Base sampler. + group_ids (list[int]): If the sampler produces indices in range [0, N), + `group_ids` must be a list of `N` ints which contains the group id of each sample. + The group ids must be a continuous set of integers starting from + 0, i.e. they must be in the range [0, num_groups). + batch_size (int): Size of mini-batch. + """ + + def __init__(self, sampler, group_ids, batch_size): + if not isinstance(sampler, Sampler): + raise ValueError(f"sampler should be an instance of torch.utils.data.Sampler, but got sampler={sampler}") + self.sampler = sampler + self.group_ids = group_ids + self.batch_size = batch_size + + def __iter__(self): + buffer_per_group = defaultdict(list) + samples_per_group = defaultdict(list) + + num_batches = 0 + for idx in self.sampler: + group_id = self.group_ids[idx] + buffer_per_group[group_id].append(idx) + samples_per_group[group_id].append(idx) + if len(buffer_per_group[group_id]) == self.batch_size: + yield buffer_per_group[group_id] + num_batches += 1 + del buffer_per_group[group_id] + assert len(buffer_per_group[group_id]) < self.batch_size + + # now we have run out of elements that satisfy + # the group criteria, let's return the remaining + # elements so that the size of the sampler is + # deterministic + expected_num_batches = len(self) + num_remaining = expected_num_batches - num_batches + if num_remaining > 0: + # for the remaining batches, take first the buffers with largest number + # of elements + for group_id, _ in sorted(buffer_per_group.items(), key=lambda x: len(x[1]), reverse=True): + remaining = self.batch_size - len(buffer_per_group[group_id]) + samples_from_group_id = _repeat_to_at_least(samples_per_group[group_id], remaining) + buffer_per_group[group_id].extend(samples_from_group_id[:remaining]) + assert len(buffer_per_group[group_id]) == self.batch_size + yield buffer_per_group[group_id] + num_remaining -= 1 + if num_remaining == 0: + break + assert num_remaining == 0 + + def __len__(self): + return len(self.sampler) // self.batch_size + + +def _compute_aspect_ratios_slow(dataset, indices=None): + print( + "Your dataset doesn't support the fast path for " + "computing the aspect ratios, so will iterate over " + "the full dataset and load every image instead. " + "This might take some time..." 
+ ) + if indices is None: + indices = range(len(dataset)) + + class SubsetSampler(Sampler): + def __init__(self, indices): + self.indices = indices + + def __iter__(self): + return iter(self.indices) + + def __len__(self): + return len(self.indices) + + sampler = SubsetSampler(indices) + data_loader = torch.utils.data.DataLoader( + dataset, + batch_size=1, + sampler=sampler, + num_workers=14, # you might want to increase it for faster processing + collate_fn=lambda x: x[0], + ) + aspect_ratios = [] + with tqdm(total=len(dataset)) as pbar: + for _i, (img, _) in enumerate(data_loader): + pbar.update(1) + height, width = img.shape[-2:] + aspect_ratio = float(width) / float(height) + aspect_ratios.append(aspect_ratio) + return aspect_ratios + + +def _compute_aspect_ratios_custom_dataset(dataset, indices=None): + if indices is None: + indices = range(len(dataset)) + aspect_ratios = [] + for i in indices: + height, width = dataset.get_height_and_width(i) + aspect_ratio = float(width) / float(height) + aspect_ratios.append(aspect_ratio) + return aspect_ratios + + +def _compute_aspect_ratios_coco_dataset(dataset, indices=None): + if indices is None: + indices = range(len(dataset)) + aspect_ratios = [] + for i in indices: + img_info = dataset.coco.imgs[dataset.ids[i]] + aspect_ratio = float(img_info["width"]) / float(img_info["height"]) + aspect_ratios.append(aspect_ratio) + return aspect_ratios + + +def _compute_aspect_ratios_voc_dataset(dataset, indices=None): + if indices is None: + indices = range(len(dataset)) + aspect_ratios = [] + for i in indices: + # this doesn't load the data into memory, because PIL loads it lazily + width, height = Image.open(dataset.images[i]).size + aspect_ratio = float(width) / float(height) + aspect_ratios.append(aspect_ratio) + return aspect_ratios + + +def _compute_aspect_ratios_subset_dataset(dataset, indices=None): + if indices is None: + indices = range(len(dataset)) + + ds_indices = [dataset.indices[i] for i in indices] + return compute_aspect_ratios(dataset.dataset, ds_indices) + + +def compute_aspect_ratios(dataset, indices=None): + if hasattr(dataset, "get_height_and_width"): + return _compute_aspect_ratios_custom_dataset(dataset, indices) + + if isinstance(dataset, torchvision.datasets.CocoDetection): + return _compute_aspect_ratios_coco_dataset(dataset, indices) + + if isinstance(dataset, torchvision.datasets.VOCDetection): + return _compute_aspect_ratios_voc_dataset(dataset, indices) + + if isinstance(dataset, torch.utils.data.Subset): + return _compute_aspect_ratios_subset_dataset(dataset, indices) + + # slow path + return _compute_aspect_ratios_slow(dataset, indices) + + +def _quantize(x, bins): + bins = copy.deepcopy(bins) + bins = sorted(bins) + quantized = list(map(lambda y: bisect.bisect_right(bins, y), x)) + return quantized + + +def create_aspect_ratio_groups(dataset, k=0): + aspect_ratios = compute_aspect_ratios(dataset) + bins = (2 ** np.linspace(-1, 1, 2 * k + 1)).tolist() if k > 0 else [1.0] + groups = _quantize(aspect_ratios, bins) + # count number of elements per group + counts = np.unique(groups, return_counts=True)[1] + fbins = [0] + bins + [np.inf] + print(f"Using {fbins} as bins for aspect ratio quantization") + print(f"Count of instances per bin: {counts}") + return groups diff --git a/PyTorch/contrib/cv/classification/SSDLite320/image_list.py b/PyTorch/contrib/cv/classification/SSDLite320/image_list.py new file mode 100644 index 0000000000..583866557e --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/image_list.py @@ 
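`create_aspect_ratio_groups` and `GroupedBatchSampler` are combined with an ordinary random sampler so that each batch contains images of similar aspect ratio (the test scripts pass `--aspect-ratio-group-factor 3`). A minimal sketch of that wiring; the data path, augmentation preset and batch size are illustrative assumptions.

```python
# Sketch: group COCO training images by aspect ratio before batching.
# Data path, preset and batch size are illustrative assumptions.
import torch

import presets
from coco_utils import get_coco
from group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups

dataset = get_coco("/opt/npu/dataset/coco/", "train",
                   presets.DetectionPresetTrain(data_augmentation="ssdlite"))
train_sampler = torch.utils.data.RandomSampler(dataset)

# k=3 -> bin edges 2 ** linspace(-1, 1, 7), i.e. aspect ratios between 0.5 and 2.
group_ids = create_aspect_ratio_groups(dataset, k=3)
batch_sampler = GroupedBatchSampler(train_sampler, group_ids, batch_size=24)

data_loader = torch.utils.data.DataLoader(
    dataset, batch_sampler=batch_sampler, num_workers=4,
    collate_fn=lambda batch: tuple(zip(*batch)),
)
```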
-0,0 +1,25 @@ +from typing import List, Tuple + +import torch +from torch import Tensor + + +class ImageList: + """ + Structure that holds a list of images (of possibly + varying sizes) as a single tensor. + This works by padding the images to the same size, + and storing in a field the original sizes of each image + + Args: + tensors (tensor): Tensor containing images. + image_sizes (list[tuple[int, int]]): List of Tuples each containing size of images. + """ + + def __init__(self, tensors: Tensor, image_sizes: List[Tuple[int, int]]) -> None: + self.tensors = tensors + self.image_sizes = image_sizes + + def to(self, device: torch.device) -> "ImageList": + cast_tensor = self.tensors.to(device) + return ImageList(cast_tensor, self.image_sizes) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/presets.py b/PyTorch/contrib/cv/classification/SSDLite320/presets.py new file mode 100644 index 0000000000..88d8c697d2 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/presets.py @@ -0,0 +1,47 @@ +import torch +import transforms as T + + +class DetectionPresetTrain: + def __init__(self, data_augmentation, hflip_prob=0.5, mean=(123.0, 117.0, 104.0)): + if data_augmentation == "hflip": + self.transforms = T.Compose( + [ + T.RandomHorizontalFlip(p=hflip_prob), + T.PILToTensor(), + T.ConvertImageDtype(torch.float), + ] + ) + elif data_augmentation == "ssd": + self.transforms = T.Compose( + [ + T.RandomPhotometricDistort(), + T.RandomZoomOut(fill=list(mean)), + T.RandomIoUCrop(), + T.RandomHorizontalFlip(p=hflip_prob), + T.PILToTensor(), + T.ConvertImageDtype(torch.float), + ] + ) + elif data_augmentation == "ssdlite": + self.transforms = T.Compose( + [ + T.RandomIoUCrop(), + T.RandomHorizontalFlip(p=hflip_prob), + T.PILToTensor(), + T.ConvertImageDtype(torch.float), + ] + ) + else: + raise ValueError(f'Unknown data augmentation policy "{data_augmentation}"') + + def __call__(self, img, target): + return self.transforms(img, target) + + +class DetectionPresetEval: + def __init__(self): + self.transforms = T.ToTensor() + + def __call__(self, img, target): + return self.transforms(img, target) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/requirements.txt b/PyTorch/contrib/cv/classification/SSDLite320/requirements.txt new file mode 100644 index 0000000000..e0311aad69 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/requirements.txt @@ -0,0 +1,5 @@ +cython +matplotlib +pycocotools +sympy +decorator \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/env_npu.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/env_npu.sh new file mode 100644 index 0000000000..1950129888 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/test/env_npu.sh @@ -0,0 +1,75 @@ +#!/bin/bash +export install_path=/usr/local/Ascend + +if [ -d ${install_path}/toolkit ]; then + export LD_LIBRARY_PATH=${install_path}/fwkacllib/lib64/:/usr/include/hdf5/lib/:/usr/local/:/usr/local/lib/:/usr/lib/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons:${path_lib}:${LD_LIBRARY_PATH} + export PATH=${install_path}/fwkacllib/ccec_compiler/bin:${install_path}/fwkacllib/bin:$PATH + export PYTHONPATH=${install_path}/fwkacllib/python/site-packages:${install_path}/tfplugin/python/site-packages:${install_path}/toolkit/python/site-packages:$PYTHONPATH + export PYTHONPATH=/usr/local/python3.7.5/lib/python3.7/site-packages:$PYTHONPATH + export ASCEND_OPP_PATH=${install_path}/opp +else + if [ -d 
${install_path}/nnae/latest ];then + export LD_LIBRARY_PATH=${install_path}/nnae/latest/fwkacllib/lib64/:/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64_64-linux-gnu:$LD_LIBRARY_PATH + export PATH=$PATH:${install_path}/nnae/latest/fwkacllib/ccec_compiler/bin/:${install_path}/nnae/latest/toolkit/tools/ide_daemon/bin/ + export ASCEND_OPP_PATH=${install_path}/nnae/latest/opp/ + export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so + export PYTHONPATH=${install_path}/nnae/latest/fwkacllib/python/site-packages/:${install_path}/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH + export ASCEND_AICPU_PATH=${install_path}/nnae/latest + else + export LD_LIBRARY_PATH=${install_path}/ascend-toolkit/latest/fwkacllib/lib64/:/usr/local/:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH + export PATH=$PATH:${install_path}/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:${install_path}/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/ + export ASCEND_OPP_PATH=${install_path}/ascend-toolkit/latest/opp/ + export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so + export PYTHONPATH=${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/te:$PYTHONPATH + export ASCEND_AICPU_PATH=${install_path}/ascend-toolkit/latest + fi +fi + +${install_path}/driver/tools/msnpureport -g error -d 0 +${install_path}/driver/tools/msnpureport -g error -d 1 +${install_path}/driver/tools/msnpureport -g error -d 2 +${install_path}/driver/tools/msnpureport -g error -d 3 +${install_path}/driver/tools/msnpureport -g error -d 4 +${install_path}/driver/tools/msnpureport -g error -d 5 +${install_path}/driver/tools/msnpureport -g error -d 6 +${install_path}/driver/tools/msnpureport -g error -d 7 + +#将Host日志输出到串口,0-关闭/1-开启 +export ASCEND_SLOG_PRINT_TO_STDOUT=0 +#设置默认日志级别,0-debug/1-info/2-warning/3-error +export ASCEND_GLOBAL_LOG_LEVEL=3 +#设置Event日志开启标志,0-关闭/1-开启 +export ASCEND_GLOBAL_EVENT_ENABLE=0 +#设置是否开启taskque,0-关闭/1-开启 +export TASK_QUEUE_ENABLE=1 +#设置是否开启PTCopy,0-关闭/1-开启 +export PTCOPY_ENABLE=1 +#设置是否开启combined标志,0-关闭/1-开启 +export COMBINED_ENABLE=1 +#设置特殊场景是否需要重新编译,不需要修改 +export DYNAMIC_OP="ADD#MUL" +#HCCL白名单开关,1-关闭/0-开启 +export HCCL_WHITELIST_DISABLE=1 + +ulimit -SHn 512000 + +path_lib=$(python3.7 -c """ +import sys +import re +result='' +for index in range(len(sys.path)): + match_sit = re.search('-packages', sys.path[index]) + if match_sit is not None: + match_lib 
= re.search('lib', sys.path[index]) + + if match_lib is not None: + end=match_lib.span()[1] + result += sys.path[index][0:end] + ':' + + result+=sys.path[index] + '/torch/lib:' +print(result)""" +) + +echo ${path_lib} + +export LD_LIBRARY_PATH=/usr/local/python3.7.5/lib/:${path_lib}:$LD_LIBRARY_PATH diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_1p.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_1p.sh new file mode 100644 index 0000000000..159f319279 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_1p.sh @@ -0,0 +1,103 @@ +################基础配置参数,需要模型审视修改################## +Network="SSDLite320" +# 训练使用的npu卡数 +export RANK_SIZE=1 +# 数据集路径,保持为空,不需要修改 +data_path="/opt/npu/dataset/coco/" +batch_size=24 +ASCEND_DEVICE_ID=0 +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --device_id* ]];then + device_id=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --model_path* ]];then + model_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + +#################创建日志输出目录,不需要修改################# +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + +#################启动训练脚本################# +# 训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +source ${test_path_dir}/env_npu.sh + +python -m torch.distributed.launch --nproc_per_node=$RANK_SIZE --use_env train.py\ + --dataset coco --data-path $data_path\ + --model ssdlite320_mobilenet_v3_large\ + --epochs 600\ + --aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr\ + --lr 0.15 --batch-size 24 --test-only\ + --weight-decay 0.00004 --data-augmentation ssdlite > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_eval_1p.log 2>&1 & + +wait + +##################获取训练数据################ +# 训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +# 结果打印,不需要修改 +echo "------------------ Final result ------------------" +# 输出性能FPS,需要模型审视修改 +step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_eval_1p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}' +FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${step_time}'}'` +# 打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +# 输出训练精度,需要模型审视修改 +train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_eval_1p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}' +# 打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +# 性能看护结果汇总 +# 训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +# 获取性能数据,不需要修改 +# 吞吐量 +ActualFPS=${FPS} +# 单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +# 关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > 
${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/train_full_1p.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/train_full_1p.sh new file mode 100644 index 0000000000..7dafc83788 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/test/train_full_1p.sh @@ -0,0 +1,101 @@ +################基础配置参数,需要模型审视修改################## +Network="SSDLite320" +# 训练使用的npu卡数 +export RANK_SIZE=1 +# 数据集路径,保持为空,不需要修改 +data_path="/opt/npu/dataset/coco/" +batch_size=24 +ASCEND_DEVICE_ID=0 +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --device_id* ]];then + device_id=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. 
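+    # Note on the metrics gathered at the end of this script (hedged sketch of
+    # the arithmetic, with illustrative numbers): step_time is taken from the
+    # training log and the script then computes
+    #   FPS          = batch_size / step_time        # images per second
+    #   TrainingTime = batch_size * 1000 / FPS       # milliseconds per iteration
+    # e.g. for batch_size=24 and step_time=0.5 the same awk calls used below give
+    #   awk 'BEGIN{printf "%.2f\n", 24/0.5}'         # -> 48.00
+    #   awk 'BEGIN{printf "%.2f\n", 24*1000/48}'     # -> 500.00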
+ cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + +#################创建日志输出目录,不需要修改################# +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + +#################启动训练脚本################# +# 训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +source ${test_path_dir}/env_npu.sh + +python -m torch.distributed.launch --nproc_per_node=$RANK_SIZE --use_env train.py\ + --dataset coco --data-path $data_path\ + --model ssdlite320_mobilenet_v3_large\ + --epochs 600\ + --aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr\ + --lr 0.15 --batch-size 24\ + --weight-decay 0.00004 --data-augmentation ssdlite --world-size 8 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_full_1p.log 2>&1 & + +wait + +##################获取训练数据################ +# 训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +# 结果打印,不需要修改 +echo "------------------ Final result ------------------" +# 输出性能FPS,需要模型审视修改 +step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_full_1p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}' +FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${step_time}'}'` +# 打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +# 输出训练精度,需要模型审视修改 +train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_full_1p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}' +# 打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +# 性能看护结果汇总 +# 训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +# 获取性能数据,不需要修改 +# 吞吐量 +ActualFPS=${FPS} +# 单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +# 关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/train_full_8p.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/train_full_8p.sh new file mode 100644 index 0000000000..2dc85c74a4 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/test/train_full_8p.sh @@ -0,0 +1,101 @@ +################基础配置参数,需要模型审视修改################## +Network="SSDLite320" +# 训练使用的npu卡数 +export RANK_SIZE=8 +# 数据集路径,保持为空,不需要修改 +data_path="/opt/npu/dataset/coco/" +batch_size=24 +ASCEND_DEVICE_ID=0 +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --device_id* ]];then + device_id=`echo ${para#*=}` + elif [[ $para == 
--data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + +#################创建日志输出目录,不需要修改################# +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + +#################启动训练脚本################# +# 训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +source ${test_path_dir}/env_npu.sh + +python -m torch.distributed.launch --nproc_per_node=$RANK_SIZE --use_env train.py\ + --dataset coco --data-path $data_path\ + --model ssdlite320_mobilenet_v3_large\ + --epochs 600\ + --aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr\ + --lr 0.15 --batch-size 24\ + --weight-decay 0.00004 --data-augmentation ssdlite --world-size 8 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_full_8p.log 2>&1 & + +wait + +##################获取训练数据################ +# 训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +# 结果打印,不需要修改 +echo "------------------ Final result ------------------" +# 输出性能FPS,需要模型审视修改 +step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_full_8p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}' +FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*8/'${step_time}'}'` +# 打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +# 输出训练精度,需要模型审视修改 +train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_full_8p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}' +# 打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +# 性能看护结果汇总 +# 训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +# 获取性能数据,不需要修改 +# 吞吐量 +ActualFPS=${FPS} +# 单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +# 关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_1p.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_1p.sh new file mode 100644 index 0000000000..35d9a5c924 --- /dev/null +++ 
b/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_1p.sh @@ -0,0 +1,101 @@ +################基础配置参数,需要模型审视修改################## +Network="SSDLite320" +# 训练使用的npu卡数 +export RANK_SIZE=1 +# 数据集路径,保持为空,不需要修改 +data_path="/opt/npu/dataset/coco/" +batch_size=24 +ASCEND_DEVICE_ID=0 +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --device_id* ]];then + device_id=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + +#################创建日志输出目录,不需要修改################# +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + +#################启动训练脚本################# +# 训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +source ${test_path_dir}/env_npu.sh + +python -m torch.distributed.launch --nproc_per_node=$RANK_SIZE --use_env train.py\ + --dataset coco --data-path $data_path\ + --model ssdlite320_mobilenet_v3_large\ + --epochs 2\ + --aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr\ + --lr 0.15 --batch-size 24\ + --weight-decay 0.00004 --data-augmentation ssdlite > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_1p.log 2>&1 & + +wait + +##################获取训练数据################ +# 训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +# 结果打印,不需要修改 +echo "------------------ Final result ------------------" +# 输出性能FPS,需要模型审视修改 +step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_1p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}' +FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${step_time}'}'` +# 打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +# 输出训练精度,需要模型审视修改 +train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_1p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}' +# 打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +# 性能看护结果汇总 +# 训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +# 获取性能数据,不需要修改 +# 吞吐量 +ActualFPS=${FPS} +# 单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +# 关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo 
"TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh new file mode 100644 index 0000000000..90656c3937 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh @@ -0,0 +1,101 @@ +################基础配置参数,需要模型审视修改################## +Network="SSDLite320" +# 训练使用的npu卡数 +export RANK_SIZE=8 +# 数据集路径,保持为空,不需要修改 +data_path="/opt/npu/dataset/coco/" +batch_size=24 +ASCEND_DEVICE_ID=0 +# 参数校验,data_path为必传参数,其他参数的增删由模型自身决定;此处新增参数需在上面有定义并赋值 +for para in $* +do + if [[ $para == --device_id* ]];then + device_id=`echo ${para#*=}` + elif [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + fi +done + +# 校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + +#################创建日志输出目录,不需要修改################# +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output/${ASCEND_DEVICE_ID} + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +else + mkdir -p ${test_path_dir}/output/$ASCEND_DEVICE_ID +fi + +#################启动训练脚本################# +# 训练开始时间,不需要修改 +start_time=$(date +%s) +# 非平台场景时source 环境变量 +source ${test_path_dir}/env_npu.sh + +python -m torch.distributed.launch --nproc_per_node=$RANK_SIZE --use_env train.py\ + --dataset coco --data-path $data_path\ + --model ssdlite320_mobilenet_v3_large\ + --epochs 2\ + --aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr\ + --lr 0.15 --batch-size 24\ + --weight-decay 0.00004 --data-augmentation ssdlite --world-size 8 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_8p.log 2>&1 & + +wait + +##################获取训练数据################ +# 训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) + +# 结果打印,不需要修改 +echo "------------------ Final result ------------------" +# 输出性能FPS,需要模型审视修改 +step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_8p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}' +FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${step_time}'}'` +# 打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +# 输出训练精度,需要模型审视修改 +train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_8p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}' +# 打印,不需要修改 +echo "Final Train Accuracy : ${train_accuracy}" +echo "E2E Training Duration sec : $e2e_time" + +# 性能看护结果汇总 +# 训练用例信息,不需要修改 +BatchSize=${batch_size} +DeviceType=`uname -m` +CaseName=${Network}_bs${BatchSize}_${RANK_SIZE}'p'_'acc' + +# 获取性能数据,不需要修改 +# 吞吐量 +ActualFPS=${FPS} +# 单迭代训练时长 +TrainingTime=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*1000/'${FPS}'}'` + +# 关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "RankSize = ${RANK_SIZE}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "BatchSize = ${BatchSize}" >> 
${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "CaseName = ${CaseName}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainingTime = ${TrainingTime}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "TrainAccuracy = ${train_accuracy}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> ${test_path_dir}/output/$ASCEND_DEVICE_ID/${CaseName}.log diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/__init__.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/__init__.py new file mode 100644 index 0000000000..ce8f9784ca --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/__init__.py @@ -0,0 +1,74 @@ +import warnings + +from torchvision import models +from torchvision import datasets +from torchvision import ops +from torchvision import transforms +from torchvision import utils +from torchvision import io + +from .extension import _HAS_OPS +import torch + +try: + from .version import __version__ # noqa: F401 +except ImportError: + pass + +_image_backend = 'PIL' + +_video_backend = "pyav" + + +def set_image_backend(backend): + """ + Specifies the package used to load images. + + Args: + backend (string): Name of the image backend. one of {'PIL', 'accimage'}. + The :mod:`accimage` package uses the Intel IPP library. It is + generally faster than PIL, but does not support as many operations. + """ + global _image_backend + if backend not in ['PIL', 'accimage']: + raise ValueError("Invalid backend '{}'. Options are 'PIL' and 'accimage'" + .format(backend)) + _image_backend = backend + + +def get_image_backend(): + """ + Gets the name of the package used to load images + """ + return _image_backend + + +def set_video_backend(backend): + """ + Specifies the package used to decode videos. + + Args: + backend (string): Name of the video backend. one of {'pyav', 'video_reader'}. + The :mod:`pyav` package uses the 3rd party PyAv library. It is a Pythonic + binding for the FFmpeg libraries. + The :mod:`video_reader` package includes a native C++ implementation on + top of FFMPEG libraries, and a python API of TorchScript custom operator. + It is generally decoding faster than :mod:`pyav`, but perhaps is less robust. + """ + global _video_backend + if backend not in ["pyav", "video_reader"]: + raise ValueError( + "Invalid video backend '%s'. 
Options are 'pyav' and 'video_reader'" % backend + ) + if backend == "video_reader" and not io._HAS_VIDEO_OPT: + warnings.warn("video_reader video backend is not available") + else: + _video_backend = backend + + +def get_video_backend(): + return _video_backend + + +def _is_tracing(): + return torch._C._get_tracing_state() diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/_internally_replaced_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/_internally_replaced_utils.py new file mode 100644 index 0000000000..d147997b0b --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/_internally_replaced_utils.py @@ -0,0 +1,52 @@ +import importlib.machinery +import os + + +def _download_file_from_remote_location(fpath: str, url: str) -> None: + pass + + +def _is_remote_location_available() -> bool: + return False + + +try: + from torch.hub import load_state_dict_from_url # noqa: 401 +except ImportError: + from torch.utils.model_zoo import load_url as load_state_dict_from_url # noqa: 401 + + +def _get_extension_path(lib_name): + + lib_dir = os.path.dirname(__file__) + if os.name == "nt": + # Register the main torchvision library location on the default DLL path + import ctypes + import sys + + kernel32 = ctypes.WinDLL("kernel32.dll", use_last_error=True) + with_load_library_flags = hasattr(kernel32, "AddDllDirectory") + prev_error_mode = kernel32.SetErrorMode(0x0001) + + if with_load_library_flags: + kernel32.AddDllDirectory.restype = ctypes.c_void_p + + if sys.version_info >= (3, 8): + os.add_dll_directory(lib_dir) + elif with_load_library_flags: + res = kernel32.AddDllDirectory(lib_dir) + if res is None: + err = ctypes.WinError(ctypes.get_last_error()) + err.strerror += f' Error adding "{lib_dir}" to the DLL directories.' 
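+            # The branches above register `lib_dir` with the Windows DLL search
+            # path so the compiled extension can resolve its dependencies; on
+            # Python < 3.8 builds where AddDllDirectory is also unavailable,
+            # neither branch runs and the directory must be reachable via PATH.
+            # Hedged usage sketch of this helper ("_C" is an assumed library name):
+            #     lib_path = _get_extension_path("_C")
+            #     torch.ops.load_library(lib_path)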
+ raise err + + kernel32.SetErrorMode(prev_error_mode) + + loader_details = (importlib.machinery.ExtensionFileLoader, importlib.machinery.EXTENSION_SUFFIXES) + + extfinder = importlib.machinery.FileFinder(lib_dir, loader_details) + ext_specs = extfinder.find_spec(lib_name) + if ext_specs is None: + raise ImportError + + return ext_specs.origin diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/__init__.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/__init__.py new file mode 100644 index 0000000000..db5b572a46 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/__init__.py @@ -0,0 +1,34 @@ +from .lsun import LSUN, LSUNClass +from .folder import ImageFolder, DatasetFolder +from .coco import CocoCaptions, CocoDetection +from .cifar import CIFAR10, CIFAR100 +from .stl10 import STL10 +from .mnist import MNIST, EMNIST, FashionMNIST, KMNIST, QMNIST +from .svhn import SVHN +from .phototour import PhotoTour +from .fakedata import FakeData +from .semeion import SEMEION +from .omniglot import Omniglot +from .sbu import SBU +from .flickr import Flickr8k, Flickr30k +from .voc import VOCSegmentation, VOCDetection +from .cityscapes import Cityscapes +from .imagenet import ImageNet +from .caltech import Caltech101, Caltech256 +from .celeba import CelebA +from .sbd import SBDataset +from .vision import VisionDataset +from .usps import USPS +from .kinetics import Kinetics400 +from .hmdb51 import HMDB51 +from .ucf101 import UCF101 + +__all__ = ('LSUN', 'LSUNClass', + 'ImageFolder', 'DatasetFolder', 'FakeData', + 'CocoCaptions', 'CocoDetection', + 'CIFAR10', 'CIFAR100', 'EMNIST', 'FashionMNIST', 'QMNIST', + 'MNIST', 'KMNIST', 'STL10', 'SVHN', 'PhotoTour', 'SEMEION', + 'Omniglot', 'SBU', 'Flickr8k', 'Flickr30k', + 'VOCSegmentation', 'VOCDetection', 'Cityscapes', 'ImageNet', + 'Caltech101', 'Caltech256', 'CelebA', 'SBDataset', 'VisionDataset', + 'USPS', 'Kinetics400', 'HMDB51', 'UCF101') diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/caltech.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/caltech.py new file mode 100644 index 0000000000..09ec1c3d7f --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/caltech.py @@ -0,0 +1,206 @@ +from PIL import Image +import os +import os.path + +from .vision import VisionDataset +from .utils import download_and_extract_archive, verify_str_arg + + +class Caltech101(VisionDataset): + """`Caltech 101 `_ Dataset. + + .. warning:: + + This class needs `scipy `_ to load target files from `.mat` format. + + Args: + root (string): Root directory of dataset where directory + ``caltech101`` exists or will be saved to if download is set to True. + target_type (string or list, optional): Type of target to use, ``category`` or + ``annotation``. Can also be a list to output a tuple with all specified target types. + ``category`` represents the target class, and ``annotation`` is a list of points + from a hand-generated outline. Defaults to ``category``. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + download (bool, optional): If true, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. 
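+
+    Example:
+
+        Minimal usage sketch (assumes the archives are already present under
+        ``root/caltech101`` or that ``download=True`` is passed)
+
+        .. code-block:: python
+
+            dataset = Caltech101('./data', target_type='category')
+            img, category = dataset[0]  # PIL image and integer class index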
+ """ + + def __init__(self, root, target_type="category", transform=None, + target_transform=None, download=False): + super(Caltech101, self).__init__(os.path.join(root, 'caltech101'), + transform=transform, + target_transform=target_transform) + os.makedirs(self.root, exist_ok=True) + if not isinstance(target_type, list): + target_type = [target_type] + self.target_type = [verify_str_arg(t, "target_type", ("category", "annotation")) + for t in target_type] + + if download: + self.download() + + if not self._check_integrity(): + raise RuntimeError('Dataset not found or corrupted.' + + ' You can use download=True to download it') + + self.categories = sorted(os.listdir(os.path.join(self.root, "101_ObjectCategories"))) + self.categories.remove("BACKGROUND_Google") # this is not a real class + + # For some reason, the category names in "101_ObjectCategories" and + # "Annotations" do not always match. This is a manual map between the + # two. Defaults to using same name, since most names are fine. + name_map = {"Faces": "Faces_2", + "Faces_easy": "Faces_3", + "Motorbikes": "Motorbikes_16", + "airplanes": "Airplanes_Side_2"} + self.annotation_categories = list(map(lambda x: name_map[x] if x in name_map else x, self.categories)) + + self.index = [] + self.y = [] + for (i, c) in enumerate(self.categories): + n = len(os.listdir(os.path.join(self.root, "101_ObjectCategories", c))) + self.index.extend(range(1, n + 1)) + self.y.extend(n * [i]) + + def __getitem__(self, index): + """ + Args: + index (int): Index + + Returns: + tuple: (image, target) where the type of target specified by target_type. + """ + import scipy.io + + img = Image.open(os.path.join(self.root, + "101_ObjectCategories", + self.categories[self.y[index]], + "image_{:04d}.jpg".format(self.index[index]))) + + target = [] + for t in self.target_type: + if t == "category": + target.append(self.y[index]) + elif t == "annotation": + data = scipy.io.loadmat(os.path.join(self.root, + "Annotations", + self.annotation_categories[self.y[index]], + "annotation_{:04d}.mat".format(self.index[index]))) + target.append(data["obj_contour"]) + target = tuple(target) if len(target) > 1 else target[0] + + if self.transform is not None: + img = self.transform(img) + + if self.target_transform is not None: + target = self.target_transform(target) + + return img, target + + def _check_integrity(self): + # can be more robust and check hash of files + return os.path.exists(os.path.join(self.root, "101_ObjectCategories")) + + def __len__(self): + return len(self.index) + + def download(self): + if self._check_integrity(): + print('Files already downloaded and verified') + return + + download_and_extract_archive( + "http://www.vision.caltech.edu/Image_Datasets/Caltech101/101_ObjectCategories.tar.gz", + self.root, + filename="101_ObjectCategories.tar.gz", + md5="b224c7392d521a49829488ab0f1120d9") + download_and_extract_archive( + "http://www.vision.caltech.edu/Image_Datasets/Caltech101/Annotations.tar", + self.root, + filename="101_Annotations.tar", + md5="6f83eeb1f24d99cab4eb377263132c91") + + def extra_repr(self): + return "Target type: {target_type}".format(**self.__dict__) + + +class Caltech256(VisionDataset): + """`Caltech 256 `_ Dataset. + + Args: + root (string): Root directory of dataset where directory + ``caltech256`` exists or will be saved to if download is set to True. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. 
E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + download (bool, optional): If true, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. + """ + + def __init__(self, root, transform=None, target_transform=None, download=False): + super(Caltech256, self).__init__(os.path.join(root, 'caltech256'), + transform=transform, + target_transform=target_transform) + os.makedirs(self.root, exist_ok=True) + + if download: + self.download() + + if not self._check_integrity(): + raise RuntimeError('Dataset not found or corrupted.' + + ' You can use download=True to download it') + + self.categories = sorted(os.listdir(os.path.join(self.root, "256_ObjectCategories"))) + self.index = [] + self.y = [] + for (i, c) in enumerate(self.categories): + n = len(os.listdir(os.path.join(self.root, "256_ObjectCategories", c))) + self.index.extend(range(1, n + 1)) + self.y.extend(n * [i]) + + def __getitem__(self, index): + """ + Args: + index (int): Index + + Returns: + tuple: (image, target) where target is index of the target class. + """ + img = Image.open(os.path.join(self.root, + "256_ObjectCategories", + self.categories[self.y[index]], + "{:03d}_{:04d}.jpg".format(self.y[index] + 1, self.index[index]))) + + target = self.y[index] + + if self.transform is not None: + img = self.transform(img) + + if self.target_transform is not None: + target = self.target_transform(target) + + return img, target + + def _check_integrity(self): + # can be more robust and check hash of files + return os.path.exists(os.path.join(self.root, "256_ObjectCategories")) + + def __len__(self): + return len(self.index) + + def download(self): + if self._check_integrity(): + print('Files already downloaded and verified') + return + + download_and_extract_archive( + "http://www.vision.caltech.edu/Image_Datasets/Caltech256/256_ObjectCategories.tar", + self.root, + filename="256_ObjectCategories.tar", + md5="67b4f42ca05d46448c6bb8ecd2220f6d") diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/celeba.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/celeba.py new file mode 100644 index 0000000000..71af65ed11 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/celeba.py @@ -0,0 +1,158 @@ +from functools import partial +import torch +import os +import PIL +from .vision import VisionDataset +from .utils import download_file_from_google_drive, check_integrity, verify_str_arg + + +class CelebA(VisionDataset): + """`Large-scale CelebFaces Attributes (CelebA) Dataset `_ Dataset. + + Args: + root (string): Root directory where images are downloaded to. + split (string): One of {'train', 'valid', 'test', 'all'}. + Accordingly dataset is selected. + target_type (string or list, optional): Type of target to use, ``attr``, ``identity``, ``bbox``, + or ``landmarks``. Can also be a list to output a tuple with all specified target types. 
+ The targets represent: + ``attr`` (np.array shape=(40,) dtype=int): binary (0, 1) labels for attributes + ``identity`` (int): label for each person (data points with the same identity are the same person) + ``bbox`` (np.array shape=(4,) dtype=int): bounding box (x, y, width, height) + ``landmarks`` (np.array shape=(10,) dtype=int): landmark points (lefteye_x, lefteye_y, righteye_x, + righteye_y, nose_x, nose_y, leftmouth_x, leftmouth_y, rightmouth_x, rightmouth_y) + Defaults to ``attr``. If empty, ``None`` will be returned as target. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.ToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + download (bool, optional): If true, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. + """ + + base_folder = "celeba" + # There currently does not appear to be a easy way to extract 7z in python (without introducing additional + # dependencies). The "in-the-wild" (not aligned+cropped) images are only in 7z, so they are not available + # right now. + file_list = [ + # File ID MD5 Hash Filename + ("0B7EVK8r0v71pZjFTYXZWM3FlRnM", "00d2c5bc6d35e252742224ab0c1e8fcb", "img_align_celeba.zip"), + # ("0B7EVK8r0v71pbWNEUjJKdDQ3dGc", "b6cd7e93bc7a96c2dc33f819aa3ac651", "img_align_celeba_png.7z"), + # ("0B7EVK8r0v71peklHb0pGdDl6R28", "b6cd7e93bc7a96c2dc33f819aa3ac651", "img_celeba.7z"), + ("0B7EVK8r0v71pblRyaVFSWGxPY0U", "75e246fa4810816ffd6ee81facbd244c", "list_attr_celeba.txt"), + ("1_ee_0u7vcNLOfNLegJRHmolfH5ICW-XS", "32bd1bd63d3c78cd57e08160ec5ed1e2", "identity_CelebA.txt"), + ("0B7EVK8r0v71pbThiMVRxWXZ4dU0", "00566efa6fedff7a56946cd1c10f1c16", "list_bbox_celeba.txt"), + ("0B7EVK8r0v71pd0FJY3Blby1HUTQ", "cc24ecafdb5b50baae59b03474781f8c", "list_landmarks_align_celeba.txt"), + # ("0B7EVK8r0v71pTzJIdlJWdHczRlU", "063ee6ddb681f96bc9ca28c6febb9d1a", "list_landmarks_celeba.txt"), + ("0B7EVK8r0v71pY0NSMzRuSXJEVkk", "d32c9cbf5e040fd4025c592c306e6668", "list_eval_partition.txt"), + ] + + def __init__(self, root, split="train", target_type="attr", transform=None, + target_transform=None, download=False): + import pandas + super(CelebA, self).__init__(root, transform=transform, + target_transform=target_transform) + self.split = split + if isinstance(target_type, list): + self.target_type = target_type + else: + self.target_type = [target_type] + + if not self.target_type and self.target_transform is not None: + raise RuntimeError('target_transform is specified but target_type is empty') + + if download: + self.download() + + if not self._check_integrity(): + raise RuntimeError('Dataset not found or corrupted.' 
+ + ' You can use download=True to download it') + + split_map = { + "train": 0, + "valid": 1, + "test": 2, + "all": None, + } + split = split_map[verify_str_arg(split.lower(), "split", + ("train", "valid", "test", "all"))] + + fn = partial(os.path.join, self.root, self.base_folder) + splits = pandas.read_csv(fn("list_eval_partition.txt"), delim_whitespace=True, header=None, index_col=0) + identity = pandas.read_csv(fn("identity_CelebA.txt"), delim_whitespace=True, header=None, index_col=0) + bbox = pandas.read_csv(fn("list_bbox_celeba.txt"), delim_whitespace=True, header=1, index_col=0) + landmarks_align = pandas.read_csv(fn("list_landmarks_align_celeba.txt"), delim_whitespace=True, header=1) + attr = pandas.read_csv(fn("list_attr_celeba.txt"), delim_whitespace=True, header=1) + + mask = slice(None) if split is None else (splits[1] == split) + + self.filename = splits[mask].index.values + self.identity = torch.as_tensor(identity[mask].values) + self.bbox = torch.as_tensor(bbox[mask].values) + self.landmarks_align = torch.as_tensor(landmarks_align[mask].values) + self.attr = torch.as_tensor(attr[mask].values) + self.attr = (self.attr + 1) // 2 # map from {-1, 1} to {0, 1} + self.attr_names = list(attr.columns) + + def _check_integrity(self): + for (_, md5, filename) in self.file_list: + fpath = os.path.join(self.root, self.base_folder, filename) + _, ext = os.path.splitext(filename) + # Allow original archive to be deleted (zip and 7z) + # Only need the extracted images + if ext not in [".zip", ".7z"] and not check_integrity(fpath, md5): + return False + + # Should check a hash of the images + return os.path.isdir(os.path.join(self.root, self.base_folder, "img_align_celeba")) + + def download(self): + import zipfile + + if self._check_integrity(): + print('Files already downloaded and verified') + return + + for (file_id, md5, filename) in self.file_list: + download_file_from_google_drive(file_id, os.path.join(self.root, self.base_folder), filename, md5) + + with zipfile.ZipFile(os.path.join(self.root, self.base_folder, "img_align_celeba.zip"), "r") as f: + f.extractall(os.path.join(self.root, self.base_folder)) + + def __getitem__(self, index): + X = PIL.Image.open(os.path.join(self.root, self.base_folder, "img_align_celeba", self.filename[index])) + + target = [] + for t in self.target_type: + if t == "attr": + target.append(self.attr[index, :]) + elif t == "identity": + target.append(self.identity[index, 0]) + elif t == "bbox": + target.append(self.bbox[index, :]) + elif t == "landmarks": + target.append(self.landmarks_align[index, :]) + else: + # TODO: refactor with utils.verify_str_arg + raise ValueError("Target type \"{}\" is not recognized.".format(t)) + + if self.transform is not None: + X = self.transform(X) + + if target: + target = tuple(target) if len(target) > 1 else target[0] + + if self.target_transform is not None: + target = self.target_transform(target) + else: + target = None + + return X, target + + def __len__(self): + return len(self.attr) + + def extra_repr(self): + lines = ["Target type: {target_type}", "Split: {split}"] + return '\n'.join(lines).format(**self.__dict__) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/cifar.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/cifar.py new file mode 100644 index 0000000000..127c085cfb --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/cifar.py @@ -0,0 +1,162 @@ +from PIL import Image +import os +import os.path +import numpy as np 
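+# Usage sketch for the CIFAR10/CIFAR100 classes defined below (illustrative
+# only; the ./data root is an assumption):
+#     from torchvision.datasets import CIFAR10
+#     from torchvision import transforms
+#     train_set = CIFAR10(root='./data', train=True, download=True,
+#                         transform=transforms.ToTensor())
+#     img, label = train_set[0]  # 3x32x32 tensor in [0, 1], int label in [0, 9]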
+import pickle + +from .vision import VisionDataset +from .utils import check_integrity, download_and_extract_archive + + +class CIFAR10(VisionDataset): + """`CIFAR10 `_ Dataset. + + Args: + root (string): Root directory of dataset where directory + ``cifar-10-batches-py`` exists or will be saved to if download is set to True. + train (bool, optional): If True, creates dataset from training set, otherwise + creates from test set. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + download (bool, optional): If true, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. + + """ + base_folder = 'cifar-10-batches-py' + url = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz" + filename = "cifar-10-python.tar.gz" + tgz_md5 = 'c58f30108f718f92721af3b95e74349a' + train_list = [ + ['data_batch_1', 'c99cafc152244af753f735de768cd75f'], + ['data_batch_2', 'd4bba439e000b95fd0a9bffe97cbabec'], + ['data_batch_3', '54ebc095f3ab1f0389bbae665268c751'], + ['data_batch_4', '634d18415352ddfa80567beed471001a'], + ['data_batch_5', '482c414d41f54cd18b22e5b47cb7c3cb'], + ] + + test_list = [ + ['test_batch', '40351d587109b95175f43aff81a1287e'], + ] + meta = { + 'filename': 'batches.meta', + 'key': 'label_names', + 'md5': '5ff9c542aee3614f3951f8cda6e48888', + } + + def __init__(self, root, train=True, transform=None, target_transform=None, + download=False): + + super(CIFAR10, self).__init__(root, transform=transform, + target_transform=target_transform) + + self.train = train # training set or test set + + if download: + self.download() + + if not self._check_integrity(): + raise RuntimeError('Dataset not found or corrupted.' + + ' You can use download=True to download it') + + if self.train: + downloaded_list = self.train_list + else: + downloaded_list = self.test_list + + self.data = [] + self.targets = [] + + # now load the picked numpy arrays + for file_name, checksum in downloaded_list: + file_path = os.path.join(self.root, self.base_folder, file_name) + with open(file_path, 'rb') as f: + entry = pickle.load(f, encoding='latin1') + self.data.append(entry['data']) + if 'labels' in entry: + self.targets.extend(entry['labels']) + else: + self.targets.extend(entry['fine_labels']) + + self.data = np.vstack(self.data).reshape(-1, 3, 32, 32) + self.data = self.data.transpose((0, 2, 3, 1)) # convert to HWC + + self._load_meta() + + def _load_meta(self): + path = os.path.join(self.root, self.base_folder, self.meta['filename']) + if not check_integrity(path, self.meta['md5']): + raise RuntimeError('Dataset metadata file not found or corrupted.' + + ' You can use download=True to download it') + with open(path, 'rb') as infile: + data = pickle.load(infile, encoding='latin1') + self.classes = data[self.meta['key']] + self.class_to_idx = {_class: i for i, _class in enumerate(self.classes)} + + def __getitem__(self, index): + """ + Args: + index (int): Index + + Returns: + tuple: (image, target) where target is index of the target class. 
+ """ + img, target = self.data[index], self.targets[index] + + # doing this so that it is consistent with all other datasets + # to return a PIL Image + img = Image.fromarray(img) + + if self.transform is not None: + img = self.transform(img) + + if self.target_transform is not None: + target = self.target_transform(target) + + return img, target + + def __len__(self): + return len(self.data) + + def _check_integrity(self): + root = self.root + for fentry in (self.train_list + self.test_list): + filename, md5 = fentry[0], fentry[1] + fpath = os.path.join(root, self.base_folder, filename) + if not check_integrity(fpath, md5): + return False + return True + + def download(self): + if self._check_integrity(): + print('Files already downloaded and verified') + return + download_and_extract_archive(self.url, self.root, filename=self.filename, md5=self.tgz_md5) + + def extra_repr(self): + return "Split: {}".format("Train" if self.train is True else "Test") + + +class CIFAR100(CIFAR10): + """`CIFAR100 `_ Dataset. + + This is a subclass of the `CIFAR10` Dataset. + """ + base_folder = 'cifar-100-python' + url = "https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz" + filename = "cifar-100-python.tar.gz" + tgz_md5 = 'eb9058c3a382ffc7106e4002c42a8d85' + train_list = [ + ['train', '16019d7e3df5f24257cddd939b257f8d'], + ] + + test_list = [ + ['test', 'f0ef6b0ae62326f3e7ffdfab6717acfc'], + ] + meta = { + 'filename': 'meta', + 'key': 'fine_label_names', + 'md5': '7973b15100ade9c7d40fb424638fde48', + } diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/cityscapes.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/cityscapes.py new file mode 100644 index 0000000000..6e92361f50 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/cityscapes.py @@ -0,0 +1,207 @@ +import json +import os +from collections import namedtuple +import zipfile + +from .utils import extract_archive, verify_str_arg, iterable_to_str +from .vision import VisionDataset +from PIL import Image + + +class Cityscapes(VisionDataset): + """`Cityscapes `_ Dataset. + + Args: + root (string): Root directory of dataset where directory ``leftImg8bit`` + and ``gtFine`` or ``gtCoarse`` are located. + split (string, optional): The image split to use, ``train``, ``test`` or ``val`` if mode="fine" + otherwise ``train``, ``train_extra`` or ``val`` + mode (string, optional): The quality mode to use, ``fine`` or ``coarse`` + target_type (string or list, optional): Type of target to use, ``instance``, ``semantic``, ``polygon`` + or ``color``. Can also be a list to output a tuple with all specified target types. + transform (callable, optional): A function/transform that takes in a PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + + Examples: + + Get semantic segmentation target + + .. code-block:: python + + dataset = Cityscapes('./data/cityscapes', split='train', mode='fine', + target_type='semantic') + + img, smnt = dataset[0] + + Get multiple targets + + .. code-block:: python + + dataset = Cityscapes('./data/cityscapes', split='train', mode='fine', + target_type=['instance', 'color', 'polygon']) + + img, (inst, col, poly) = dataset[0] + + Validate on the "coarse" set + + .. 
code-block:: python + + dataset = Cityscapes('./data/cityscapes', split='val', mode='coarse', + target_type='semantic') + + img, smnt = dataset[0] + """ + + # Based on https://github.com/mcordts/cityscapesScripts + CityscapesClass = namedtuple('CityscapesClass', ['name', 'id', 'train_id', 'category', 'category_id', + 'has_instances', 'ignore_in_eval', 'color']) + + classes = [ + CityscapesClass('unlabeled', 0, 255, 'void', 0, False, True, (0, 0, 0)), + CityscapesClass('ego vehicle', 1, 255, 'void', 0, False, True, (0, 0, 0)), + CityscapesClass('rectification border', 2, 255, 'void', 0, False, True, (0, 0, 0)), + CityscapesClass('out of roi', 3, 255, 'void', 0, False, True, (0, 0, 0)), + CityscapesClass('static', 4, 255, 'void', 0, False, True, (0, 0, 0)), + CityscapesClass('dynamic', 5, 255, 'void', 0, False, True, (111, 74, 0)), + CityscapesClass('ground', 6, 255, 'void', 0, False, True, (81, 0, 81)), + CityscapesClass('road', 7, 0, 'flat', 1, False, False, (128, 64, 128)), + CityscapesClass('sidewalk', 8, 1, 'flat', 1, False, False, (244, 35, 232)), + CityscapesClass('parking', 9, 255, 'flat', 1, False, True, (250, 170, 160)), + CityscapesClass('rail track', 10, 255, 'flat', 1, False, True, (230, 150, 140)), + CityscapesClass('building', 11, 2, 'construction', 2, False, False, (70, 70, 70)), + CityscapesClass('wall', 12, 3, 'construction', 2, False, False, (102, 102, 156)), + CityscapesClass('fence', 13, 4, 'construction', 2, False, False, (190, 153, 153)), + CityscapesClass('guard rail', 14, 255, 'construction', 2, False, True, (180, 165, 180)), + CityscapesClass('bridge', 15, 255, 'construction', 2, False, True, (150, 100, 100)), + CityscapesClass('tunnel', 16, 255, 'construction', 2, False, True, (150, 120, 90)), + CityscapesClass('pole', 17, 5, 'object', 3, False, False, (153, 153, 153)), + CityscapesClass('polegroup', 18, 255, 'object', 3, False, True, (153, 153, 153)), + CityscapesClass('traffic light', 19, 6, 'object', 3, False, False, (250, 170, 30)), + CityscapesClass('traffic sign', 20, 7, 'object', 3, False, False, (220, 220, 0)), + CityscapesClass('vegetation', 21, 8, 'nature', 4, False, False, (107, 142, 35)), + CityscapesClass('terrain', 22, 9, 'nature', 4, False, False, (152, 251, 152)), + CityscapesClass('sky', 23, 10, 'sky', 5, False, False, (70, 130, 180)), + CityscapesClass('person', 24, 11, 'human', 6, True, False, (220, 20, 60)), + CityscapesClass('rider', 25, 12, 'human', 6, True, False, (255, 0, 0)), + CityscapesClass('car', 26, 13, 'vehicle', 7, True, False, (0, 0, 142)), + CityscapesClass('truck', 27, 14, 'vehicle', 7, True, False, (0, 0, 70)), + CityscapesClass('bus', 28, 15, 'vehicle', 7, True, False, (0, 60, 100)), + CityscapesClass('caravan', 29, 255, 'vehicle', 7, True, True, (0, 0, 90)), + CityscapesClass('trailer', 30, 255, 'vehicle', 7, True, True, (0, 0, 110)), + CityscapesClass('train', 31, 16, 'vehicle', 7, True, False, (0, 80, 100)), + CityscapesClass('motorcycle', 32, 17, 'vehicle', 7, True, False, (0, 0, 230)), + CityscapesClass('bicycle', 33, 18, 'vehicle', 7, True, False, (119, 11, 32)), + CityscapesClass('license plate', -1, -1, 'vehicle', 7, False, True, (0, 0, 142)), + ] + + def __init__(self, root, split='train', mode='fine', target_type='instance', + transform=None, target_transform=None, transforms=None): + super(Cityscapes, self).__init__(root, transforms, transform, target_transform) + self.mode = 'gtFine' if mode == 'fine' else 'gtCoarse' + self.images_dir = os.path.join(self.root, 'leftImg8bit', split) + self.targets_dir = 
os.path.join(self.root, self.mode, split) + self.target_type = target_type + self.split = split + self.images = [] + self.targets = [] + + verify_str_arg(mode, "mode", ("fine", "coarse")) + if mode == "fine": + valid_modes = ("train", "test", "val") + else: + valid_modes = ("train", "train_extra", "val") + msg = ("Unknown value '{}' for argument split if mode is '{}'. " + "Valid values are {{{}}}.") + msg = msg.format(split, mode, iterable_to_str(valid_modes)) + verify_str_arg(split, "split", valid_modes, msg) + + if not isinstance(target_type, list): + self.target_type = [target_type] + [verify_str_arg(value, "target_type", + ("instance", "semantic", "polygon", "color")) + for value in self.target_type] + + if not os.path.isdir(self.images_dir) or not os.path.isdir(self.targets_dir): + + if split == 'train_extra': + image_dir_zip = os.path.join(self.root, 'leftImg8bit{}'.format('_trainextra.zip')) + else: + image_dir_zip = os.path.join(self.root, 'leftImg8bit{}'.format('_trainvaltest.zip')) + + if self.mode == 'gtFine': + target_dir_zip = os.path.join(self.root, '{}{}'.format(self.mode, '_trainvaltest.zip')) + elif self.mode == 'gtCoarse': + target_dir_zip = os.path.join(self.root, '{}{}'.format(self.mode, '.zip')) + + if os.path.isfile(image_dir_zip) and os.path.isfile(target_dir_zip): + extract_archive(from_path=image_dir_zip, to_path=self.root) + extract_archive(from_path=target_dir_zip, to_path=self.root) + else: + raise RuntimeError('Dataset not found or incomplete. Please make sure all required folders for the' + ' specified "split" and "mode" are inside the "root" directory') + + for city in os.listdir(self.images_dir): + img_dir = os.path.join(self.images_dir, city) + target_dir = os.path.join(self.targets_dir, city) + for file_name in os.listdir(img_dir): + target_types = [] + for t in self.target_type: + target_name = '{}_{}'.format(file_name.split('_leftImg8bit')[0], + self._get_target_suffix(self.mode, t)) + target_types.append(os.path.join(target_dir, target_name)) + + self.images.append(os.path.join(img_dir, file_name)) + self.targets.append(target_types) + + def __getitem__(self, index): + """ + Args: + index (int): Index + Returns: + tuple: (image, target) where target is a tuple of all target types if target_type is a list with more + than one item. Otherwise target is a json object if target_type="polygon", else the image segmentation. 
+ """ + + image = Image.open(self.images[index]).convert('RGB') + + targets = [] + for i, t in enumerate(self.target_type): + if t == 'polygon': + target = self._load_json(self.targets[index][i]) + else: + target = Image.open(self.targets[index][i]) + + targets.append(target) + + target = tuple(targets) if len(targets) > 1 else targets[0] + + if self.transforms is not None: + image, target = self.transforms(image, target) + + return image, target + + def __len__(self): + return len(self.images) + + def extra_repr(self): + lines = ["Split: {split}", "Mode: {mode}", "Type: {target_type}"] + return '\n'.join(lines).format(**self.__dict__) + + def _load_json(self, path): + with open(path, 'r') as file: + data = json.load(file) + return data + + def _get_target_suffix(self, mode, target_type): + if target_type == 'instance': + return '{}_instanceIds.png'.format(mode) + elif target_type == 'semantic': + return '{}_labelIds.png'.format(mode) + elif target_type == 'color': + return '{}_color.png'.format(mode) + else: + return '{}_polygons.json'.format(mode) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/coco.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/coco.py new file mode 100644 index 0000000000..9dd3c7adf8 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/coco.py @@ -0,0 +1,123 @@ +from .vision import VisionDataset +from PIL import Image +import os +import os.path + + +class CocoCaptions(VisionDataset): + """`MS Coco Captions `_ Dataset. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.ToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + + Example: + + .. code:: python + + import torchvision.datasets as dset + import torchvision.transforms as transforms + cap = dset.CocoCaptions(root = 'dir where images are', + annFile = 'json annotation file', + transform=transforms.ToTensor()) + + print('Number of samples: ', len(cap)) + img, target = cap[3] # load 4th sample + + print("Image Size: ", img.size()) + print(target) + + Output: :: + + Number of samples: 82783 + Image Size: (3L, 427L, 640L) + [u'A plane emitting smoke stream flying over a mountain.', + u'A plane darts across a bright blue sky behind a mountain covered in snow', + u'A plane leaves a contrail above the snowy mountain top.', + u'A mountain that has a plane flying overheard in the distance.', + u'A mountain view with a plume of smoke in the background'] + + """ + + def __init__(self, root, annFile, transform=None, target_transform=None, transforms=None): + super(CocoCaptions, self).__init__(root, transforms, transform, target_transform) + from pycocotools.coco import COCO + self.coco = COCO(annFile) + self.ids = list(sorted(self.coco.imgs.keys())) + + def __getitem__(self, index): + """ + Args: + index (int): Index + + Returns: + tuple: Tuple (image, target). target is a list of captions for the image. 
+ """ + coco = self.coco + img_id = self.ids[index] + ann_ids = coco.getAnnIds(imgIds=img_id) + anns = coco.loadAnns(ann_ids) + target = [ann['caption'] for ann in anns] + + path = coco.loadImgs(img_id)[0]['file_name'] + + img = Image.open(os.path.join(self.root, path)).convert('RGB') + + if self.transforms is not None: + img, target = self.transforms(img, target) + + return img, target + + def __len__(self): + return len(self.ids) + + +class CocoDetection(VisionDataset): + """`MS Coco Detection `_ Dataset. + + Args: + root (string): Root directory where images are downloaded to. + annFile (string): Path to json annotation file. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.ToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + """ + + def __init__(self, root, annFile, transform=None, target_transform=None, transforms=None): + super(CocoDetection, self).__init__(root, transforms, transform, target_transform) + from pycocotools.coco import COCO + self.coco = COCO(annFile) + self.ids = list(sorted(self.coco.imgs.keys())) + + def __getitem__(self, index): + """ + Args: + index (int): Index + + Returns: + tuple: Tuple (image, target). target is the object returned by ``coco.loadAnns``. + """ + coco = self.coco + img_id = self.ids[index] + ann_ids = coco.getAnnIds(imgIds=img_id) + target = coco.loadAnns(ann_ids) + + path = coco.loadImgs(img_id)[0]['file_name'] + + img = Image.open(os.path.join(self.root, path)).convert('RGB') + if self.transforms is not None: + img, target = self.transforms(img, target) + + return img, target + + def __len__(self): + return len(self.ids) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/fakedata.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/fakedata.py new file mode 100644 index 0000000000..f079c1a92d --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/fakedata.py @@ -0,0 +1,58 @@ +import torch +from .vision import VisionDataset +from .. import transforms + + +class FakeData(VisionDataset): + """A fake dataset that returns randomly generated images and returns them as PIL images + + Args: + size (int, optional): Size of the dataset. Default: 1000 images + image_size(tuple, optional): Size if the returned images. Default: (3, 224, 224) + num_classes(int, optional): Number of classes in the datset. Default: 10 + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + random_offset (int): Offsets the index-based random seed used to + generate each image. Default: 0 + + """ + + def __init__(self, size=1000, image_size=(3, 224, 224), num_classes=10, + transform=None, target_transform=None, random_offset=0): + super(FakeData, self).__init__(None, transform=transform, + target_transform=target_transform) + self.size = size + self.num_classes = num_classes + self.image_size = image_size + self.random_offset = random_offset + + def __getitem__(self, index): + """ + Args: + index (int): Index + + Returns: + tuple: (image, target) where target is class_index of the target class. 
+ """ + # create random image that is consistent with the index id + if index >= len(self): + raise IndexError("{} index out of range".format(self.__class__.__name__)) + rng_state = torch.get_rng_state() + torch.manual_seed(index + self.random_offset) + img = torch.randn(*self.image_size) + target = torch.randint(0, self.num_classes, size=(1,), dtype=torch.long)[0] + torch.set_rng_state(rng_state) + + # convert to PIL Image + img = transforms.ToPILImage()(img) + if self.transform is not None: + img = self.transform(img) + if self.target_transform is not None: + target = self.target_transform(target) + + return img, target + + def __len__(self): + return self.size diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/flickr.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/flickr.py new file mode 100644 index 0000000000..77cd430705 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/flickr.py @@ -0,0 +1,154 @@ +from collections import defaultdict +from PIL import Image +from html.parser import HTMLParser + +import glob +import os +from .vision import VisionDataset + + +class Flickr8kParser(HTMLParser): + """Parser for extracting captions from the Flickr8k dataset web page.""" + + def __init__(self, root): + super(Flickr8kParser, self).__init__() + + self.root = root + + # Data structure to store captions + self.annotations = {} + + # State variables + self.in_table = False + self.current_tag = None + self.current_img = None + + def handle_starttag(self, tag, attrs): + self.current_tag = tag + + if tag == 'table': + self.in_table = True + + def handle_endtag(self, tag): + self.current_tag = None + + if tag == 'table': + self.in_table = False + + def handle_data(self, data): + if self.in_table: + if data == 'Image Not Found': + self.current_img = None + elif self.current_tag == 'a': + img_id = data.split('/')[-2] + img_id = os.path.join(self.root, img_id + '_*.jpg') + img_id = glob.glob(img_id)[0] + self.current_img = img_id + self.annotations[img_id] = [] + elif self.current_tag == 'li' and self.current_img: + img_id = self.current_img + self.annotations[img_id].append(data.strip()) + + +class Flickr8k(VisionDataset): + """`Flickr8k Entities `_ Dataset. + + Args: + root (string): Root directory where images are downloaded to. + ann_file (string): Path to annotation file. + transform (callable, optional): A function/transform that takes in a PIL image + and returns a transformed version. E.g, ``transforms.ToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + """ + + def __init__(self, root, ann_file, transform=None, target_transform=None): + super(Flickr8k, self).__init__(root, transform=transform, + target_transform=target_transform) + self.ann_file = os.path.expanduser(ann_file) + + # Read annotations and store in a dict + parser = Flickr8kParser(self.root) + with open(self.ann_file) as fh: + parser.feed(fh.read()) + self.annotations = parser.annotations + + self.ids = list(sorted(self.annotations.keys())) + + def __getitem__(self, index): + """ + Args: + index (int): Index + + Returns: + tuple: Tuple (image, target). target is a list of captions for the image. 
+ """ + img_id = self.ids[index] + + # Image + img = Image.open(img_id).convert('RGB') + if self.transform is not None: + img = self.transform(img) + + # Captions + target = self.annotations[img_id] + if self.target_transform is not None: + target = self.target_transform(target) + + return img, target + + def __len__(self): + return len(self.ids) + + +class Flickr30k(VisionDataset): + """`Flickr30k Entities `_ Dataset. + + Args: + root (string): Root directory where images are downloaded to. + ann_file (string): Path to annotation file. + transform (callable, optional): A function/transform that takes in a PIL image + and returns a transformed version. E.g, ``transforms.ToTensor`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + """ + + def __init__(self, root, ann_file, transform=None, target_transform=None): + super(Flickr30k, self).__init__(root, transform=transform, + target_transform=target_transform) + self.ann_file = os.path.expanduser(ann_file) + + # Read annotations and store in a dict + self.annotations = defaultdict(list) + with open(self.ann_file) as fh: + for line in fh: + img_id, caption = line.strip().split('\t') + self.annotations[img_id[:-2]].append(caption) + + self.ids = list(sorted(self.annotations.keys())) + + def __getitem__(self, index): + """ + Args: + index (int): Index + + Returns: + tuple: Tuple (image, target). target is a list of captions for the image. + """ + img_id = self.ids[index] + + # Image + filename = os.path.join(self.root, img_id) + img = Image.open(filename).convert('RGB') + if self.transform is not None: + img = self.transform(img) + + # Captions + target = self.annotations[img_id] + if self.target_transform is not None: + target = self.target_transform(target) + + return img, target + + def __len__(self): + return len(self.ids) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/folder.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/folder.py new file mode 100644 index 0000000000..16d092b716 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/folder.py @@ -0,0 +1,207 @@ +from .vision import VisionDataset + +from PIL import Image + +import os +import os.path + + +def has_file_allowed_extension(filename, extensions): + """Checks if a file is an allowed extension. + + Args: + filename (string): path to a file + extensions (tuple of strings): extensions to consider (lowercase) + + Returns: + bool: True if the filename ends with one of given extensions + """ + return filename.lower().endswith(extensions) + + +def is_image_file(filename): + """Checks if a file is an allowed image extension. 
+ + Args: + filename (string): path to a file + + Returns: + bool: True if the filename ends with a known image extension + """ + return has_file_allowed_extension(filename, IMG_EXTENSIONS) + + +def make_dataset(directory, class_to_idx, extensions=None, is_valid_file=None): + instances = [] + directory = os.path.expanduser(directory) + both_none = extensions is None and is_valid_file is None + both_something = extensions is not None and is_valid_file is not None + if both_none or both_something: + raise ValueError("Both extensions and is_valid_file cannot be None or not None at the same time") + if extensions is not None: + def is_valid_file(x): + return has_file_allowed_extension(x, extensions) + for target_class in sorted(class_to_idx.keys()): + class_index = class_to_idx[target_class] + target_dir = os.path.join(directory, target_class) + if not os.path.isdir(target_dir): + continue + for root, _, fnames in sorted(os.walk(target_dir, followlinks=True)): + for fname in sorted(fnames): + path = os.path.join(root, fname) + if is_valid_file(path): + item = path, class_index + instances.append(item) + return instances + + +class DatasetFolder(VisionDataset): + """A generic data loader where the samples are arranged in this way: :: + + root/class_x/xxx.ext + root/class_x/xxy.ext + root/class_x/xxz.ext + + root/class_y/123.ext + root/class_y/nsdf3.ext + root/class_y/asd932_.ext + + Args: + root (string): Root directory path. + loader (callable): A function to load a sample given its path. + extensions (tuple[string]): A list of allowed extensions. + both extensions and is_valid_file should not be passed. + transform (callable, optional): A function/transform that takes in + a sample and returns a transformed version. + E.g, ``transforms.RandomCrop`` for images. + target_transform (callable, optional): A function/transform that takes + in the target and transforms it. + is_valid_file (callable, optional): A function that takes path of a file + and check if the file is a valid file (used to check of corrupt files) + both extensions and is_valid_file should not be passed. + + Attributes: + classes (list): List of the class names. + class_to_idx (dict): Dict with items (class_name, class_index). + samples (list): List of (sample path, class_index) tuples + targets (list): The class_index value for each image in the dataset + """ + + def __init__(self, root, loader, extensions=None, transform=None, + target_transform=None, is_valid_file=None): + super(DatasetFolder, self).__init__(root, transform=transform, + target_transform=target_transform) + classes, class_to_idx = self._find_classes(self.root) + samples = make_dataset(self.root, class_to_idx, extensions, is_valid_file) + if len(samples) == 0: + raise (RuntimeError("Found 0 files in subfolders of: " + self.root + "\n" + "Supported extensions are: " + ",".join(extensions))) + + self.loader = loader + self.extensions = extensions + + self.classes = classes + self.class_to_idx = class_to_idx + self.samples = samples + self.targets = [s[1] for s in samples] + + def _find_classes(self, dir): + """ + Finds the class folders in a dataset. + + Args: + dir (string): Root directory path. + + Returns: + tuple: (classes, class_to_idx) where classes are relative to (dir), and class_to_idx is a dictionary. + + Ensures: + No class is a subdirectory of another. 
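+
+        Example (illustrative; the directory names are placeholders): for a
+        root directory containing only the subfolders ``cat`` and ``dog``,
+        this returns ``(['cat', 'dog'], {'cat': 0, 'dog': 1})``.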
+ """ + classes = [d.name for d in os.scandir(dir) if d.is_dir()] + classes.sort() + class_to_idx = {classes[i]: i for i in range(len(classes))} + return classes, class_to_idx + + def __getitem__(self, index): + """ + Args: + index (int): Index + + Returns: + tuple: (sample, target) where target is class_index of the target class. + """ + path, target = self.samples[index] + sample = self.loader(path) + if self.transform is not None: + sample = self.transform(sample) + if self.target_transform is not None: + target = self.target_transform(target) + + return sample, target + + def __len__(self): + return len(self.samples) + + +IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp') + + +def pil_loader(path): + # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) + with open(path, 'rb') as f: + img = Image.open(f) + return img.convert('RGB') + + +def accimage_loader(path): + import accimage + try: + return accimage.Image(path) + except IOError: + # Potentially a decoding problem, fall back to PIL.Image + return pil_loader(path) + + +def default_loader(path): + from torchvision import get_image_backend + if get_image_backend() == 'accimage': + return accimage_loader(path) + else: + return pil_loader(path) + + +class ImageFolder(DatasetFolder): + """A generic data loader where the images are arranged in this way: :: + + root/dog/xxx.png + root/dog/xxy.png + root/dog/xxz.png + + root/cat/123.png + root/cat/nsdf3.png + root/cat/asd932_.png + + Args: + root (string): Root directory path. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + loader (callable, optional): A function to load an image given its path. + is_valid_file (callable, optional): A function that takes path of an Image file + and check if the file is a valid file (used to check of corrupt files) + + Attributes: + classes (list): List of the class names. + class_to_idx (dict): Dict with items (class_name, class_index). + imgs (list): List of (image path, class_index) tuples + """ + + def __init__(self, root, transform=None, target_transform=None, + loader=default_loader, is_valid_file=None): + super(ImageFolder, self).__init__(root, loader, IMG_EXTENSIONS if is_valid_file is None else None, + transform=transform, + target_transform=target_transform, + is_valid_file=is_valid_file) + self.imgs = self.samples diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/hmdb51.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/hmdb51.py new file mode 100644 index 0000000000..3b826bfa9a --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/hmdb51.py @@ -0,0 +1,130 @@ +import glob +import os + +from .utils import list_dir +from .folder import make_dataset +from .video_utils import VideoClips +from .vision import VisionDataset + + +class HMDB51(VisionDataset): + """ + `HMDB51 `_ + dataset. + + HMDB51 is an action recognition video dataset. + This dataset consider every video as a collection of video clips of fixed size, specified + by ``frames_per_clip``, where the step in frames between each clip is given by + ``step_between_clips``. 
+ + To give an example, for 2 videos with 10 and 15 frames respectively, if ``frames_per_clip=5`` + and ``step_between_clips=5``, the dataset size will be (2 + 3) = 5, where the first two + elements will come from video 1, and the next three elements from video 2. + Note that we drop clips which do not have exactly ``frames_per_clip`` elements, so not all + frames in a video might be present. + + Internally, it uses a VideoClips object to handle clip creation. + + Args: + root (string): Root directory of the HMDB51 Dataset. + annotation_path (str): Path to the folder containing the split files. + frames_per_clip (int): Number of frames in a clip. + step_between_clips (int): Number of frames between each clip. + fold (int, optional): Which fold to use. Should be between 1 and 3. + train (bool, optional): If ``True``, creates a dataset from the train split, + otherwise from the ``test`` split. + transform (callable, optional): A function/transform that takes in a TxHxWxC video + and returns a transformed version. + + Returns: + video (Tensor[T, H, W, C]): the `T` video frames + audio(Tensor[K, L]): the audio frames, where `K` is the number of channels + and `L` is the number of points + label (int): class of the video clip + """ + + data_url = "http://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/hmdb51_org.rar" + splits = { + "url": "http://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/test_train_splits.rar", + "md5": "15e67781e70dcfbdce2d7dbb9b3344b5" + } + TRAIN_TAG = 1 + TEST_TAG = 2 + + def __init__(self, root, annotation_path, frames_per_clip, step_between_clips=1, + frame_rate=None, fold=1, train=True, transform=None, + _precomputed_metadata=None, num_workers=1, _video_width=0, + _video_height=0, _video_min_dimension=0, _audio_samples=0): + super(HMDB51, self).__init__(root) + if fold not in (1, 2, 3): + raise ValueError("fold should be between 1 and 3, got {}".format(fold)) + + extensions = ('avi',) + classes = sorted(list_dir(root)) + class_to_idx = {class_: i for (i, class_) in enumerate(classes)} + self.samples = make_dataset( + self.root, + class_to_idx, + extensions, + ) + + video_paths = [path for (path, _) in self.samples] + video_clips = VideoClips( + video_paths, + frames_per_clip, + step_between_clips, + frame_rate, + _precomputed_metadata, + num_workers=num_workers, + _video_width=_video_width, + _video_height=_video_height, + _video_min_dimension=_video_min_dimension, + _audio_samples=_audio_samples, + ) + self.fold = fold + self.train = train + self.classes = classes + self.video_clips_metadata = video_clips.metadata + self.indices = self._select_fold(video_paths, annotation_path, fold, train) + self.video_clips = video_clips.subset(self.indices) + self.transform = transform + + @property + def metadata(self): + return self.video_clips_metadata + + def _select_fold(self, video_list, annotations_dir, fold, train): + target_tag = self.TRAIN_TAG if train else self.TEST_TAG + split_pattern_name = "*test_split{}.txt".format(fold) + split_pattern_path = os.path.join(annotations_dir, split_pattern_name) + annotation_paths = glob.glob(split_pattern_path) + selected_files = [] + for filepath in annotation_paths: + with open(filepath) as fid: + lines = fid.readlines() + for line in lines: + video_filename, tag_string = line.split() + tag = int(tag_string) + if tag == target_tag: + selected_files.append(video_filename) + selected_files = set(selected_files) + + indices = [] + for video_index, video_path in enumerate(video_list): + if os.path.basename(video_path) in 
selected_files: + indices.append(video_index) + + return indices + + def __len__(self): + return self.video_clips.num_clips() + + def __getitem__(self, idx): + video, audio, _, video_idx = self.video_clips.get_clip(idx) + sample_index = self.indices[video_idx] + _, class_index = self.samples[sample_index] + + if self.transform is not None: + video = self.transform(video) + + return video, audio, class_index diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/imagenet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/imagenet.py new file mode 100644 index 0000000000..a45ff3cd44 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/imagenet.py @@ -0,0 +1,218 @@ +import warnings +from contextlib import contextmanager +import os +import shutil +import tempfile +import torch +from .folder import ImageFolder +from .utils import check_integrity, extract_archive, verify_str_arg + +ARCHIVE_META = { + 'train': ('ILSVRC2012_img_train.tar', '1d675b47d978889d74fa0da5fadfb00e'), + 'val': ('ILSVRC2012_img_val.tar', '29b22e2961454d5413ddabcf34fc5622'), + 'devkit': ('ILSVRC2012_devkit_t12.tar.gz', 'fa75699e90414af021442c21a62c3abf') +} + +META_FILE = "meta.bin" + + +class ImageNet(ImageFolder): + """`ImageNet `_ 2012 Classification Dataset. + + Args: + root (string): Root directory of the ImageNet Dataset. + split (string, optional): The dataset split, supports ``train``, or ``val``. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + loader (callable, optional): A function to load an image given its path. + + Attributes: + classes (list): List of the class name tuples. + class_to_idx (dict): Dict with items (class_name, class_index). + wnids (list): List of the WordNet IDs. + wnid_to_idx (dict): Dict with items (wordnet_id, class_index). + imgs (list): List of (image path, class_index) tuples + targets (list): The class_index value for each image in the dataset + """ + + def __init__(self, root, split='train', download=None, **kwargs): + if download is True: + msg = ("The dataset is no longer publicly accessible. 
You need to " + "download the archives externally and place them in the root " + "directory.") + raise RuntimeError(msg) + elif download is False: + msg = ("The use of the download flag is deprecated, since the dataset " + "is no longer publicly accessible.") + warnings.warn(msg, RuntimeWarning) + + root = self.root = os.path.expanduser(root) + self.split = verify_str_arg(split, "split", ("train", "val")) + + self.parse_archives() + wnid_to_classes = load_meta_file(self.root)[0] + + super(ImageNet, self).__init__(self.split_folder, **kwargs) + self.root = root + + self.wnids = self.classes + self.wnid_to_idx = self.class_to_idx + self.classes = [wnid_to_classes[wnid] for wnid in self.wnids] + self.class_to_idx = {cls: idx + for idx, clss in enumerate(self.classes) + for cls in clss} + + def parse_archives(self): + if not check_integrity(os.path.join(self.root, META_FILE)): + parse_devkit_archive(self.root) + + if not os.path.isdir(self.split_folder): + if self.split == 'train': + parse_train_archive(self.root) + elif self.split == 'val': + parse_val_archive(self.root) + + @property + def split_folder(self): + return os.path.join(self.root, self.split) + + def extra_repr(self): + return "Split: {split}".format(**self.__dict__) + + +def load_meta_file(root, file=None): + if file is None: + file = META_FILE + file = os.path.join(root, file) + + if check_integrity(file): + return torch.load(file) + else: + msg = ("The meta file {} is not present in the root directory or is corrupted. " + "This file is automatically created by the ImageNet dataset.") + raise RuntimeError(msg.format(file, root)) + + +def _verify_archive(root, file, md5): + if not check_integrity(os.path.join(root, file), md5): + msg = ("The archive {} is not present in the root directory or is corrupted. " + "You need to download it externally and place it in {}.") + raise RuntimeError(msg.format(file, root)) + + +def parse_devkit_archive(root, file=None): + """Parse the devkit archive of the ImageNet2012 classification dataset and save + the meta information in a binary file. + + Args: + root (str): Root directory containing the devkit archive + file (str, optional): Name of devkit archive. 
Defaults to + 'ILSVRC2012_devkit_t12.tar.gz' + """ + import scipy.io as sio + + def parse_meta_mat(devkit_root): + metafile = os.path.join(devkit_root, "data", "meta.mat") + meta = sio.loadmat(metafile, squeeze_me=True)['synsets'] + nums_children = list(zip(*meta))[4] + meta = [meta[idx] for idx, num_children in enumerate(nums_children) + if num_children == 0] + idcs, wnids, classes = list(zip(*meta))[:3] + classes = [tuple(clss.split(', ')) for clss in classes] + idx_to_wnid = {idx: wnid for idx, wnid in zip(idcs, wnids)} + wnid_to_classes = {wnid: clss for wnid, clss in zip(wnids, classes)} + return idx_to_wnid, wnid_to_classes + + def parse_val_groundtruth_txt(devkit_root): + file = os.path.join(devkit_root, "data", + "ILSVRC2012_validation_ground_truth.txt") + with open(file, 'r') as txtfh: + val_idcs = txtfh.readlines() + return [int(val_idx) for val_idx in val_idcs] + + @contextmanager + def get_tmp_dir(): + tmp_dir = tempfile.mkdtemp() + try: + yield tmp_dir + finally: + shutil.rmtree(tmp_dir) + + archive_meta = ARCHIVE_META["devkit"] + if file is None: + file = archive_meta[0] + md5 = archive_meta[1] + + _verify_archive(root, file, md5) + + with get_tmp_dir() as tmp_dir: + extract_archive(os.path.join(root, file), tmp_dir) + + devkit_root = os.path.join(tmp_dir, "ILSVRC2012_devkit_t12") + idx_to_wnid, wnid_to_classes = parse_meta_mat(devkit_root) + val_idcs = parse_val_groundtruth_txt(devkit_root) + val_wnids = [idx_to_wnid[idx] for idx in val_idcs] + + torch.save((wnid_to_classes, val_wnids), os.path.join(root, META_FILE)) + + +def parse_train_archive(root, file=None, folder="train"): + """Parse the train images archive of the ImageNet2012 classification dataset and + prepare it for usage with the ImageNet dataset. + + Args: + root (str): Root directory containing the train images archive + file (str, optional): Name of train images archive. Defaults to + 'ILSVRC2012_img_train.tar' + folder (str, optional): Optional name for train images folder. Defaults to + 'train' + """ + archive_meta = ARCHIVE_META["train"] + if file is None: + file = archive_meta[0] + md5 = archive_meta[1] + + _verify_archive(root, file, md5) + + train_root = os.path.join(root, folder) + extract_archive(os.path.join(root, file), train_root) + + archives = [os.path.join(train_root, archive) for archive in os.listdir(train_root)] + for archive in archives: + extract_archive(archive, os.path.splitext(archive)[0], remove_finished=True) + + +def parse_val_archive(root, file=None, wnids=None, folder="val"): + """Parse the validation images archive of the ImageNet2012 classification dataset + and prepare it for usage with the ImageNet dataset. + + Args: + root (str): Root directory containing the validation images archive + file (str, optional): Name of validation images archive. Defaults to + 'ILSVRC2012_img_val.tar' + wnids (list, optional): List of WordNet IDs of the validation images. If None + is given, the IDs are loaded from the meta file in the root directory + folder (str, optional): Optional name for validation images folder. 
Defaults to + 'val' + """ + archive_meta = ARCHIVE_META["val"] + if file is None: + file = archive_meta[0] + md5 = archive_meta[1] + if wnids is None: + wnids = load_meta_file(root)[1] + + _verify_archive(root, file, md5) + + val_root = os.path.join(root, folder) + extract_archive(os.path.join(root, file), val_root) + + images = sorted([os.path.join(val_root, image) for image in os.listdir(val_root)]) + + for wnid in set(wnids): + os.mkdir(os.path.join(val_root, wnid)) + + for wnid, img_file in zip(wnids, images): + shutil.move(img_file, os.path.join(val_root, wnid, os.path.basename(img_file))) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/kinetics.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/kinetics.py new file mode 100644 index 0000000000..07db91cc19 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/kinetics.py @@ -0,0 +1,79 @@ +from .utils import list_dir +from .folder import make_dataset +from .video_utils import VideoClips +from .vision import VisionDataset + + +class Kinetics400(VisionDataset): + """ + `Kinetics-400 `_ + dataset. + + Kinetics-400 is an action recognition video dataset. + This dataset consider every video as a collection of video clips of fixed size, specified + by ``frames_per_clip``, where the step in frames between each clip is given by + ``step_between_clips``. + + To give an example, for 2 videos with 10 and 15 frames respectively, if ``frames_per_clip=5`` + and ``step_between_clips=5``, the dataset size will be (2 + 3) = 5, where the first two + elements will come from video 1, and the next three elements from video 2. + Note that we drop clips which do not have exactly ``frames_per_clip`` elements, so not all + frames in a video might be present. + + Internally, it uses a VideoClips object to handle clip creation. + + Args: + root (string): Root directory of the Kinetics-400 Dataset. + frames_per_clip (int): number of frames in a clip + step_between_clips (int): number of frames between each clip + transform (callable, optional): A function/transform that takes in a TxHxWxC video + and returns a transformed version. 
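+
+    Example (a minimal sketch; the root path and clip length are placeholders,
+    and a working video backend is assumed)::
+
+        import torchvision.datasets as dset
+
+        dataset = dset.Kinetics400('kinetics400/train', frames_per_clip=16)
+        video, audio, label = dataset[0]  # Tensor[T, H, W, C], audio, class index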
+ + Returns: + video (Tensor[T, H, W, C]): the `T` video frames + audio(Tensor[K, L]): the audio frames, where `K` is the number of channels + and `L` is the number of points + label (int): class of the video clip + """ + + def __init__(self, root, frames_per_clip, step_between_clips=1, frame_rate=None, + extensions=('avi',), transform=None, _precomputed_metadata=None, + num_workers=1, _video_width=0, _video_height=0, + _video_min_dimension=0, _audio_samples=0, _audio_channels=0): + super(Kinetics400, self).__init__(root) + + classes = list(sorted(list_dir(root))) + class_to_idx = {classes[i]: i for i in range(len(classes))} + self.samples = make_dataset(self.root, class_to_idx, extensions, is_valid_file=None) + self.classes = classes + video_list = [x[0] for x in self.samples] + self.video_clips = VideoClips( + video_list, + frames_per_clip, + step_between_clips, + frame_rate, + _precomputed_metadata, + num_workers=num_workers, + _video_width=_video_width, + _video_height=_video_height, + _video_min_dimension=_video_min_dimension, + _audio_samples=_audio_samples, + _audio_channels=_audio_channels, + ) + self.transform = transform + + @property + def metadata(self): + return self.video_clips.metadata + + def __len__(self): + return self.video_clips.num_clips() + + def __getitem__(self, idx): + video, audio, info, video_idx = self.video_clips.get_clip(idx) + label = self.samples[video_idx][1] + + if self.transform is not None: + video = self.transform(video) + + return video, audio, label diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/lsun.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/lsun.py new file mode 100644 index 0000000000..fc67f8f024 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/lsun.py @@ -0,0 +1,152 @@ +from .vision import VisionDataset +from PIL import Image +import os +import os.path +import io +import string +from collections.abc import Iterable +import pickle +from .utils import verify_str_arg, iterable_to_str + + +class LSUNClass(VisionDataset): + def __init__(self, root, transform=None, target_transform=None): + import lmdb + super(LSUNClass, self).__init__(root, transform=transform, + target_transform=target_transform) + + self.env = lmdb.open(root, max_readers=1, readonly=True, lock=False, + readahead=False, meminit=False) + with self.env.begin(write=False) as txn: + self.length = txn.stat()['entries'] + cache_file = '_cache_' + ''.join(c for c in root if c in string.ascii_letters) + if os.path.isfile(cache_file): + self.keys = pickle.load(open(cache_file, "rb")) + else: + with self.env.begin(write=False) as txn: + self.keys = [key for key, _ in txn.cursor()] + pickle.dump(self.keys, open(cache_file, "wb")) + + def __getitem__(self, index): + img, target = None, None + env = self.env + with env.begin(write=False) as txn: + imgbuf = txn.get(self.keys[index]) + + buf = io.BytesIO() + buf.write(imgbuf) + buf.seek(0) + img = Image.open(buf).convert('RGB') + + if self.transform is not None: + img = self.transform(img) + + if self.target_transform is not None: + target = self.target_transform(target) + + return img, target + + def __len__(self): + return self.length + + +class LSUN(VisionDataset): + """ + `LSUN `_ dataset. + + Args: + root (string): Root directory for the database files. + classes (string or list): One of {'train', 'val', 'test'} or a list of + categories to load. e,g. ['bedroom_train', 'church_outdoor_train']. 
+ transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + """ + + def __init__(self, root, classes='train', transform=None, target_transform=None): + super(LSUN, self).__init__(root, transform=transform, + target_transform=target_transform) + self.classes = self._verify_classes(classes) + + # for each class, create an LSUNClassDataset + self.dbs = [] + for c in self.classes: + self.dbs.append(LSUNClass( + root=root + '/' + c + '_lmdb', + transform=transform)) + + self.indices = [] + count = 0 + for db in self.dbs: + count += len(db) + self.indices.append(count) + + self.length = count + + def _verify_classes(self, classes): + categories = ['bedroom', 'bridge', 'church_outdoor', 'classroom', + 'conference_room', 'dining_room', 'kitchen', + 'living_room', 'restaurant', 'tower'] + dset_opts = ['train', 'val', 'test'] + + try: + verify_str_arg(classes, "classes", dset_opts) + if classes == 'test': + classes = [classes] + else: + classes = [c + '_' + classes for c in categories] + except ValueError: + if not isinstance(classes, Iterable): + msg = ("Expected type str or Iterable for argument classes, " + "but got type {}.") + raise ValueError(msg.format(type(classes))) + + classes = list(classes) + msg_fmtstr = ("Expected type str for elements in argument classes, " + "but got type {}.") + for c in classes: + verify_str_arg(c, custom_msg=msg_fmtstr.format(type(c))) + c_short = c.split('_') + category, dset_opt = '_'.join(c_short[:-1]), c_short[-1] + + msg_fmtstr = "Unknown value '{}' for {}. Valid values are {{{}}}." + msg = msg_fmtstr.format(category, "LSUN class", + iterable_to_str(categories)) + verify_str_arg(category, valid_values=categories, custom_msg=msg) + + msg = msg_fmtstr.format(dset_opt, "postfix", iterable_to_str(dset_opts)) + verify_str_arg(dset_opt, valid_values=dset_opts, custom_msg=msg) + + return classes + + def __getitem__(self, index): + """ + Args: + index (int): Index + + Returns: + tuple: Tuple (image, target) where target is the index of the target category. + """ + target = 0 + sub = 0 + for ind in self.indices: + if index < ind: + break + target += 1 + sub = ind + + db = self.dbs[target] + index = index - sub + + if self.target_transform is not None: + target = self.target_transform(target) + + img, _ = db[index] + return img, target + + def __len__(self): + return self.length + + def extra_repr(self): + return "Classes: {classes}".format(**self.__dict__) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/mnist.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/mnist.py new file mode 100644 index 0000000000..74bc0c16aa --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/mnist.py @@ -0,0 +1,485 @@ +from .vision import VisionDataset +import warnings +from PIL import Image +import os +import os.path +import numpy as np +import torch +import codecs +import string +from .utils import download_url, download_and_extract_archive, extract_archive, \ + verify_str_arg + + +class MNIST(VisionDataset): + """`MNIST `_ Dataset. + + Args: + root (string): Root directory of dataset where ``MNIST/processed/training.pt`` + and ``MNIST/processed/test.pt`` exist. + train (bool, optional): If True, creates dataset from ``training.pt``, + otherwise from ``test.pt``. 
+ download (bool, optional): If true, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + """ + + resources = [ + ("http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz", "f68b3c2dcbeaaa9fbdd348bbdeb94873"), + ("http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz", "d53e105ee54ea40749a09fcbcd1e9432"), + ("http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz", "9fb629c4189551a2d022fa330f9573f3"), + ("http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz", "ec29112dd5afa0611ce80d1b7f02629c") + ] + + training_file = 'training.pt' + test_file = 'test.pt' + classes = ['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four', + '5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine'] + + @property + def train_labels(self): + warnings.warn("train_labels has been renamed targets") + return self.targets + + @property + def test_labels(self): + warnings.warn("test_labels has been renamed targets") + return self.targets + + @property + def train_data(self): + warnings.warn("train_data has been renamed data") + return self.data + + @property + def test_data(self): + warnings.warn("test_data has been renamed data") + return self.data + + def __init__(self, root, train=True, transform=None, target_transform=None, + download=False): + super(MNIST, self).__init__(root, transform=transform, + target_transform=target_transform) + self.train = train # training set or test set + + if download: + self.download() + + if not self._check_exists(): + raise RuntimeError('Dataset not found.' + + ' You can use download=True to download it') + + if self.train: + data_file = self.training_file + else: + data_file = self.test_file + self.data, self.targets = torch.load(os.path.join(self.processed_folder, data_file)) + + def __getitem__(self, index): + """ + Args: + index (int): Index + + Returns: + tuple: (image, target) where target is index of the target class. 
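+
+        Example (an illustrative sketch; ``'data'`` is a placeholder root
+        directory)::
+
+            import torchvision.datasets as dset
+
+            dataset = dset.MNIST(root='data', train=True, download=True)
+            img, target = dataset[0]  # 28x28 grayscale PIL image and its int label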
+ """ + img, target = self.data[index], int(self.targets[index]) + + # doing this so that it is consistent with all other datasets + # to return a PIL Image + img = Image.fromarray(img.numpy(), mode='L') + + if self.transform is not None: + img = self.transform(img) + + if self.target_transform is not None: + target = self.target_transform(target) + + return img, target + + def __len__(self): + return len(self.data) + + @property + def raw_folder(self): + return os.path.join(self.root, self.__class__.__name__, 'raw') + + @property + def processed_folder(self): + return os.path.join(self.root, self.__class__.__name__, 'processed') + + @property + def class_to_idx(self): + return {_class: i for i, _class in enumerate(self.classes)} + + def _check_exists(self): + return (os.path.exists(os.path.join(self.processed_folder, + self.training_file)) and + os.path.exists(os.path.join(self.processed_folder, + self.test_file))) + + def download(self): + """Download the MNIST data if it doesn't exist in processed_folder already.""" + + if self._check_exists(): + return + + os.makedirs(self.raw_folder, exist_ok=True) + os.makedirs(self.processed_folder, exist_ok=True) + + # download files + for url, md5 in self.resources: + filename = url.rpartition('/')[2] + download_and_extract_archive(url, download_root=self.raw_folder, filename=filename, md5=md5) + + # process and save as torch files + print('Processing...') + + training_set = ( + read_image_file(os.path.join(self.raw_folder, 'train-images-idx3-ubyte')), + read_label_file(os.path.join(self.raw_folder, 'train-labels-idx1-ubyte')) + ) + test_set = ( + read_image_file(os.path.join(self.raw_folder, 't10k-images-idx3-ubyte')), + read_label_file(os.path.join(self.raw_folder, 't10k-labels-idx1-ubyte')) + ) + with open(os.path.join(self.processed_folder, self.training_file), 'wb') as f: + torch.save(training_set, f) + with open(os.path.join(self.processed_folder, self.test_file), 'wb') as f: + torch.save(test_set, f) + + print('Done!') + + def extra_repr(self): + return "Split: {}".format("Train" if self.train is True else "Test") + + +class FashionMNIST(MNIST): + """`Fashion-MNIST `_ Dataset. + + Args: + root (string): Root directory of dataset where ``Fashion-MNIST/processed/training.pt`` + and ``Fashion-MNIST/processed/test.pt`` exist. + train (bool, optional): If True, creates dataset from ``training.pt``, + otherwise from ``test.pt``. + download (bool, optional): If true, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. 
+ """ + resources = [ + ("http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz", + "8d4fb7e6c68d591d4c3dfef9ec88bf0d"), + ("http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz", + "25c81989df183df01b3e8a0aad5dffbe"), + ("http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz", + "bef4ecab320f06d8554ea6380940ec79"), + ("http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz", + "bb300cfdad3c16e7a12a480ee83cd310") + ] + classes = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', + 'Shirt', 'Sneaker', 'Bag', 'Ankle boot'] + + +class KMNIST(MNIST): + """`Kuzushiji-MNIST `_ Dataset. + + Args: + root (string): Root directory of dataset where ``KMNIST/processed/training.pt`` + and ``KMNIST/processed/test.pt`` exist. + train (bool, optional): If True, creates dataset from ``training.pt``, + otherwise from ``test.pt``. + download (bool, optional): If true, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + """ + resources = [ + ("http://codh.rois.ac.jp/kmnist/dataset/kmnist/train-images-idx3-ubyte.gz", "bdb82020997e1d708af4cf47b453dcf7"), + ("http://codh.rois.ac.jp/kmnist/dataset/kmnist/train-labels-idx1-ubyte.gz", "e144d726b3acfaa3e44228e80efcd344"), + ("http://codh.rois.ac.jp/kmnist/dataset/kmnist/t10k-images-idx3-ubyte.gz", "5c965bf0a639b31b8f53240b1b52f4d7"), + ("http://codh.rois.ac.jp/kmnist/dataset/kmnist/t10k-labels-idx1-ubyte.gz", "7320c461ea6c1c855c0b718fb2a4b134") + ] + classes = ['o', 'ki', 'su', 'tsu', 'na', 'ha', 'ma', 'ya', 're', 'wo'] + + +class EMNIST(MNIST): + """`EMNIST `_ Dataset. + + Args: + root (string): Root directory of dataset where ``EMNIST/processed/training.pt`` + and ``EMNIST/processed/test.pt`` exist. + split (string): The dataset has 6 different splits: ``byclass``, ``bymerge``, + ``balanced``, ``letters``, ``digits`` and ``mnist``. This argument specifies + which one to use. + train (bool, optional): If True, creates dataset from ``training.pt``, + otherwise from ``test.pt``. + download (bool, optional): If true, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. 
+ """ + # Updated URL from https://www.nist.gov/node/1298471/emnist-dataset since the + # _official_ download link + # https://cloudstor.aarnet.edu.au/plus/s/ZNmuFiuQTqZlu9W/download + # is (currently) unavailable + url = 'http://www.itl.nist.gov/iaui/vip/cs_links/EMNIST/gzip.zip' + md5 = "58c8d27c78d21e728a6bc7b3cc06412e" + splits = ('byclass', 'bymerge', 'balanced', 'letters', 'digits', 'mnist') + # Merged Classes assumes Same structure for both uppercase and lowercase version + _merged_classes = set(['C', 'I', 'J', 'K', 'L', 'M', 'O', 'P', 'S', 'U', 'V', 'W', 'X', 'Y', 'Z']) + _all_classes = set(list(string.digits + string.ascii_letters)) + classes_split_dict = { + 'byclass': list(_all_classes), + 'bymerge': sorted(list(_all_classes - _merged_classes)), + 'balanced': sorted(list(_all_classes - _merged_classes)), + 'letters': list(string.ascii_lowercase), + 'digits': list(string.digits), + 'mnist': list(string.digits), + } + + def __init__(self, root, split, **kwargs): + self.split = verify_str_arg(split, "split", self.splits) + self.training_file = self._training_file(split) + self.test_file = self._test_file(split) + super(EMNIST, self).__init__(root, **kwargs) + self.classes = self.classes_split_dict[self.split] + + @staticmethod + def _training_file(split): + return 'training_{}.pt'.format(split) + + @staticmethod + def _test_file(split): + return 'test_{}.pt'.format(split) + + def download(self): + """Download the EMNIST data if it doesn't exist in processed_folder already.""" + import shutil + + if self._check_exists(): + return + + os.makedirs(self.raw_folder, exist_ok=True) + os.makedirs(self.processed_folder, exist_ok=True) + + # download files + print('Downloading and extracting zip archive') + download_and_extract_archive(self.url, download_root=self.raw_folder, filename="emnist.zip", + remove_finished=True, md5=self.md5) + gzip_folder = os.path.join(self.raw_folder, 'gzip') + for gzip_file in os.listdir(gzip_folder): + if gzip_file.endswith('.gz'): + extract_archive(os.path.join(gzip_folder, gzip_file), gzip_folder) + + # process and save as torch files + for split in self.splits: + print('Processing ' + split) + training_set = ( + read_image_file(os.path.join(gzip_folder, 'emnist-{}-train-images-idx3-ubyte'.format(split))), + read_label_file(os.path.join(gzip_folder, 'emnist-{}-train-labels-idx1-ubyte'.format(split))) + ) + test_set = ( + read_image_file(os.path.join(gzip_folder, 'emnist-{}-test-images-idx3-ubyte'.format(split))), + read_label_file(os.path.join(gzip_folder, 'emnist-{}-test-labels-idx1-ubyte'.format(split))) + ) + with open(os.path.join(self.processed_folder, self._training_file(split)), 'wb') as f: + torch.save(training_set, f) + with open(os.path.join(self.processed_folder, self._test_file(split)), 'wb') as f: + torch.save(test_set, f) + shutil.rmtree(gzip_folder) + + print('Done!') + + +class QMNIST(MNIST): + """`QMNIST `_ Dataset. + + Args: + root (string): Root directory of dataset whose ``processed'' + subdir contains torch binary files with the datasets. + what (string,optional): Can be 'train', 'test', 'test10k', + 'test50k', or 'nist' for respectively the mnist compatible + training set, the 60k qmnist testing set, the 10k qmnist + examples that match the mnist testing set, the 50k + remaining qmnist testing examples, or all the nist + digits. The default is to select 'train' or 'test' + according to the compatibility argument 'train'. 
+ compat (bool,optional): A boolean that says whether the target + for each example is class number (for compatibility with + the MNIST dataloader) or a torch vector containing the + full qmnist information. Default=True. + download (bool, optional): If true, downloads the dataset from + the internet and puts it in root directory. If dataset is + already downloaded, it is not downloaded again. + transform (callable, optional): A function/transform that + takes in an PIL image and returns a transformed + version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform + that takes in the target and transforms it. + train (bool,optional,compatibility): When argument 'what' is + not specified, this boolean decides whether to load the + training set ot the testing set. Default: True. + + """ + + subsets = { + 'train': 'train', + 'test': 'test', + 'test10k': 'test', + 'test50k': 'test', + 'nist': 'nist' + } + resources = { + 'train': [('https://raw.githubusercontent.com/facebookresearch/qmnist/master/qmnist-train-images-idx3-ubyte.gz', + 'ed72d4157d28c017586c42bc6afe6370'), + ('https://raw.githubusercontent.com/facebookresearch/qmnist/master/qmnist-train-labels-idx2-int.gz', + '0058f8dd561b90ffdd0f734c6a30e5e4')], + 'test': [('https://raw.githubusercontent.com/facebookresearch/qmnist/master/qmnist-test-images-idx3-ubyte.gz', + '1394631089c404de565df7b7aeaf9412'), + ('https://raw.githubusercontent.com/facebookresearch/qmnist/master/qmnist-test-labels-idx2-int.gz', + '5b5b05890a5e13444e108efe57b788aa')], + 'nist': [('https://raw.githubusercontent.com/facebookresearch/qmnist/master/xnist-images-idx3-ubyte.xz', + '7f124b3b8ab81486c9d8c2749c17f834'), + ('https://raw.githubusercontent.com/facebookresearch/qmnist/master/xnist-labels-idx2-int.xz', + '5ed0e788978e45d4a8bd4b7caec3d79d')] + } + classes = ['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four', + '5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine'] + + def __init__(self, root, what=None, compat=True, train=True, **kwargs): + if what is None: + what = 'train' if train else 'test' + self.what = verify_str_arg(what, "what", tuple(self.subsets.keys())) + self.compat = compat + self.data_file = what + '.pt' + self.training_file = self.data_file + self.test_file = self.data_file + super(QMNIST, self).__init__(root, train, **kwargs) + + def download(self): + """Download the QMNIST data if it doesn't exist in processed_folder already. + Note that we only download what has been asked for (argument 'what'). 
+ """ + if self._check_exists(): + return + os.makedirs(self.raw_folder, exist_ok=True) + os.makedirs(self.processed_folder, exist_ok=True) + split = self.resources[self.subsets[self.what]] + files = [] + + # download data files if not already there + for url, md5 in split: + filename = url.rpartition('/')[2] + file_path = os.path.join(self.raw_folder, filename) + if not os.path.isfile(file_path): + download_url(url, root=self.raw_folder, filename=filename, md5=md5) + files.append(file_path) + + # process and save as torch files + print('Processing...') + data = read_sn3_pascalvincent_tensor(files[0]) + assert(data.dtype == torch.uint8) + assert(data.ndimension() == 3) + targets = read_sn3_pascalvincent_tensor(files[1]).long() + assert(targets.ndimension() == 2) + if self.what == 'test10k': + data = data[0:10000, :, :].clone() + targets = targets[0:10000, :].clone() + if self.what == 'test50k': + data = data[10000:, :, :].clone() + targets = targets[10000:, :].clone() + with open(os.path.join(self.processed_folder, self.data_file), 'wb') as f: + torch.save((data, targets), f) + + def __getitem__(self, index): + # redefined to handle the compat flag + img, target = self.data[index], self.targets[index] + img = Image.fromarray(img.numpy(), mode='L') + if self.transform is not None: + img = self.transform(img) + if self.compat: + target = int(target[0]) + if self.target_transform is not None: + target = self.target_transform(target) + return img, target + + def extra_repr(self): + return "Split: {}".format(self.what) + + +def get_int(b): + return int(codecs.encode(b, 'hex'), 16) + + +def open_maybe_compressed_file(path): + """Return a file object that possibly decompresses 'path' on the fly. + Decompression occurs when argument `path` is a string and ends with '.gz' or '.xz'. + """ + if not isinstance(path, torch._six.string_classes): + return path + if path.endswith('.gz'): + import gzip + return gzip.open(path, 'rb') + if path.endswith('.xz'): + import lzma + return lzma.open(path, 'rb') + return open(path, 'rb') + + +def read_sn3_pascalvincent_tensor(path, strict=True): + """Read a SN3 file in "Pascal Vincent" format (Lush file 'libidx/idx-io.lsh'). + Argument may be a filename, compressed filename, or file object. 
+ """ + # typemap + if not hasattr(read_sn3_pascalvincent_tensor, 'typemap'): + read_sn3_pascalvincent_tensor.typemap = { + 8: (torch.uint8, np.uint8, np.uint8), + 9: (torch.int8, np.int8, np.int8), + 11: (torch.int16, np.dtype('>i2'), 'i2'), + 12: (torch.int32, np.dtype('>i4'), 'i4'), + 13: (torch.float32, np.dtype('>f4'), 'f4'), + 14: (torch.float64, np.dtype('>f8'), 'f8')} + # read + with open_maybe_compressed_file(path) as f: + data = f.read() + # parse + magic = get_int(data[0:4]) + nd = magic % 256 + ty = magic // 256 + assert nd >= 1 and nd <= 3 + assert ty >= 8 and ty <= 14 + m = read_sn3_pascalvincent_tensor.typemap[ty] + s = [get_int(data[4 * (i + 1): 4 * (i + 2)]) for i in range(nd)] + parsed = np.frombuffer(data, dtype=m[1], offset=(4 * (nd + 1))) + assert parsed.shape[0] == np.prod(s) or not strict + return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s) + + +def read_label_file(path): + with open(path, 'rb') as f: + x = read_sn3_pascalvincent_tensor(f, strict=False) + assert(x.dtype == torch.uint8) + assert(x.ndimension() == 1) + return x.long() + + +def read_image_file(path): + with open(path, 'rb') as f: + x = read_sn3_pascalvincent_tensor(f, strict=False) + assert(x.dtype == torch.uint8) + assert(x.ndimension() == 3) + return x diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/omniglot.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/omniglot.py new file mode 100644 index 0000000000..dd86128488 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/omniglot.py @@ -0,0 +1,91 @@ +from PIL import Image +from os.path import join +import os +from .vision import VisionDataset +from .utils import download_and_extract_archive, check_integrity, list_dir, list_files + + +class Omniglot(VisionDataset): + """`Omniglot `_ Dataset. + Args: + root (string): Root directory of dataset where directory + ``omniglot-py`` exists. + background (bool, optional): If True, creates dataset from the "background" set, otherwise + creates from the "evaluation" set. This terminology is defined by the authors. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + download (bool, optional): If true, downloads the dataset zip files from the internet and + puts it in root directory. If the zip files are already downloaded, they are not + downloaded again. + """ + folder = 'omniglot-py' + download_url_prefix = 'https://github.com/brendenlake/omniglot/raw/master/python' + zips_md5 = { + 'images_background': '68d2efa1b9178cc56df9314c21c6e718', + 'images_evaluation': '6b91aef0f799c5bb55b94e3f2daec811' + } + + def __init__(self, root, background=True, transform=None, target_transform=None, + download=False): + super(Omniglot, self).__init__(join(root, self.folder), transform=transform, + target_transform=target_transform) + self.background = background + + if download: + self.download() + + if not self._check_integrity(): + raise RuntimeError('Dataset not found or corrupted.' 
+ + ' You can use download=True to download it') + + self.target_folder = join(self.root, self._get_target_folder()) + self._alphabets = list_dir(self.target_folder) + self._characters = sum([[join(a, c) for c in list_dir(join(self.target_folder, a))] + for a in self._alphabets], []) + self._character_images = [[(image, idx) for image in list_files(join(self.target_folder, character), '.png')] + for idx, character in enumerate(self._characters)] + self._flat_character_images = sum(self._character_images, []) + + def __len__(self): + return len(self._flat_character_images) + + def __getitem__(self, index): + """ + Args: + index (int): Index + + Returns: + tuple: (image, target) where target is index of the target character class. + """ + image_name, character_class = self._flat_character_images[index] + image_path = join(self.target_folder, self._characters[character_class], image_name) + image = Image.open(image_path, mode='r').convert('L') + + if self.transform: + image = self.transform(image) + + if self.target_transform: + character_class = self.target_transform(character_class) + + return image, character_class + + def _check_integrity(self): + zip_filename = self._get_target_folder() + if not check_integrity(join(self.root, zip_filename + '.zip'), self.zips_md5[zip_filename]): + return False + return True + + def download(self): + if self._check_integrity(): + print('Files already downloaded and verified') + return + + filename = self._get_target_folder() + zip_filename = filename + '.zip' + url = self.download_url_prefix + '/' + zip_filename + download_and_extract_archive(url, self.root, filename=zip_filename, md5=self.zips_md5[filename]) + + def _get_target_folder(self): + return 'images_background' if self.background else 'images_evaluation' diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/phototour.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/phototour.py new file mode 100644 index 0000000000..47591e3db8 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/phototour.py @@ -0,0 +1,209 @@ +import os +import numpy as np +from PIL import Image + +import torch +from .vision import VisionDataset + +from .utils import download_url + + +class PhotoTour(VisionDataset): + """`Learning Local Image Descriptors Data `_ Dataset. + + + Args: + root (string): Root directory where images are. + name (string): Name of the dataset to load. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. + download (bool, optional): If true, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. 
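+
+    Example (a minimal sketch; ``'data'`` is a placeholder root directory)::
+
+        import torchvision.datasets as dset
+
+        dataset = dset.PhotoTour(root='data', name='notredame', download=True)
+        patch = dataset[0]  # a single 64x64 patch tensor in train mode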
+ + """ + urls = { + 'notredame_harris': [ + 'http://matthewalunbrown.com/patchdata/notredame_harris.zip', + 'notredame_harris.zip', + '69f8c90f78e171349abdf0307afefe4d' + ], + 'yosemite_harris': [ + 'http://matthewalunbrown.com/patchdata/yosemite_harris.zip', + 'yosemite_harris.zip', + 'a73253d1c6fbd3ba2613c45065c00d46' + ], + 'liberty_harris': [ + 'http://matthewalunbrown.com/patchdata/liberty_harris.zip', + 'liberty_harris.zip', + 'c731fcfb3abb4091110d0ae8c7ba182c' + ], + 'notredame': [ + 'http://icvl.ee.ic.ac.uk/vbalnt/notredame.zip', + 'notredame.zip', + '509eda8535847b8c0a90bbb210c83484' + ], + 'yosemite': [ + 'http://icvl.ee.ic.ac.uk/vbalnt/yosemite.zip', + 'yosemite.zip', + '533b2e8eb7ede31be40abc317b2fd4f0' + ], + 'liberty': [ + 'http://icvl.ee.ic.ac.uk/vbalnt/liberty.zip', + 'liberty.zip', + 'fdd9152f138ea5ef2091746689176414' + ], + } + mean = {'notredame': 0.4854, 'yosemite': 0.4844, 'liberty': 0.4437, + 'notredame_harris': 0.4854, 'yosemite_harris': 0.4844, 'liberty_harris': 0.4437} + std = {'notredame': 0.1864, 'yosemite': 0.1818, 'liberty': 0.2019, + 'notredame_harris': 0.1864, 'yosemite_harris': 0.1818, 'liberty_harris': 0.2019} + lens = {'notredame': 468159, 'yosemite': 633587, 'liberty': 450092, + 'liberty_harris': 379587, 'yosemite_harris': 450912, 'notredame_harris': 325295} + image_ext = 'bmp' + info_file = 'info.txt' + matches_files = 'm50_100000_100000_0.txt' + + def __init__(self, root, name, train=True, transform=None, download=False): + super(PhotoTour, self).__init__(root, transform=transform) + self.name = name + self.data_dir = os.path.join(self.root, name) + self.data_down = os.path.join(self.root, '{}.zip'.format(name)) + self.data_file = os.path.join(self.root, '{}.pt'.format(name)) + + self.train = train + self.mean = self.mean[name] + self.std = self.std[name] + + if download: + self.download() + + if not self._check_datafile_exists(): + raise RuntimeError('Dataset not found.' 
+ + ' You can use download=True to download it') + + # load the serialized data + self.data, self.labels, self.matches = torch.load(self.data_file) + + def __getitem__(self, index): + """ + Args: + index (int): Index + + Returns: + tuple: (data1, data2, matches) + """ + if self.train: + data = self.data[index] + if self.transform is not None: + data = self.transform(data) + return data + m = self.matches[index] + data1, data2 = self.data[m[0]], self.data[m[1]] + if self.transform is not None: + data1 = self.transform(data1) + data2 = self.transform(data2) + return data1, data2, m[2] + + def __len__(self): + if self.train: + return self.lens[self.name] + return len(self.matches) + + def _check_datafile_exists(self): + return os.path.exists(self.data_file) + + def _check_downloaded(self): + return os.path.exists(self.data_dir) + + def download(self): + if self._check_datafile_exists(): + print('# Found cached data {}'.format(self.data_file)) + return + + if not self._check_downloaded(): + # download files + url = self.urls[self.name][0] + filename = self.urls[self.name][1] + md5 = self.urls[self.name][2] + fpath = os.path.join(self.root, filename) + + download_url(url, self.root, filename, md5) + + print('# Extracting data {}\n'.format(self.data_down)) + + import zipfile + with zipfile.ZipFile(fpath, 'r') as z: + z.extractall(self.data_dir) + + os.unlink(fpath) + + # process and save as torch files + print('# Caching data {}'.format(self.data_file)) + + dataset = ( + read_image_file(self.data_dir, self.image_ext, self.lens[self.name]), + read_info_file(self.data_dir, self.info_file), + read_matches_files(self.data_dir, self.matches_files) + ) + + with open(self.data_file, 'wb') as f: + torch.save(dataset, f) + + def extra_repr(self): + return "Split: {}".format("Train" if self.train is True else "Test") + + +def read_image_file(data_dir, image_ext, n): + """Return a Tensor containing the patches + """ + + def PIL2array(_img): + """Convert PIL image type to numpy 2D array + """ + return np.array(_img.getdata(), dtype=np.uint8).reshape(64, 64) + + def find_files(_data_dir, _image_ext): + """Return a list with the file names of the images containing the patches + """ + files = [] + # find those files with the specified extension + for file_dir in os.listdir(_data_dir): + if file_dir.endswith(_image_ext): + files.append(os.path.join(_data_dir, file_dir)) + return sorted(files) # sort files in ascend order to keep relations + + patches = [] + list_files = find_files(data_dir, image_ext) + + for fpath in list_files: + img = Image.open(fpath) + for y in range(0, 1024, 64): + for x in range(0, 1024, 64): + patch = img.crop((x, y, x + 64, y + 64)) + patches.append(PIL2array(patch)) + return torch.ByteTensor(np.array(patches[:n])) + + +def read_info_file(data_dir, info_file): + """Return a Tensor containing the list of labels + Read the file and keep only the ID of the 3D point. + """ + labels = [] + with open(os.path.join(data_dir, info_file), 'r') as f: + labels = [int(line.split()[0]) for line in f] + return torch.LongTensor(labels) + + +def read_matches_files(data_dir, matches_file): + """Return a Tensor containing the ground truth matches + Read the file and keep only 3D point ID. + Matches are represented with a 1, non matches with a 0. 
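+    Each returned row is ``[id1, id2, is_match]``, where ``is_match`` is 1 when
+    both entries refer to the same 3D point ID and 0 otherwise.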
+ """ + matches = [] + with open(os.path.join(data_dir, matches_file), 'r') as f: + for line in f: + line_split = line.split() + matches.append([int(line_split[0]), int(line_split[3]), + int(line_split[1] == line_split[4])]) + return torch.LongTensor(matches) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/samplers/__init__.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/samplers/__init__.py new file mode 100644 index 0000000000..870322d39b --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/samplers/__init__.py @@ -0,0 +1,3 @@ +from .clip_sampler import DistributedSampler, UniformClipSampler, RandomClipSampler + +__all__ = ('DistributedSampler', 'UniformClipSampler', 'RandomClipSampler') diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/samplers/clip_sampler.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/samplers/clip_sampler.py new file mode 100644 index 0000000000..2432a6d20d --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/samplers/clip_sampler.py @@ -0,0 +1,174 @@ +import math +import torch +from torch.utils.data import Sampler +import torch.distributed as dist +from torchvision.datasets.video_utils import VideoClips + + +class DistributedSampler(Sampler): + """ + Extension of DistributedSampler, as discussed in + https://github.com/pytorch/pytorch/issues/23430 + + Example: + dataset: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] + num_replicas: 4 + shuffle: False + + when group_size = 1 + RANK | shard_dataset + ========================= + rank_0 | [0, 4, 8, 12] + rank_1 | [1, 5, 9, 13] + rank_2 | [2, 6, 10, 0] + rank_3 | [3, 7, 11, 1] + + when group_size = 2 + + RANK | shard_dataset + ========================= + rank_0 | [0, 1, 8, 9] + rank_1 | [2, 3, 10, 11] + rank_2 | [4, 5, 12, 13] + rank_3 | [6, 7, 0, 1] + + """ + + def __init__(self, dataset, num_replicas=None, rank=None, shuffle=False, group_size=1): + if num_replicas is None: + if not dist.is_available(): + raise RuntimeError("Requires distributed package to be available") + num_replicas = dist.get_world_size() + if rank is None: + if not dist.is_available(): + raise RuntimeError("Requires distributed package to be available") + rank = dist.get_rank() + assert len(dataset) % group_size == 0, ( + "dataset length must be a multiplier of group size" + "dataset length: %d, group size: %d" % (len(dataset), group_size) + ) + self.dataset = dataset + self.group_size = group_size + self.num_replicas = num_replicas + self.rank = rank + self.epoch = 0 + dataset_group_length = len(dataset) // group_size + self.num_group_samples = int( + math.ceil(dataset_group_length * 1.0 / self.num_replicas) + ) + self.num_samples = self.num_group_samples * group_size + self.total_size = self.num_samples * self.num_replicas + self.shuffle = shuffle + + def __iter__(self): + # deterministically shuffle based on epoch + g = torch.Generator() + g.manual_seed(self.epoch) + if self.shuffle: + indices = torch.randperm(len(self.dataset), generator=g).tolist() + else: + indices = list(range(len(self.dataset))) + + # add extra samples to make it evenly divisible + indices += indices[:(self.total_size - len(indices))] + assert len(indices) == self.total_size + + total_group_size = self.total_size // self.group_size + indices = torch.reshape( + torch.LongTensor(indices), (total_group_size, self.group_size) + ) + + # subsample + indices = 
indices[self.rank:total_group_size:self.num_replicas, :] + indices = torch.reshape(indices, (-1,)).tolist() + assert len(indices) == self.num_samples + + if isinstance(self.dataset, Sampler): + orig_indices = list(iter(self.dataset)) + indices = [orig_indices[i] for i in indices] + + return iter(indices) + + def __len__(self): + return self.num_samples + + def set_epoch(self, epoch): + self.epoch = epoch + + +class UniformClipSampler(Sampler): + """ + Sample `num_video_clips_per_video` clips for each video, equally spaced. + When number of unique clips in the video is fewer than num_video_clips_per_video, + repeat the clips until `num_video_clips_per_video` clips are collected + + Arguments: + video_clips (VideoClips): video clips to sample from + num_clips_per_video (int): number of clips to be sampled per video + """ + def __init__(self, video_clips, num_clips_per_video): + if not isinstance(video_clips, VideoClips): + raise TypeError("Expected video_clips to be an instance of VideoClips, " + "got {}".format(type(video_clips))) + self.video_clips = video_clips + self.num_clips_per_video = num_clips_per_video + + def __iter__(self): + idxs = [] + s = 0 + # select num_clips_per_video for each video, uniformly spaced + for c in self.video_clips.clips: + length = len(c) + if length == 0: + # corner case where video decoding fails + continue + + sampled = ( + torch.linspace(s, s + length - 1, steps=self.num_clips_per_video) + .floor() + .to(torch.int64) + ) + s += length + idxs.append(sampled) + idxs = torch.cat(idxs).tolist() + return iter(idxs) + + def __len__(self): + return sum( + self.num_clips_per_video for c in self.video_clips.clips if len(c) > 0 + ) + + +class RandomClipSampler(Sampler): + """ + Samples at most `max_video_clips_per_video` clips for each video randomly + + Arguments: + video_clips (VideoClips): video clips to sample from + max_clips_per_video (int): maximum number of clips to be sampled per video + """ + def __init__(self, video_clips, max_clips_per_video): + if not isinstance(video_clips, VideoClips): + raise TypeError("Expected video_clips to be an instance of VideoClips, " + "got {}".format(type(video_clips))) + self.video_clips = video_clips + self.max_clips_per_video = max_clips_per_video + + def __iter__(self): + idxs = [] + s = 0 + # select at most max_clips_per_video for each video, randomly + for c in self.video_clips.clips: + length = len(c) + size = min(length, self.max_clips_per_video) + sampled = torch.randperm(length)[:size] + s + s += length + idxs.append(sampled) + idxs = torch.cat(idxs) + # shuffle all clips randomly + perm = torch.randperm(len(idxs)) + idxs = idxs[perm].tolist() + return iter(idxs) + + def __len__(self): + return sum(min(len(c), self.max_clips_per_video) for c in self.video_clips.clips) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbd.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbd.py new file mode 100644 index 0000000000..c4713f7257 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbd.py @@ -0,0 +1,124 @@ +import os +import shutil +from .vision import VisionDataset + +import numpy as np + +from PIL import Image +from .utils import download_url, verify_str_arg +from .voc import download_extract + + +class SBDataset(VisionDataset): + """`Semantic Boundaries Dataset `_ + + The SBD currently contains annotations from 11355 images taken from the PASCAL VOC 2011 dataset. + + .. 
note :: + + Please note that the train and val splits included with this dataset are different from + the splits in the PASCAL VOC dataset. In particular some "train" images might be part of + VOC2012 val. + If you are interested in testing on VOC 2012 val, then use `image_set='train_noval'`, + which excludes all val images. + + .. warning:: + + This class needs `scipy `_ to load target files from `.mat` format. + + Args: + root (string): Root directory of the Semantic Boundaries Dataset + image_set (string, optional): Select the image_set to use, ``train``, ``val`` or ``train_noval``. + Image set ``train_noval`` excludes VOC 2012 val images. + mode (string, optional): Select target type. Possible values 'boundaries' or 'segmentation'. + In case of 'boundaries', the target is an array of shape `[num_classes, H, W]`, + where `num_classes=20`. + download (bool, optional): If true, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. Input sample is PIL image and target is a numpy array + if `mode='boundaries'` or PIL image if `mode='segmentation'`. + """ + + url = "http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz" + md5 = "82b4d87ceb2ed10f6038a1cba92111cb" + filename = "benchmark.tgz" + + voc_train_url = "http://home.bharathh.info/pubs/codes/SBD/train_noval.txt" + voc_split_filename = "train_noval.txt" + voc_split_md5 = "79bff800c5f0b1ec6b21080a3c066722" + + def __init__(self, + root, + image_set='train', + mode='boundaries', + download=False, + transforms=None): + + try: + from scipy.io import loadmat + self._loadmat = loadmat + except ImportError: + raise RuntimeError("Scipy is not found. This dataset needs to have scipy installed: " + "pip install scipy") + + super(SBDataset, self).__init__(root, transforms) + self.image_set = verify_str_arg(image_set, "image_set", + ("train", "val", "train_noval")) + self.mode = verify_str_arg(mode, "mode", ("segmentation", "boundaries")) + self.num_classes = 20 + + sbd_root = self.root + image_dir = os.path.join(sbd_root, 'img') + mask_dir = os.path.join(sbd_root, 'cls') + + if download: + download_extract(self.url, self.root, self.filename, self.md5) + extracted_ds_root = os.path.join(self.root, "benchmark_RELEASE", "dataset") + for f in ["cls", "img", "inst", "train.txt", "val.txt"]: + old_path = os.path.join(extracted_ds_root, f) + shutil.move(old_path, sbd_root) + download_url(self.voc_train_url, sbd_root, self.voc_split_filename, + self.voc_split_md5) + + if not os.path.isdir(sbd_root): + raise RuntimeError('Dataset not found or corrupted.' 
+ + ' You can use download=True to download it') + + split_f = os.path.join(sbd_root, image_set.rstrip('\n') + '.txt') + + with open(os.path.join(split_f), "r") as f: + file_names = [x.strip() for x in f.readlines()] + + self.images = [os.path.join(image_dir, x + ".jpg") for x in file_names] + self.masks = [os.path.join(mask_dir, x + ".mat") for x in file_names] + assert (len(self.images) == len(self.masks)) + + self._get_target = self._get_segmentation_target \ + if self.mode == "segmentation" else self._get_boundaries_target + + def _get_segmentation_target(self, filepath): + mat = self._loadmat(filepath) + return Image.fromarray(mat['GTcls'][0]['Segmentation'][0]) + + def _get_boundaries_target(self, filepath): + mat = self._loadmat(filepath) + return np.concatenate([np.expand_dims(mat['GTcls'][0]['Boundaries'][0][i][0].toarray(), axis=0) + for i in range(self.num_classes)], axis=0) + + def __getitem__(self, index): + img = Image.open(self.images[index]).convert('RGB') + target = self._get_target(self.masks[index]) + + if self.transforms is not None: + img, target = self.transforms(img, target) + + return img, target + + def __len__(self): + return len(self.images) + + def extra_repr(self): + lines = ["Image set: {image_set}", "Mode: {mode}"] + return '\n'.join(lines).format(**self.__dict__) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbu.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbu.py new file mode 100644 index 0000000000..70cb68344b --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbu.py @@ -0,0 +1,107 @@ +from PIL import Image +from .utils import download_url, check_integrity + +import os +from .vision import VisionDataset + + +class SBU(VisionDataset): + """`SBU Captioned Photo `_ Dataset. + + Args: + root (string): Root directory of dataset where tarball + ``SBUCaptionedPhotoDataset.tar.gz`` exists. + transform (callable, optional): A function/transform that takes in a PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + download (bool, optional): If True, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. + """ + url = "http://www.cs.virginia.edu/~vicente/sbucaptions/SBUCaptionedPhotoDataset.tar.gz" + filename = "SBUCaptionedPhotoDataset.tar.gz" + md5_checksum = '9aec147b3488753cf758b4d493422285' + + def __init__(self, root, transform=None, target_transform=None, download=True): + super(SBU, self).__init__(root, transform=transform, + target_transform=target_transform) + + if download: + self.download() + + if not self._check_integrity(): + raise RuntimeError('Dataset not found or corrupted.' 
+ + ' You can use download=True to download it') + + # Read the caption for each photo + self.photos = [] + self.captions = [] + + file1 = os.path.join(self.root, 'dataset', 'SBU_captioned_photo_dataset_urls.txt') + file2 = os.path.join(self.root, 'dataset', 'SBU_captioned_photo_dataset_captions.txt') + + for line1, line2 in zip(open(file1), open(file2)): + url = line1.rstrip() + photo = os.path.basename(url) + filename = os.path.join(self.root, 'dataset', photo) + if os.path.exists(filename): + caption = line2.rstrip() + self.photos.append(photo) + self.captions.append(caption) + + def __getitem__(self, index): + """ + Args: + index (int): Index + + Returns: + tuple: (image, target) where target is a caption for the photo. + """ + filename = os.path.join(self.root, 'dataset', self.photos[index]) + img = Image.open(filename).convert('RGB') + if self.transform is not None: + img = self.transform(img) + + target = self.captions[index] + if self.target_transform is not None: + target = self.target_transform(target) + + return img, target + + def __len__(self): + """The number of photos in the dataset.""" + return len(self.photos) + + def _check_integrity(self): + """Check the md5 checksum of the downloaded tarball.""" + root = self.root + fpath = os.path.join(root, self.filename) + if not check_integrity(fpath, self.md5_checksum): + return False + return True + + def download(self): + """Download and extract the tarball, and download each individual photo.""" + import tarfile + + if self._check_integrity(): + print('Files already downloaded and verified') + return + + download_url(self.url, self.root, self.filename, self.md5_checksum) + + # Extract file + with tarfile.open(os.path.join(self.root, self.filename), 'r:gz') as tar: + tar.extractall(path=self.root) + + # Download individual photos + with open(os.path.join(self.root, 'dataset', 'SBU_captioned_photo_dataset_urls.txt')) as fh: + for line in fh: + url = line.rstrip() + try: + download_url(url, os.path.join(self.root, 'dataset')) + except OSError: + # The images point to public images on Flickr. + # Note: Images might be removed by users at anytime. + pass diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/semeion.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/semeion.py new file mode 100644 index 0000000000..12c92c4a35 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/semeion.py @@ -0,0 +1,84 @@ +from PIL import Image +import os +import os.path +import numpy as np +from .vision import VisionDataset +from .utils import download_url, check_integrity + + +class SEMEION(VisionDataset): + """`SEMEION `_ Dataset. + Args: + root (string): Root directory of dataset where directory + ``semeion.py`` exists. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + download (bool, optional): If true, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. 
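+
+    The data is parsed from the single ``semeion.data`` text file: pixels are
+    reshaped into 16x16 grayscale images and labels are decoded from the
+    trailing one-hot columns.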
+ """ + url = "http://archive.ics.uci.edu/ml/machine-learning-databases/semeion/semeion.data" + filename = "semeion.data" + md5_checksum = 'cb545d371d2ce14ec121470795a77432' + + def __init__(self, root, transform=None, target_transform=None, download=True): + super(SEMEION, self).__init__(root, transform=transform, + target_transform=target_transform) + + if download: + self.download() + + if not self._check_integrity(): + raise RuntimeError('Dataset not found or corrupted.' + + ' You can use download=True to download it') + + self.data = [] + self.labels = [] + fp = os.path.join(self.root, self.filename) + data = np.loadtxt(fp) + # convert value to 8 bit unsigned integer + # color (white #255) the pixels + self.data = (data[:, :256] * 255).astype('uint8') + self.data = np.reshape(self.data, (-1, 16, 16)) + self.labels = np.nonzero(data[:, 256:])[1] + + def __getitem__(self, index): + """ + Args: + index (int): Index + Returns: + tuple: (image, target) where target is index of the target class. + """ + img, target = self.data[index], int(self.labels[index]) + + # doing this so that it is consistent with all other datasets + # to return a PIL Image + img = Image.fromarray(img, mode='L') + + if self.transform is not None: + img = self.transform(img) + + if self.target_transform is not None: + target = self.target_transform(target) + + return img, target + + def __len__(self): + return len(self.data) + + def _check_integrity(self): + root = self.root + fpath = os.path.join(root, self.filename) + if not check_integrity(fpath, self.md5_checksum): + return False + return True + + def download(self): + if self._check_integrity(): + print('Files already downloaded and verified') + return + + root = self.root + download_url(self.url, root, self.filename, self.md5_checksum) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/stl10.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/stl10.py new file mode 100644 index 0000000000..6bec45afe2 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/stl10.py @@ -0,0 +1,176 @@ +from PIL import Image +import os +import os.path +import numpy as np + +from .vision import VisionDataset +from .utils import check_integrity, download_and_extract_archive, verify_str_arg + + +class STL10(VisionDataset): + """`STL10 `_ Dataset. + + Args: + root (string): Root directory of dataset where directory + ``stl10_binary`` exists. + split (string): One of {'train', 'test', 'unlabeled', 'train+unlabeled'}. + Accordingly dataset is selected. + folds (int, optional): One of {0-9} or None. + For training, loads one of the 10 pre-defined folds of 1k samples for the + standard evaluation procedure. If no value is passed, loads the 5k samples. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + download (bool, optional): If true, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. 
+ + """ + base_folder = 'stl10_binary' + url = "http://ai.stanford.edu/~acoates/stl10/stl10_binary.tar.gz" + filename = "stl10_binary.tar.gz" + tgz_md5 = '91f7769df0f17e558f3565bffb0c7dfb' + class_names_file = 'class_names.txt' + folds_list_file = 'fold_indices.txt' + train_list = [ + ['train_X.bin', '918c2871b30a85fa023e0c44e0bee87f'], + ['train_y.bin', '5a34089d4802c674881badbb80307741'], + ['unlabeled_X.bin', '5242ba1fed5e4be9e1e742405eb56ca4'] + ] + + test_list = [ + ['test_X.bin', '7f263ba9f9e0b06b93213547f721ac82'], + ['test_y.bin', '36f9794fa4beb8a2c72628de14fa638e'] + ] + splits = ('train', 'train+unlabeled', 'unlabeled', 'test') + + def __init__(self, root, split='train', folds=None, transform=None, + target_transform=None, download=False): + super(STL10, self).__init__(root, transform=transform, + target_transform=target_transform) + self.split = verify_str_arg(split, "split", self.splits) + self.folds = self._verify_folds(folds) + + if download: + self.download() + elif not self._check_integrity(): + raise RuntimeError( + 'Dataset not found or corrupted. ' + 'You can use download=True to download it') + + # now load the picked numpy arrays + if self.split == 'train': + self.data, self.labels = self.__loadfile( + self.train_list[0][0], self.train_list[1][0]) + self.__load_folds(folds) + + elif self.split == 'train+unlabeled': + self.data, self.labels = self.__loadfile( + self.train_list[0][0], self.train_list[1][0]) + self.__load_folds(folds) + unlabeled_data, _ = self.__loadfile(self.train_list[2][0]) + self.data = np.concatenate((self.data, unlabeled_data)) + self.labels = np.concatenate( + (self.labels, np.asarray([-1] * unlabeled_data.shape[0]))) + + elif self.split == 'unlabeled': + self.data, _ = self.__loadfile(self.train_list[2][0]) + self.labels = np.asarray([-1] * self.data.shape[0]) + else: # self.split == 'test': + self.data, self.labels = self.__loadfile( + self.test_list[0][0], self.test_list[1][0]) + + class_file = os.path.join( + self.root, self.base_folder, self.class_names_file) + if os.path.isfile(class_file): + with open(class_file) as f: + self.classes = f.read().splitlines() + + def _verify_folds(self, folds): + if folds is None: + return folds + elif isinstance(folds, int): + if folds in range(10): + return folds + msg = ("Value for argument folds should be in the range [0, 10), " + "but got {}.") + raise ValueError(msg.format(folds)) + else: + msg = "Expected type None or int for argument folds, but got type {}." + raise ValueError(msg.format(type(folds))) + + def __getitem__(self, index): + """ + Args: + index (int): Index + + Returns: + tuple: (image, target) where target is index of the target class. 
+ """ + if self.labels is not None: + img, target = self.data[index], int(self.labels[index]) + else: + img, target = self.data[index], None + + # doing this so that it is consistent with all other datasets + # to return a PIL Image + img = Image.fromarray(np.transpose(img, (1, 2, 0))) + + if self.transform is not None: + img = self.transform(img) + + if self.target_transform is not None: + target = self.target_transform(target) + + return img, target + + def __len__(self): + return self.data.shape[0] + + def __loadfile(self, data_file, labels_file=None): + labels = None + if labels_file: + path_to_labels = os.path.join( + self.root, self.base_folder, labels_file) + with open(path_to_labels, 'rb') as f: + labels = np.fromfile(f, dtype=np.uint8) - 1 # 0-based + + path_to_data = os.path.join(self.root, self.base_folder, data_file) + with open(path_to_data, 'rb') as f: + # read whole file in uint8 chunks + everything = np.fromfile(f, dtype=np.uint8) + images = np.reshape(everything, (-1, 3, 96, 96)) + images = np.transpose(images, (0, 1, 3, 2)) + + return images, labels + + def _check_integrity(self): + root = self.root + for fentry in (self.train_list + self.test_list): + filename, md5 = fentry[0], fentry[1] + fpath = os.path.join(root, self.base_folder, filename) + if not check_integrity(fpath, md5): + return False + return True + + def download(self): + if self._check_integrity(): + print('Files already downloaded and verified') + return + download_and_extract_archive(self.url, self.root, filename=self.filename, md5=self.tgz_md5) + self._check_integrity() + + def extra_repr(self): + return "Split: {split}".format(**self.__dict__) + + def __load_folds(self, folds): + # loads one of the folds if specified + if folds is None: + return + path_to_folds = os.path.join( + self.root, self.base_folder, self.folds_list_file) + with open(path_to_folds, 'r') as f: + str_idx = f.read().splitlines()[folds] + list_idx = np.fromstring(str_idx, dtype=np.uint8, sep=' ') + self.data, self.labels = self.data[list_idx, :, :, :], self.labels[list_idx] diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/svhn.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/svhn.py new file mode 100644 index 0000000000..d96d0f3f43 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/svhn.py @@ -0,0 +1,114 @@ +from .vision import VisionDataset +from PIL import Image +import os +import os.path +import numpy as np +from .utils import download_url, check_integrity, verify_str_arg + + +class SVHN(VisionDataset): + """`SVHN `_ Dataset. + Note: The SVHN dataset assigns the label `10` to the digit `0`. However, in this Dataset, + we assign the label `0` to the digit `0` to be compatible with PyTorch loss functions which + expect the class labels to be in the range `[0, C-1]` + + .. warning:: + + This class needs `scipy `_ to load data from `.mat` format. + + Args: + root (string): Root directory of dataset where directory + ``SVHN`` exists. + split (string): One of {'train', 'test', 'extra'}. + Accordingly dataset is selected. 'extra' is Extra training set. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + download (bool, optional): If true, downloads the dataset from the internet and + puts it in root directory. 
If dataset is already downloaded, it is not + downloaded again. + + """ + + split_list = { + 'train': ["http://ufldl.stanford.edu/housenumbers/train_32x32.mat", + "train_32x32.mat", "e26dedcc434d2e4c54c9b2d4a06d8373"], + 'test': ["http://ufldl.stanford.edu/housenumbers/test_32x32.mat", + "test_32x32.mat", "eb5a983be6a315427106f1b164d9cef3"], + 'extra': ["http://ufldl.stanford.edu/housenumbers/extra_32x32.mat", + "extra_32x32.mat", "a93ce644f1a588dc4d68dda5feec44a7"]} + + def __init__(self, root, split='train', transform=None, target_transform=None, + download=False): + super(SVHN, self).__init__(root, transform=transform, + target_transform=target_transform) + self.split = verify_str_arg(split, "split", tuple(self.split_list.keys())) + self.url = self.split_list[split][0] + self.filename = self.split_list[split][1] + self.file_md5 = self.split_list[split][2] + + if download: + self.download() + + if not self._check_integrity(): + raise RuntimeError('Dataset not found or corrupted.' + + ' You can use download=True to download it') + + # import here rather than at top of file because this is + # an optional dependency for torchvision + import scipy.io as sio + + # reading(loading) mat file as array + loaded_mat = sio.loadmat(os.path.join(self.root, self.filename)) + + self.data = loaded_mat['X'] + # loading from the .mat file gives an np array of type np.uint8 + # converting to np.int64, so that we have a LongTensor after + # the conversion from the numpy array + # the squeeze is needed to obtain a 1D tensor + self.labels = loaded_mat['y'].astype(np.int64).squeeze() + + # the svhn dataset assigns the class label "10" to the digit 0 + # this makes it inconsistent with several loss functions + # which expect the class labels to be in the range [0, C-1] + np.place(self.labels, self.labels == 10, 0) + self.data = np.transpose(self.data, (3, 2, 0, 1)) + + def __getitem__(self, index): + """ + Args: + index (int): Index + + Returns: + tuple: (image, target) where target is index of the target class. + """ + img, target = self.data[index], int(self.labels[index]) + + # doing this so that it is consistent with all other datasets + # to return a PIL Image + img = Image.fromarray(np.transpose(img, (1, 2, 0))) + + if self.transform is not None: + img = self.transform(img) + + if self.target_transform is not None: + target = self.target_transform(target) + + return img, target + + def __len__(self): + return len(self.data) + + def _check_integrity(self): + root = self.root + md5 = self.split_list[self.split][2] + fpath = os.path.join(root, self.filename) + return check_integrity(fpath, md5) + + def download(self): + md5 = self.split_list[self.split][2] + download_url(self.url, self.root, self.filename, md5) + + def extra_repr(self): + return "Split: {split}".format(**self.__dict__) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/ucf101.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/ucf101.py new file mode 100644 index 0000000000..43d8124bd4 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/ucf101.py @@ -0,0 +1,107 @@ +import glob +import os + +from .utils import list_dir +from .folder import make_dataset +from .video_utils import VideoClips +from .vision import VisionDataset + + +class UCF101(VisionDataset): + """ + `UCF101 `_ dataset. + + UCF101 is an action recognition video dataset. 
+ This dataset consider every video as a collection of video clips of fixed size, specified + by ``frames_per_clip``, where the step in frames between each clip is given by + ``step_between_clips``. + + To give an example, for 2 videos with 10 and 15 frames respectively, if ``frames_per_clip=5`` + and ``step_between_clips=5``, the dataset size will be (2 + 3) = 5, where the first two + elements will come from video 1, and the next three elements from video 2. + Note that we drop clips which do not have exactly ``frames_per_clip`` elements, so not all + frames in a video might be present. + + Internally, it uses a VideoClips object to handle clip creation. + + Args: + root (string): Root directory of the UCF101 Dataset. + annotation_path (str): path to the folder containing the split files + frames_per_clip (int): number of frames in a clip. + step_between_clips (int, optional): number of frames between each clip. + fold (int, optional): which fold to use. Should be between 1 and 3. + train (bool, optional): if ``True``, creates a dataset from the train split, + otherwise from the ``test`` split. + transform (callable, optional): A function/transform that takes in a TxHxWxC video + and returns a transformed version. + + Returns: + video (Tensor[T, H, W, C]): the `T` video frames + audio(Tensor[K, L]): the audio frames, where `K` is the number of channels + and `L` is the number of points + label (int): class of the video clip + """ + + def __init__(self, root, annotation_path, frames_per_clip, step_between_clips=1, + frame_rate=None, fold=1, train=True, transform=None, + _precomputed_metadata=None, num_workers=1, _video_width=0, + _video_height=0, _video_min_dimension=0, _audio_samples=0): + super(UCF101, self).__init__(root) + if not 1 <= fold <= 3: + raise ValueError("fold should be between 1 and 3, got {}".format(fold)) + + extensions = ('avi',) + self.fold = fold + self.train = train + + classes = list(sorted(list_dir(root))) + class_to_idx = {classes[i]: i for i in range(len(classes))} + self.samples = make_dataset(self.root, class_to_idx, extensions, is_valid_file=None) + self.classes = classes + video_list = [x[0] for x in self.samples] + video_clips = VideoClips( + video_list, + frames_per_clip, + step_between_clips, + frame_rate, + _precomputed_metadata, + num_workers=num_workers, + _video_width=_video_width, + _video_height=_video_height, + _video_min_dimension=_video_min_dimension, + _audio_samples=_audio_samples, + ) + self.video_clips_metadata = video_clips.metadata + self.indices = self._select_fold(video_list, annotation_path, fold, train) + self.video_clips = video_clips.subset(self.indices) + self.transform = transform + + @property + def metadata(self): + return self.video_clips_metadata + + def _select_fold(self, video_list, annotation_path, fold, train): + name = "train" if train else "test" + name = "{}list{:02d}.txt".format(name, fold) + f = os.path.join(annotation_path, name) + selected_files = [] + with open(f, "r") as fid: + data = fid.readlines() + data = [x.strip().split(" ") for x in data] + data = [x[0] for x in data] + selected_files.extend(data) + selected_files = set(selected_files) + indices = [i for i in range(len(video_list)) if video_list[i][len(self.root) + 1:] in selected_files] + return indices + + def __len__(self): + return self.video_clips.num_clips() + + def __getitem__(self, idx): + video, audio, info, video_idx = self.video_clips.get_clip(idx) + label = self.samples[self.indices[video_idx]][1] + + if self.transform is not None: + video = 
self.transform(video) + + return video, audio, label diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/usps.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/usps.py new file mode 100644 index 0000000000..06f1fd0596 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/usps.py @@ -0,0 +1,84 @@ +from PIL import Image +import os +import numpy as np + +from .utils import download_url +from .vision import VisionDataset + + +class USPS(VisionDataset): + """`USPS `_ Dataset. + The data-format is : [label [index:value ]*256 \\n] * num_lines, where ``label`` lies in ``[1, 10]``. + The value for each pixel lies in ``[-1, 1]``. Here we transform the ``label`` into ``[0, 9]`` + and make pixel values in ``[0, 255]``. + + Args: + root (string): Root directory of dataset to store``USPS`` data files. + train (bool, optional): If True, creates dataset from ``usps.bz2``, + otherwise from ``usps.t.bz2``. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + download (bool, optional): If true, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. + + """ + split_list = { + 'train': [ + "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/usps.bz2", + "usps.bz2", 'ec16c51db3855ca6c91edd34d0e9b197' + ], + 'test': [ + "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/usps.t.bz2", + "usps.t.bz2", '8ea070ee2aca1ac39742fdd1ef5ed118' + ], + } + + def __init__(self, root, train=True, transform=None, target_transform=None, + download=False): + super(USPS, self).__init__(root, transform=transform, + target_transform=target_transform) + split = 'train' if train else 'test' + url, filename, checksum = self.split_list[split] + full_path = os.path.join(self.root, filename) + + if download and not os.path.exists(full_path): + download_url(url, self.root, filename, md5=checksum) + + import bz2 + with bz2.open(full_path) as fp: + raw_data = [l.decode().split() for l in fp.readlines()] + imgs = [[x.split(':')[-1] for x in data[1:]] for data in raw_data] + imgs = np.asarray(imgs, dtype=np.float32).reshape((-1, 16, 16)) + imgs = ((imgs + 1) / 2 * 255).astype(dtype=np.uint8) + targets = [int(d[0]) - 1 for d in raw_data] + + self.data = imgs + self.targets = targets + + def __getitem__(self, index): + """ + Args: + index (int): Index + + Returns: + tuple: (image, target) where target is index of the target class. 
+ """ + img, target = self.data[index], int(self.targets[index]) + + # doing this so that it is consistent with all other datasets + # to return a PIL Image + img = Image.fromarray(img, mode='L') + + if self.transform is not None: + img = self.transform(img) + + if self.target_transform is not None: + target = self.target_transform(target) + + return img, target + + def __len__(self): + return len(self.data) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/utils.py new file mode 100644 index 0000000000..6689eef649 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/utils.py @@ -0,0 +1,282 @@ +import os +import os.path +import hashlib +import gzip +import errno +import tarfile +import zipfile + +import torch +from torch.utils.model_zoo import tqdm + + +def gen_bar_updater(): + pbar = tqdm(total=None) + + def bar_update(count, block_size, total_size): + if pbar.total is None and total_size: + pbar.total = total_size + progress_bytes = count * block_size + pbar.update(progress_bytes - pbar.n) + + return bar_update + + +def calculate_md5(fpath, chunk_size=1024 * 1024): + md5 = hashlib.md5() + with open(fpath, 'rb') as f: + for chunk in iter(lambda: f.read(chunk_size), b''): + md5.update(chunk) + return md5.hexdigest() + + +def check_md5(fpath, md5, **kwargs): + return md5 == calculate_md5(fpath, **kwargs) + + +def check_integrity(fpath, md5=None): + if not os.path.isfile(fpath): + return False + if md5 is None: + return True + return check_md5(fpath, md5) + + +def download_url(url, root, filename=None, md5=None): + """Download a file from a url and place it in root. + + Args: + url (str): URL to download file from + root (str): Directory to place downloaded file in + filename (str, optional): Name to save the file under. If None, use the basename of the URL + md5 (str, optional): MD5 checksum of the download. If None, do not check + """ + import urllib + + root = os.path.expanduser(root) + if not filename: + filename = os.path.basename(url) + fpath = os.path.join(root, filename) + + os.makedirs(root, exist_ok=True) + + # check if file is already present locally + if check_integrity(fpath, md5): + print('Using downloaded and verified file: ' + fpath) + else: # download the file + try: + print('Downloading ' + url + ' to ' + fpath) + urllib.request.urlretrieve( + url, fpath, + reporthook=gen_bar_updater() + ) + except (urllib.error.URLError, IOError) as e: + if url[:5] == 'https': + url = url.replace('https:', 'http:') + print('Failed download. Trying https -> http instead.' 
+ ' Downloading ' + url + ' to ' + fpath) + urllib.request.urlretrieve( + url, fpath, + reporthook=gen_bar_updater() + ) + else: + raise e + # check integrity of downloaded file + if not check_integrity(fpath, md5): + raise RuntimeError("File not found or corrupted.") + + +def list_dir(root, prefix=False): + """List all directories at a given root + + Args: + root (str): Path to directory whose folders need to be listed + prefix (bool, optional): If true, prepends the path to each result, otherwise + only returns the name of the directories found + """ + root = os.path.expanduser(root) + directories = list( + filter( + lambda p: os.path.isdir(os.path.join(root, p)), + os.listdir(root) + ) + ) + + if prefix is True: + directories = [os.path.join(root, d) for d in directories] + + return directories + + +def list_files(root, suffix, prefix=False): + """List all files ending with a suffix at a given root + + Args: + root (str): Path to directory whose folders need to be listed + suffix (str or tuple): Suffix of the files to match, e.g. '.png' or ('.jpg', '.png'). + It uses the Python "str.endswith" method and is passed directly + prefix (bool, optional): If true, prepends the path to each result, otherwise + only returns the name of the files found + """ + root = os.path.expanduser(root) + files = list( + filter( + lambda p: os.path.isfile(os.path.join(root, p)) and p.endswith(suffix), + os.listdir(root) + ) + ) + + if prefix is True: + files = [os.path.join(root, d) for d in files] + + return files + + +def download_file_from_google_drive(file_id, root, filename=None, md5=None): + """Download a Google Drive file from and place it in root. + + Args: + file_id (str): id of file to be downloaded + root (str): Directory to place downloaded file in + filename (str, optional): Name to save the file under. If None, use the id of the file. + md5 (str, optional): MD5 checksum of the download. 
If None, do not check + """ + # Based on https://stackoverflow.com/questions/38511444/python-download-files-from-google-drive-using-url + import requests + url = "https://docs.google.com/uc?export=download" + + root = os.path.expanduser(root) + if not filename: + filename = file_id + fpath = os.path.join(root, filename) + + os.makedirs(root, exist_ok=True) + + if os.path.isfile(fpath) and check_integrity(fpath, md5): + print('Using downloaded and verified file: ' + fpath) + else: + session = requests.Session() + + response = session.get(url, params={'id': file_id}, stream=True) + token = _get_confirm_token(response) + + if token: + params = {'id': file_id, 'confirm': token} + response = session.get(url, params=params, stream=True) + + _save_response_content(response, fpath) + + +def _get_confirm_token(response): + for key, value in response.cookies.items(): + if key.startswith('download_warning'): + return value + + return None + + +def _save_response_content(response, destination, chunk_size=32768): + with open(destination, "wb") as f: + pbar = tqdm(total=None) + progress = 0 + for chunk in response.iter_content(chunk_size): + if chunk: # filter out keep-alive new chunks + f.write(chunk) + progress += len(chunk) + pbar.update(progress - pbar.n) + pbar.close() + + +def _is_tarxz(filename): + return filename.endswith(".tar.xz") + + +def _is_tar(filename): + return filename.endswith(".tar") + + +def _is_targz(filename): + return filename.endswith(".tar.gz") + + +def _is_tgz(filename): + return filename.endswith(".tgz") + + +def _is_gzip(filename): + return filename.endswith(".gz") and not filename.endswith(".tar.gz") + + +def _is_zip(filename): + return filename.endswith(".zip") + + +def extract_archive(from_path, to_path=None, remove_finished=False): + if to_path is None: + to_path = os.path.dirname(from_path) + + if _is_tar(from_path): + with tarfile.open(from_path, 'r') as tar: + tar.extractall(path=to_path) + elif _is_targz(from_path) or _is_tgz(from_path): + with tarfile.open(from_path, 'r:gz') as tar: + tar.extractall(path=to_path) + elif _is_tarxz(from_path): + with tarfile.open(from_path, 'r:xz') as tar: + tar.extractall(path=to_path) + elif _is_gzip(from_path): + to_path = os.path.join(to_path, os.path.splitext(os.path.basename(from_path))[0]) + with open(to_path, "wb") as out_f, gzip.GzipFile(from_path) as zip_f: + out_f.write(zip_f.read()) + elif _is_zip(from_path): + with zipfile.ZipFile(from_path, 'r') as z: + z.extractall(to_path) + else: + raise ValueError("Extraction of {} not supported".format(from_path)) + + if remove_finished: + os.remove(from_path) + + +def download_and_extract_archive(url, download_root, extract_root=None, filename=None, + md5=None, remove_finished=False): + download_root = os.path.expanduser(download_root) + if extract_root is None: + extract_root = download_root + if not filename: + filename = os.path.basename(url) + + download_url(url, download_root, filename, md5) + + archive = os.path.join(download_root, filename) + print("Extracting {} to {}".format(archive, extract_root)) + extract_archive(archive, extract_root, remove_finished) + + +def iterable_to_str(iterable): + return "'" + "', '".join([str(item) for item in iterable]) + "'" + + +def verify_str_arg(value, arg=None, valid_values=None, custom_msg=None): + if not isinstance(value, torch._six.string_classes): + if arg is None: + msg = "Expected type str, but got type {type}." + else: + msg = "Expected type str for argument {arg}, but got type {type}." 
+ msg = msg.format(type=type(value), arg=arg) + raise ValueError(msg) + + if valid_values is None: + return value + + if value not in valid_values: + if custom_msg is not None: + msg = custom_msg + else: + msg = ("Unknown value '{value}' for argument {arg}. " + "Valid values are {{{valid_values}}}.") + msg = msg.format(value=value, arg=arg, + valid_values=iterable_to_str(valid_values)) + raise ValueError(msg) + + return value diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/video_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/video_utils.py new file mode 100644 index 0000000000..5c9244e545 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/video_utils.py @@ -0,0 +1,367 @@ +import bisect +import math +from fractions import Fraction + +import torch +from torchvision.io import ( + _probe_video_from_file, + _read_video_from_file, + _read_video_timestamps_from_file, + read_video, + read_video_timestamps, +) + +from .utils import tqdm + + +def pts_convert(pts, timebase_from, timebase_to, round_func=math.floor): + """convert pts between different time bases + Args: + pts: presentation timestamp, float + timebase_from: original timebase. Fraction + timebase_to: new timebase. Fraction + round_func: rounding function. + """ + new_pts = Fraction(pts, 1) * timebase_from / timebase_to + return round_func(new_pts) + + +def unfold(tensor, size, step, dilation=1): + """ + similar to tensor.unfold, but with the dilation + and specialized for 1d tensors + + Returns all consecutive windows of `size` elements, with + `step` between windows. The distance between each element + in a window is given by `dilation`. + """ + assert tensor.dim() == 1 + o_stride = tensor.stride(0) + numel = tensor.numel() + new_stride = (step * o_stride, dilation * o_stride) + new_size = ((numel - (dilation * (size - 1) + 1)) // step + 1, size) + if new_size[0] < 1: + new_size = (0, size) + return torch.as_strided(tensor, new_size, new_stride) + + +class _DummyDataset(object): + """ + Dummy dataset used for DataLoader in VideoClips. + Defined at top level so it can be pickled when forking. + """ + + def __init__(self, x): + self.x = x + + def __len__(self): + return len(self.x) + + def __getitem__(self, idx): + return read_video_timestamps(self.x[idx]) + + +class VideoClips(object): + """ + Given a list of video files, computes all consecutive subvideos of size + `clip_length_in_frames`, where the distance between each subvideo in the + same video is defined by `frames_between_clips`. + If `frame_rate` is specified, it will also resample all the videos to have + the same frame rate, and the clips will refer to this frame rate. + + Creating this instance the first time is time-consuming, as it needs to + decode all the videos in `video_paths`. It is recommended that you + cache the results after instantiation of the class. + + Recreating the clips for different clip lengths is fast, and can be done + with the `compute_clips` method. + + Arguments: + video_paths (List[str]): paths to the video files + clip_length_in_frames (int): size of a clip in number of frames + frames_between_clips (int): step (in frames) between each clip + frame_rate (int, optional): if specified, it will resample the video + so that it has `frame_rate`, and then the clips will be defined + on the resampled video + num_workers (int): how many subprocesses to use for data loading. + 0 means that the data will be loaded in the main process. 
(default: 0) + """ + + def __init__( + self, + video_paths, + clip_length_in_frames=16, + frames_between_clips=1, + frame_rate=None, + _precomputed_metadata=None, + num_workers=0, + _video_width=0, + _video_height=0, + _video_min_dimension=0, + _video_max_dimension=0, + _audio_samples=0, + _audio_channels=0, + ): + + self.video_paths = video_paths + self.num_workers = num_workers + + # these options are not valid for pyav backend + self._video_width = _video_width + self._video_height = _video_height + self._video_min_dimension = _video_min_dimension + self._video_max_dimension = _video_max_dimension + self._audio_samples = _audio_samples + self._audio_channels = _audio_channels + + if _precomputed_metadata is None: + self._compute_frame_pts() + else: + self._init_from_metadata(_precomputed_metadata) + self.compute_clips(clip_length_in_frames, frames_between_clips, frame_rate) + + def _collate_fn(self, x): + return x + + def _compute_frame_pts(self): + self.video_pts = [] + self.video_fps = [] + + # strategy: use a DataLoader to parallelize read_video_timestamps + # so need to create a dummy dataset first + import torch.utils.data + + dl = torch.utils.data.DataLoader( + _DummyDataset(self.video_paths), + batch_size=16, + num_workers=self.num_workers, + collate_fn=self._collate_fn, + ) + + with tqdm(total=len(dl)) as pbar: + for batch in dl: + pbar.update(1) + clips, fps = list(zip(*batch)) + clips = [torch.as_tensor(c) for c in clips] + self.video_pts.extend(clips) + self.video_fps.extend(fps) + + def _init_from_metadata(self, metadata): + self.video_paths = metadata["video_paths"] + assert len(self.video_paths) == len(metadata["video_pts"]) + self.video_pts = metadata["video_pts"] + assert len(self.video_paths) == len(metadata["video_fps"]) + self.video_fps = metadata["video_fps"] + + @property + def metadata(self): + _metadata = { + "video_paths": self.video_paths, + "video_pts": self.video_pts, + "video_fps": self.video_fps, + } + return _metadata + + def subset(self, indices): + video_paths = [self.video_paths[i] for i in indices] + video_pts = [self.video_pts[i] for i in indices] + video_fps = [self.video_fps[i] for i in indices] + metadata = { + "video_paths": video_paths, + "video_pts": video_pts, + "video_fps": video_fps, + } + return type(self)( + video_paths, + self.num_frames, + self.step, + self.frame_rate, + _precomputed_metadata=metadata, + num_workers=self.num_workers, + _video_width=self._video_width, + _video_height=self._video_height, + _video_min_dimension=self._video_min_dimension, + _video_max_dimension=self._video_max_dimension, + _audio_samples=self._audio_samples, + _audio_channels=self._audio_channels, + ) + + @staticmethod + def compute_clips_for_video(video_pts, num_frames, step, fps, frame_rate): + if fps is None: + # if for some reason the video doesn't have fps (because doesn't have a video stream) + # set the fps to 1. The value doesn't matter, because video_pts is empty anyway + fps = 1 + if frame_rate is None: + frame_rate = fps + total_frames = len(video_pts) * (float(frame_rate) / fps) + idxs = VideoClips._resample_video_idx( + int(math.floor(total_frames)), fps, frame_rate + ) + video_pts = video_pts[idxs] + clips = unfold(video_pts, num_frames, step) + if isinstance(idxs, slice): + idxs = [idxs] * len(clips) + else: + idxs = unfold(idxs, num_frames, step) + return clips, idxs + + def compute_clips(self, num_frames, step, frame_rate=None): + """ + Compute all consecutive sequences of clips from video_pts. 
+ Always returns clips of size `num_frames`, meaning that the + last few frames in a video can potentially be dropped. + + Arguments: + num_frames (int): number of frames for the clip + step (int): distance between two clips + """ + self.num_frames = num_frames + self.step = step + self.frame_rate = frame_rate + self.clips = [] + self.resampling_idxs = [] + for video_pts, fps in zip(self.video_pts, self.video_fps): + clips, idxs = self.compute_clips_for_video( + video_pts, num_frames, step, fps, frame_rate + ) + self.clips.append(clips) + self.resampling_idxs.append(idxs) + clip_lengths = torch.as_tensor([len(v) for v in self.clips]) + self.cumulative_sizes = clip_lengths.cumsum(0).tolist() + + def __len__(self): + return self.num_clips() + + def num_videos(self): + return len(self.video_paths) + + def num_clips(self): + """ + Number of subclips that are available in the video list. + """ + return self.cumulative_sizes[-1] + + def get_clip_location(self, idx): + """ + Converts a flattened representation of the indices into a video_idx, clip_idx + representation. + """ + video_idx = bisect.bisect_right(self.cumulative_sizes, idx) + if video_idx == 0: + clip_idx = idx + else: + clip_idx = idx - self.cumulative_sizes[video_idx - 1] + return video_idx, clip_idx + + @staticmethod + def _resample_video_idx(num_frames, original_fps, new_fps): + step = float(original_fps) / new_fps + if step.is_integer(): + # optimization: if step is integer, don't need to perform + # advanced indexing + step = int(step) + return slice(None, None, step) + idxs = torch.arange(num_frames, dtype=torch.float32) * step + idxs = idxs.floor().to(torch.int64) + return idxs + + def get_clip(self, idx): + """ + Gets a subclip from a list of videos. + + Arguments: + idx (int): index of the subclip. Must be between 0 and num_clips(). 
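+                An ``IndexError`` is raised when ``idx`` is not smaller than
+                ``num_clips()``.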
+ + Returns: + video (Tensor) + audio (Tensor) + info (Dict) + video_idx (int): index of the video in `video_paths` + """ + if idx >= self.num_clips(): + raise IndexError( + "Index {} out of range " + "({} number of clips)".format(idx, self.num_clips()) + ) + video_idx, clip_idx = self.get_clip_location(idx) + video_path = self.video_paths[video_idx] + clip_pts = self.clips[video_idx][clip_idx] + + from torchvision import get_video_backend + + backend = get_video_backend() + + if backend == "pyav": + # check for invalid options + if self._video_width != 0: + raise ValueError("pyav backend doesn't support _video_width != 0") + if self._video_height != 0: + raise ValueError("pyav backend doesn't support _video_height != 0") + if self._video_min_dimension != 0: + raise ValueError( + "pyav backend doesn't support _video_min_dimension != 0" + ) + if self._video_max_dimension != 0: + raise ValueError( + "pyav backend doesn't support _video_max_dimension != 0" + ) + if self._audio_samples != 0: + raise ValueError("pyav backend doesn't support _audio_samples != 0") + + if backend == "pyav": + start_pts = clip_pts[0].item() + end_pts = clip_pts[-1].item() + video, audio, info = read_video(video_path, start_pts, end_pts) + else: + info = _probe_video_from_file(video_path) + video_fps = info.video_fps + audio_fps = None + + video_start_pts = clip_pts[0].item() + video_end_pts = clip_pts[-1].item() + + audio_start_pts, audio_end_pts = 0, -1 + audio_timebase = Fraction(0, 1) + video_timebase = Fraction( + info.video_timebase.numerator, info.video_timebase.denominator + ) + if info.has_audio: + audio_timebase = Fraction( + info.audio_timebase.numerator, info.audio_timebase.denominator + ) + audio_start_pts = pts_convert( + video_start_pts, video_timebase, audio_timebase, math.floor + ) + audio_end_pts = pts_convert( + video_end_pts, video_timebase, audio_timebase, math.ceil + ) + audio_fps = info.audio_sample_rate + video, audio, info = _read_video_from_file( + video_path, + video_width=self._video_width, + video_height=self._video_height, + video_min_dimension=self._video_min_dimension, + video_max_dimension=self._video_max_dimension, + video_pts_range=(video_start_pts, video_end_pts), + video_timebase=video_timebase, + audio_samples=self._audio_samples, + audio_channels=self._audio_channels, + audio_pts_range=(audio_start_pts, audio_end_pts), + audio_timebase=audio_timebase, + ) + + info = {"video_fps": video_fps} + if audio_fps is not None: + info["audio_fps"] = audio_fps + + if self.frame_rate is not None: + resampling_idx = self.resampling_idxs[video_idx][clip_idx] + if isinstance(resampling_idx, torch.Tensor): + resampling_idx = resampling_idx - resampling_idx[0] + video = video[resampling_idx] + info["video_fps"] = self.frame_rate + assert len(video) == self.num_frames, "{} x {}".format( + video.shape, self.num_frames + ) + return video, audio, info, video_idx diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/vision.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/vision.py new file mode 100644 index 0000000000..7ee5a84dfc --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/vision.py @@ -0,0 +1,80 @@ +import os +import torch +import torch.utils.data as data + + +class VisionDataset(data.Dataset): + _repr_indent = 4 + + def __init__(self, root, transforms=None, transform=None, target_transform=None): + if isinstance(root, torch._six.string_classes): + root = os.path.expanduser(root) + self.root = root + + 
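+        # `transforms` is a single joint callable over (input, target), while
+        # `transform`/`target_transform` act on the input and target separately;
+        # the check below rejects passing both styles at once.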
has_transforms = transforms is not None + has_separate_transform = transform is not None or target_transform is not None + if has_transforms and has_separate_transform: + raise ValueError("Only transforms or transform/target_transform can " + "be passed as argument") + + # for backwards-compatibility + self.transform = transform + self.target_transform = target_transform + + if has_separate_transform: + transforms = StandardTransform(transform, target_transform) + self.transforms = transforms + + def __getitem__(self, index): + raise NotImplementedError + + def __len__(self): + raise NotImplementedError + + def __repr__(self): + head = "Dataset " + self.__class__.__name__ + body = ["Number of datapoints: {}".format(self.__len__())] + if self.root is not None: + body.append("Root location: {}".format(self.root)) + body += self.extra_repr().splitlines() + if hasattr(self, "transforms") and self.transforms is not None: + body += [repr(self.transforms)] + lines = [head] + [" " * self._repr_indent + line for line in body] + return '\n'.join(lines) + + def _format_transform_repr(self, transform, head): + lines = transform.__repr__().splitlines() + return (["{}{}".format(head, lines[0])] + + ["{}{}".format(" " * len(head), line) for line in lines[1:]]) + + def extra_repr(self): + return "" + + +class StandardTransform(object): + def __init__(self, transform=None, target_transform=None): + self.transform = transform + self.target_transform = target_transform + + def __call__(self, input, target): + if self.transform is not None: + input = self.transform(input) + if self.target_transform is not None: + target = self.target_transform(target) + return input, target + + def _format_transform_repr(self, transform, head): + lines = transform.__repr__().splitlines() + return (["{}{}".format(head, lines[0])] + + ["{}{}".format(" " * len(head), line) for line in lines[1:]]) + + def __repr__(self): + body = [self.__class__.__name__] + if self.transform is not None: + body += self._format_transform_repr(self.transform, + "Transform: ") + if self.target_transform is not None: + body += self._format_transform_repr(self.target_transform, + "Target transform: ") + + return '\n'.join(body) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/voc.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/voc.py new file mode 100644 index 0000000000..2be53c4fcc --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/voc.py @@ -0,0 +1,242 @@ +import os +import tarfile +import collections +from .vision import VisionDataset +import xml.etree.ElementTree as ET +from PIL import Image +from .utils import download_url, check_integrity, verify_str_arg + +DATASET_YEAR_DICT = { + '2012': { + 'url': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar', + 'filename': 'VOCtrainval_11-May-2012.tar', + 'md5': '6cd6e144f989b92b3379bac3b3de84fd', + 'base_dir': os.path.join('VOCdevkit', 'VOC2012') + }, + '2011': { + 'url': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2011/VOCtrainval_25-May-2011.tar', + 'filename': 'VOCtrainval_25-May-2011.tar', + 'md5': '6c3384ef61512963050cb5d687e5bf1e', + 'base_dir': os.path.join('TrainVal', 'VOCdevkit', 'VOC2011') + }, + '2010': { + 'url': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2010/VOCtrainval_03-May-2010.tar', + 'filename': 'VOCtrainval_03-May-2010.tar', + 'md5': 'da459979d0c395079b5c75ee67908abb', + 'base_dir': os.path.join('VOCdevkit', 'VOC2010') + }, + '2009': { + 'url': 
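
`VisionDataset` accepts either a single joint `transforms` callable or the legacy `transform`/`target_transform` pair, wrapping the latter in `StandardTransform`; passing both raises a `ValueError`. A small sketch of the wrapping behaviour using the class defined above (the lambdas are placeholders):

```python
# StandardTransform applies the two callables independently to input and target
joint = StandardTransform(transform=lambda img: img.lower(),
                          target_transform=lambda t: t + 1)
print(joint("IMG", 0))      # ('img', 1)
```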
'http://host.robots.ox.ac.uk/pascal/VOC/voc2009/VOCtrainval_11-May-2009.tar', + 'filename': 'VOCtrainval_11-May-2009.tar', + 'md5': '59065e4b188729180974ef6572f6a212', + 'base_dir': os.path.join('VOCdevkit', 'VOC2009') + }, + '2008': { + 'url': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2008/VOCtrainval_14-Jul-2008.tar', + 'filename': 'VOCtrainval_11-May-2012.tar', + 'md5': '2629fa636546599198acfcfbfcf1904a', + 'base_dir': os.path.join('VOCdevkit', 'VOC2008') + }, + '2007': { + 'url': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar', + 'filename': 'VOCtrainval_06-Nov-2007.tar', + 'md5': 'c52e279531787c972589f7e41ab4ae64', + 'base_dir': os.path.join('VOCdevkit', 'VOC2007') + }, + '2007-test': { + 'url': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar', + 'filename': 'VOCtest_06-Nov-2007.tar', + 'md5': 'b6e924de25625d8de591ea690078ad9f', + 'base_dir': os.path.join('VOCdevkit', 'VOC2007') + } +} + + +class VOCSegmentation(VisionDataset): + """`Pascal VOC `_ Segmentation Dataset. + + Args: + root (string): Root directory of the VOC Dataset. + year (string, optional): The dataset year, supports years 2007 to 2012. + image_set (string, optional): Select the image_set to use, ``train``, ``trainval`` or ``val`` + download (bool, optional): If true, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + """ + + def __init__(self, + root, + year='2012', + image_set='train', + download=False, + transform=None, + target_transform=None, + transforms=None): + super(VOCSegmentation, self).__init__(root, transforms, transform, target_transform) + self.year = year + if year == "2007" and image_set == "test": + year = "2007-test" + self.url = DATASET_YEAR_DICT[year]['url'] + self.filename = DATASET_YEAR_DICT[year]['filename'] + self.md5 = DATASET_YEAR_DICT[year]['md5'] + valid_sets = ["train", "trainval", "val"] + if year == "2007-test": + valid_sets.append("test") + self.image_set = verify_str_arg(image_set, "image_set", valid_sets) + base_dir = DATASET_YEAR_DICT[year]['base_dir'] + voc_root = os.path.join(self.root, base_dir) + image_dir = os.path.join(voc_root, 'JPEGImages') + mask_dir = os.path.join(voc_root, 'SegmentationClass') + + if download: + download_extract(self.url, self.root, self.filename, self.md5) + + if not os.path.isdir(voc_root): + raise RuntimeError('Dataset not found or corrupted.' + + ' You can use download=True to download it') + + splits_dir = os.path.join(voc_root, 'ImageSets/Segmentation') + + split_f = os.path.join(splits_dir, image_set.rstrip('\n') + '.txt') + + with open(os.path.join(split_f), "r") as f: + file_names = [x.strip() for x in f.readlines()] + + self.images = [os.path.join(image_dir, x + ".jpg") for x in file_names] + self.masks = [os.path.join(mask_dir, x + ".png") for x in file_names] + assert (len(self.images) == len(self.masks)) + + def __getitem__(self, index): + """ + Args: + index (int): Index + + Returns: + tuple: (image, target) where target is the image segmentation. 
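
A hedged usage sketch for `VOCSegmentation`: the root path is hypothetical, and with `download=True` the 2012 trainval archive is fetched into it if missing.

```python
from torchvision.datasets import VOCSegmentation

ds = VOCSegmentation("/data/voc", year="2012", image_set="train", download=True)
img, mask = ds[0]        # both PIL images; mask pixel values are class indices (255 = void)
print(len(ds), img.size, mask.size)
```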
+ """ + img = Image.open(self.images[index]).convert('RGB') + target = Image.open(self.masks[index]) + + if self.transforms is not None: + img, target = self.transforms(img, target) + + return img, target + + def __len__(self): + return len(self.images) + + +class VOCDetection(VisionDataset): + """`Pascal VOC `_ Detection Dataset. + + Args: + root (string): Root directory of the VOC Dataset. + year (string, optional): The dataset year, supports years 2007 to 2012. + image_set (string, optional): Select the image_set to use, ``train``, ``trainval`` or ``val`` + download (bool, optional): If true, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. + (default: alphabetic indexing of VOC's 20 classes). + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, required): A function/transform that takes in the + target and transforms it. + transforms (callable, optional): A function/transform that takes input sample and its target as entry + and returns a transformed version. + """ + + def __init__(self, + root, + year='2012', + image_set='train', + download=False, + transform=None, + target_transform=None, + transforms=None): + super(VOCDetection, self).__init__(root, transforms, transform, target_transform) + self.year = year + if year == "2007" and image_set == "test": + year = "2007-test" + self.url = DATASET_YEAR_DICT[year]['url'] + self.filename = DATASET_YEAR_DICT[year]['filename'] + self.md5 = DATASET_YEAR_DICT[year]['md5'] + valid_sets = ["train", "trainval", "val"] + if year == "2007-test": + valid_sets.append("test") + self.image_set = verify_str_arg(image_set, "image_set", valid_sets) + + base_dir = DATASET_YEAR_DICT[year]['base_dir'] + voc_root = os.path.join(self.root, base_dir) + image_dir = os.path.join(voc_root, 'JPEGImages') + annotation_dir = os.path.join(voc_root, 'Annotations') + + if download: + download_extract(self.url, self.root, self.filename, self.md5) + + if not os.path.isdir(voc_root): + raise RuntimeError('Dataset not found or corrupted.' + + ' You can use download=True to download it') + + splits_dir = os.path.join(voc_root, 'ImageSets/Main') + + split_f = os.path.join(splits_dir, image_set.rstrip('\n') + '.txt') + + with open(os.path.join(split_f), "r") as f: + file_names = [x.strip() for x in f.readlines()] + + self.images = [os.path.join(image_dir, x + ".jpg") for x in file_names] + self.annotations = [os.path.join(annotation_dir, x + ".xml") for x in file_names] + assert (len(self.images) == len(self.annotations)) + + def __getitem__(self, index): + """ + Args: + index (int): Index + + Returns: + tuple: (image, target) where target is a dictionary of the XML tree. 
+ """ + img = Image.open(self.images[index]).convert('RGB') + target = self.parse_voc_xml( + ET.parse(self.annotations[index]).getroot()) + + if self.transforms is not None: + img, target = self.transforms(img, target) + + return img, target + + def __len__(self): + return len(self.images) + + def parse_voc_xml(self, node): + voc_dict = {} + children = list(node) + if children: + def_dic = collections.defaultdict(list) + for dc in map(self.parse_voc_xml, children): + for ind, v in dc.items(): + def_dic[ind].append(v) + if node.tag == 'annotation': + def_dic['object'] = [def_dic['object']] + voc_dict = { + node.tag: + {ind: v[0] if len(v) == 1 else v + for ind, v in def_dic.items()} + } + if node.text: + text = node.text.strip() + if not children: + voc_dict[node.tag] = text + return voc_dict + + +def download_extract(url, root, filename, md5): + download_url(url, root, filename, md5) + with tarfile.open(os.path.join(root, filename), "r") as tar: + tar.extractall(path=root) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/extension.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/extension.py new file mode 100644 index 0000000000..db3356aa67 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/extension.py @@ -0,0 +1,58 @@ +_HAS_OPS = False + + +def _register_extensions(): + import os + import importlib + import torch + + # load the custom_op_library and register the custom ops + lib_dir = os.path.dirname(__file__) + loader_details = ( + importlib.machinery.ExtensionFileLoader, + importlib.machinery.EXTENSION_SUFFIXES + ) + + extfinder = importlib.machinery.FileFinder(lib_dir, loader_details) + ext_specs = extfinder.find_spec("_C") + if ext_specs is None: + raise ImportError + torch.ops.load_library(ext_specs.origin) + + +try: + _register_extensions() + _HAS_OPS = True +except (ImportError, OSError): + pass + + +def _check_cuda_version(): + """ + Make sure that CUDA versions match between the pytorch install and torchvision install + """ + if not _HAS_OPS: + return -1 + import torch + _version = torch.ops.torchvision._cuda_version() + if _version != -1 and torch.version.cuda is not None: + tv_version = str(_version) + if int(tv_version) < 10000: + tv_major = int(tv_version[0]) + tv_minor = int(tv_version[2]) + else: + tv_major = int(tv_version[0:2]) + tv_minor = int(tv_version[3]) + t_version = torch.version.cuda + t_version = t_version.split('.') + t_major = int(t_version[0]) + t_minor = int(t_version[1]) + if t_major != tv_major or t_minor != tv_minor: + raise RuntimeError("Detected that PyTorch and torchvision were compiled with different CUDA versions. " + "PyTorch has CUDA Version={}.{} and torchvision has CUDA Version={}.{}. " + "Please reinstall the torchvision that matches your PyTorch install." 
+ .format(t_major, t_minor, tv_major, tv_minor)) + return _version + + +_check_cuda_version() diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/__init__.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/__init__.py new file mode 100644 index 0000000000..cbbf560412 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/__init__.py @@ -0,0 +1,34 @@ +from ._video_opt import ( + Timebase, + VideoMetaData, + _HAS_VIDEO_OPT, + _probe_video_from_file, + _probe_video_from_memory, + _read_video_from_file, + _read_video_from_memory, + _read_video_timestamps_from_file, + _read_video_timestamps_from_memory, +) +from .video import ( + read_video, + read_video_timestamps, + write_video, +) + + +__all__ = [ + "write_video", + "read_video", + "read_video_timestamps", + "_read_video_from_file", + "_read_video_timestamps_from_file", + "_probe_video_from_file", + "_read_video_from_memory", + "_read_video_timestamps_from_memory", + "_probe_video_from_memory", + "_HAS_VIDEO_OPT", + "_read_video_clip_from_memory", + "_read_video_meta_data", + "VideoMetaData", + "Timebase", +] diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/_video_opt.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/_video_opt.py new file mode 100644 index 0000000000..da37c66cfa --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/_video_opt.py @@ -0,0 +1,551 @@ + +import importlib +import math +import os +import warnings +from fractions import Fraction +from typing import List, Tuple + +import numpy as np +import torch + + +_HAS_VIDEO_OPT = False + +try: + lib_dir = os.path.join(os.path.dirname(__file__), "..") + + loader_details = ( + importlib.machinery.ExtensionFileLoader, + importlib.machinery.EXTENSION_SUFFIXES + ) + + extfinder = importlib.machinery.FileFinder(lib_dir, loader_details) + ext_specs = extfinder.find_spec("video_reader") + if ext_specs is not None: + torch.ops.load_library(ext_specs.origin) + _HAS_VIDEO_OPT = True +except (ImportError, OSError): + pass + + +default_timebase = Fraction(0, 1) + + +# simple class for torch scripting +# the complex Fraction class from fractions module is not scriptable +@torch.jit.script +class Timebase(object): + __annotations__ = {"numerator": int, "denominator": int} + __slots__ = ["numerator", "denominator"] + + def __init__( + self, + numerator, # type: int + denominator, # type: int + ): + # type: (...) 
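
`_check_cuda_version` compares the CUDA version the torchvision extension was built with against `torch.version.cuda`. The extension reports the version as a single integer (for example 10020 for CUDA 10.2), and the digit slicing it applies can be checked in isolation:

```python
def split_cuda_version(version: int):
    # mirrors the parsing in _check_cuda_version: 9020 -> (9, 2), 10020 -> (10, 2)
    s = str(version)
    if version < 10000:
        return int(s[0]), int(s[2])
    return int(s[0:2]), int(s[3])

assert split_cuda_version(9020) == (9, 2)
assert split_cuda_version(10020) == (10, 2)
assert split_cuda_version(11030) == (11, 3)
```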
-> None + self.numerator = numerator + self.denominator = denominator + + +@torch.jit.script +class VideoMetaData(object): + __annotations__ = { + "has_video": bool, + "video_timebase": Timebase, + "video_duration": float, + "video_fps": float, + "has_audio": bool, + "audio_timebase": Timebase, + "audio_duration": float, + "audio_sample_rate": float, + } + __slots__ = [ + "has_video", + "video_timebase", + "video_duration", + "video_fps", + "has_audio", + "audio_timebase", + "audio_duration", + "audio_sample_rate", + ] + + def __init__(self): + self.has_video = False + self.video_timebase = Timebase(0, 1) + self.video_duration = 0.0 + self.video_fps = 0.0 + self.has_audio = False + self.audio_timebase = Timebase(0, 1) + self.audio_duration = 0.0 + self.audio_sample_rate = 0.0 + + +def _validate_pts(pts_range): + # type: (List[int]) + if pts_range[1] > 0: + assert ( + pts_range[0] <= pts_range[1] + ), """Start pts should not be smaller than end pts, got + start pts: %d and end pts: %d""" % ( + pts_range[0], + pts_range[1], + ) + + +def _fill_info(vtimebase, vfps, vduration, atimebase, asample_rate, aduration): + # type: (torch.Tensor,torch.Tensor,torch.Tensor,torch.Tensor,torch.Tensor,torch.Tensor) -> VideoMetaData + """ + Build update VideoMetaData struct with info about the video + """ + meta = VideoMetaData() + if vtimebase.numel() > 0: + meta.video_timebase = Timebase( + int(vtimebase[0].item()), int(vtimebase[1].item()) + ) + timebase = vtimebase[0].item() / float(vtimebase[1].item()) + if vduration.numel() > 0: + meta.has_video = True + meta.video_duration = float(vduration.item()) * timebase + if vfps.numel() > 0: + meta.video_fps = float(vfps.item()) + if atimebase.numel() > 0: + meta.audio_timebase = Timebase( + int(atimebase[0].item()), int(atimebase[1].item()) + ) + timebase = atimebase[0].item() / float(atimebase[1].item()) + if aduration.numel() > 0: + meta.has_audio = True + meta.audio_duration = float(aduration.item()) * timebase + if asample_rate.numel() > 0: + meta.audio_sample_rate = float(asample_rate.item()) + + return meta + + +def _align_audio_frames(aframes, aframe_pts, audio_pts_range): + # type: (torch.Tensor, torch.Tensor, List[int]) -> torch.Tensor + start, end = aframe_pts[0], aframe_pts[-1] + num_samples = aframes.size(0) + step_per_aframe = float(end - start + 1) / float(num_samples) + s_idx = 0 + e_idx = num_samples + if start < audio_pts_range[0]: + s_idx = int((audio_pts_range[0] - start) / step_per_aframe) + if end > audio_pts_range[1]: + e_idx = int((audio_pts_range[1] - end) / step_per_aframe) + return aframes[s_idx:e_idx, :] + + +def _read_video_from_file( + filename, + seek_frame_margin=0.25, + read_video_stream=True, + video_width=0, + video_height=0, + video_min_dimension=0, + video_max_dimension=0, + video_pts_range=(0, -1), + video_timebase=default_timebase, + read_audio_stream=True, + audio_samples=0, + audio_channels=0, + audio_pts_range=(0, -1), + audio_timebase=default_timebase, +): + """ + Reads a video from a file, returning both the video frames as well as + the audio frames + + Args + ---------- + filename : str + path to the video file + seek_frame_margin: double, optional + seeking frame in the stream is imprecise. Thus, when video_start_pts + is specified, we seek the pts earlier by seek_frame_margin seconds + read_video_stream: int, optional + whether read video stream. If yes, set to 1. 
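
`_fill_info` receives the stream metadata as small tensors from the `video_reader` op and converts durations from time-base units into seconds. With made-up numbers, the conversion reduces to:

```python
import torch

vtimebase = torch.tensor([1, 30000])     # hypothetical time base of 1/30000
vduration = torch.tensor([900000])       # duration expressed in time-base units

timebase = vtimebase[0].item() / float(vtimebase[1].item())
print(float(vduration.item()) * timebase)    # 30.0 -> the video lasts 30 seconds
```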
Otherwise, 0 + video_width/video_height/video_min_dimension/video_max_dimension: int + together decide the size of decoded frames + - When video_width = 0, video_height = 0, video_min_dimension = 0, + and video_max_dimension = 0, keep the orignal frame resolution + - When video_width = 0, video_height = 0, video_min_dimension != 0, + and video_max_dimension = 0, keep the aspect ratio and resize the + frame so that shorter edge size is video_min_dimension + - When video_width = 0, video_height = 0, video_min_dimension = 0, + and video_max_dimension != 0, keep the aspect ratio and resize + the frame so that longer edge size is video_max_dimension + - When video_width = 0, video_height = 0, video_min_dimension != 0, + and video_max_dimension != 0, resize the frame so that shorter + edge size is video_min_dimension, and longer edge size is + video_max_dimension. The aspect ratio may not be preserved + - When video_width = 0, video_height != 0, video_min_dimension = 0, + and video_max_dimension = 0, keep the aspect ratio and resize + the frame so that frame video_height is $video_height + - When video_width != 0, video_height == 0, video_min_dimension = 0, + and video_max_dimension = 0, keep the aspect ratio and resize + the frame so that frame video_width is $video_width + - When video_width != 0, video_height != 0, video_min_dimension = 0, + and video_max_dimension = 0, resize the frame so that frame + video_width and video_height are set to $video_width and + $video_height, respectively + video_pts_range : list(int), optional + the start and end presentation timestamp of video stream + video_timebase: Fraction, optional + a Fraction rational number which denotes timebase in video stream + read_audio_stream: int, optional + whether read audio stream. If yes, set to 1. Otherwise, 0 + audio_samples: int, optional + audio sampling rate + audio_channels: int optional + audio channels + audio_pts_range : list(int), optional + the start and end presentation timestamp of audio stream + audio_timebase: Fraction, optional + a Fraction rational number which denotes time base in audio stream + + Returns + ------- + vframes : Tensor[T, H, W, C] + the `T` video frames + aframes : Tensor[L, K] + the audio frames, where `L` is the number of points and + `K` is the number of audio_channels + info : Dict + metadata for the video and audio. Can contain the fields video_fps (float) + and audio_fps (int) + """ + _validate_pts(video_pts_range) + _validate_pts(audio_pts_range) + + result = torch.ops.video_reader.read_video_from_file( + filename, + seek_frame_margin, + 0, # getPtsOnly + read_video_stream, + video_width, + video_height, + video_min_dimension, + video_max_dimension, + video_pts_range[0], + video_pts_range[1], + video_timebase.numerator, + video_timebase.denominator, + read_audio_stream, + audio_samples, + audio_channels, + audio_pts_range[0], + audio_pts_range[1], + audio_timebase.numerator, + audio_timebase.denominator, + ) + vframes, _vframe_pts, vtimebase, vfps, vduration, \ + aframes, aframe_pts, atimebase, asample_rate, aduration = ( + result + ) + info = _fill_info(vtimebase, vfps, vduration, atimebase, asample_rate, aduration) + if aframes.numel() > 0: + # when audio stream is found + aframes = _align_audio_frames(aframes, aframe_pts, audio_pts_range) + return vframes, aframes, info + + +def _read_video_timestamps_from_file(filename): + """ + Decode all video- and audio frames in the video. Only pts + (presentation timestamp) is returned. The actual frame pixel data is not + copied. 
Thus, it is much faster than read_video(...) + """ + result = torch.ops.video_reader.read_video_from_file( + filename, + 0, # seek_frame_margin + 1, # getPtsOnly + 1, # read_video_stream + 0, # video_width + 0, # video_height + 0, # video_min_dimension + 0, # video_max_dimension + 0, # video_start_pts + -1, # video_end_pts + 0, # video_timebase_num + 1, # video_timebase_den + 1, # read_audio_stream + 0, # audio_samples + 0, # audio_channels + 0, # audio_start_pts + -1, # audio_end_pts + 0, # audio_timebase_num + 1, # audio_timebase_den + ) + _vframes, vframe_pts, vtimebase, vfps, vduration, \ + _aframes, aframe_pts, atimebase, asample_rate, aduration = (result) + info = _fill_info(vtimebase, vfps, vduration, atimebase, asample_rate, aduration) + + vframe_pts = vframe_pts.numpy().tolist() + aframe_pts = aframe_pts.numpy().tolist() + return vframe_pts, aframe_pts, info + + +def _probe_video_from_file(filename): + """ + Probe a video file and return VideoMetaData with info about the video + """ + result = torch.ops.video_reader.probe_video_from_file(filename) + vtimebase, vfps, vduration, atimebase, asample_rate, aduration = result + info = _fill_info(vtimebase, vfps, vduration, atimebase, asample_rate, aduration) + return info + + +def _read_video_from_memory( + video_data, # type: torch.Tensor + seek_frame_margin=0.25, # type: float + read_video_stream=1, # type: int + video_width=0, # type: int + video_height=0, # type: int + video_min_dimension=0, # type: int + video_max_dimension=0, # type: int + video_pts_range=(0, -1), # type: List[int] + video_timebase_numerator=0, # type: int + video_timebase_denominator=1, # type: int + read_audio_stream=1, # type: int + audio_samples=0, # type: int + audio_channels=0, # type: int + audio_pts_range=(0, -1), # type: List[int] + audio_timebase_numerator=0, # type: int + audio_timebase_denominator=1, # type: int +): + # type: (...) -> Tuple[torch.Tensor, torch.Tensor] + """ + Reads a video from memory, returning both the video frames as well as + the audio frames + This function is torchscriptable. + + Args + ---------- + video_data : data type could be 1) torch.Tensor, dtype=torch.int8 or 2) python bytes + compressed video content stored in either 1) torch.Tensor 2) python bytes + seek_frame_margin: double, optional + seeking frame in the stream is imprecise. Thus, when video_start_pts is specified, + we seek the pts earlier by seek_frame_margin seconds + read_video_stream: int, optional + whether read video stream. If yes, set to 1. Otherwise, 0 + video_width/video_height/video_min_dimension/video_max_dimension: int + together decide the size of decoded frames + - When video_width = 0, video_height = 0, video_min_dimension = 0, + and video_max_dimension = 0, keep the orignal frame resolution + - When video_width = 0, video_height = 0, video_min_dimension != 0, + and video_max_dimension = 0, keep the aspect ratio and resize the + frame so that shorter edge size is video_min_dimension + - When video_width = 0, video_height = 0, video_min_dimension = 0, + and video_max_dimension != 0, keep the aspect ratio and resize + the frame so that longer edge size is video_max_dimension + - When video_width = 0, video_height = 0, video_min_dimension != 0, + and video_max_dimension != 0, resize the frame so that shorter + edge size is video_min_dimension, and longer edge size is + video_max_dimension. 
The aspect ratio may not be preserved + - When video_width = 0, video_height != 0, video_min_dimension = 0, + and video_max_dimension = 0, keep the aspect ratio and resize + the frame so that frame video_height is $video_height + - When video_width != 0, video_height == 0, video_min_dimension = 0, + and video_max_dimension = 0, keep the aspect ratio and resize + the frame so that frame video_width is $video_width + - When video_width != 0, video_height != 0, video_min_dimension = 0, + and video_max_dimension = 0, resize the frame so that frame + video_width and video_height are set to $video_width and + $video_height, respectively + video_pts_range : list(int), optional + the start and end presentation timestamp of video stream + video_timebase_numerator / video_timebase_denominator: optional + a rational number which denotes timebase in video stream + read_audio_stream: int, optional + whether read audio stream. If yes, set to 1. Otherwise, 0 + audio_samples: int, optional + audio sampling rate + audio_channels: int optional + audio audio_channels + audio_pts_range : list(int), optional + the start and end presentation timestamp of audio stream + audio_timebase_numerator / audio_timebase_denominator: optional + a rational number which denotes time base in audio stream + + Returns + ------- + vframes : Tensor[T, H, W, C] + the `T` video frames + aframes : Tensor[L, K] + the audio frames, where `L` is the number of points and + `K` is the number of channels + """ + + _validate_pts(video_pts_range) + _validate_pts(audio_pts_range) + + result = torch.ops.video_reader.read_video_from_memory( + video_data, + seek_frame_margin, + 0, # getPtsOnly + read_video_stream, + video_width, + video_height, + video_min_dimension, + video_max_dimension, + video_pts_range[0], + video_pts_range[1], + video_timebase_numerator, + video_timebase_denominator, + read_audio_stream, + audio_samples, + audio_channels, + audio_pts_range[0], + audio_pts_range[1], + audio_timebase_numerator, + audio_timebase_denominator, + ) + + vframes, _vframe_pts, vtimebase, vfps, vduration, \ + aframes, aframe_pts, atimebase, asample_rate, aduration = ( + result + ) + + if aframes.numel() > 0: + # when audio stream is found + aframes = _align_audio_frames(aframes, aframe_pts, audio_pts_range) + + return vframes, aframes + + +def _read_video_timestamps_from_memory(video_data): + """ + Decode all frames in the video. Only pts (presentation timestamp) is returned. + The actual frame pixel data is not copied. Thus, read_video_timestamps(...) + is much faster than read_video(...) 
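
A hedged usage sketch of `_read_video_from_memory`: the file name is hypothetical, the `video_reader` extension must have been loaded (`_HAS_VIDEO_OPT`), and the raw bytes are wrapped into a uint8 tensor as the docstring allows.

```python
import numpy as np
import torch

with open("clip.mp4", "rb") as f:                 # hypothetical input file
    data = torch.from_numpy(np.frombuffer(f.read(), dtype=np.uint8))

# decode everything, resizing so the shorter edge is 320 px
vframes, aframes = _read_video_from_memory(data, video_min_dimension=320)
print(vframes.shape)                              # Tensor[T, H, W, C]
```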
+ """ + if not isinstance(video_data, torch.Tensor): + video_data = torch.from_numpy(np.frombuffer(video_data, dtype=np.uint8)) + result = torch.ops.video_reader.read_video_from_memory( + video_data, + 0, # seek_frame_margin + 1, # getPtsOnly + 1, # read_video_stream + 0, # video_width + 0, # video_height + 0, # video_min_dimension + 0, # video_max_dimension + 0, # video_start_pts + -1, # video_end_pts + 0, # video_timebase_num + 1, # video_timebase_den + 1, # read_audio_stream + 0, # audio_samples + 0, # audio_channels + 0, # audio_start_pts + -1, # audio_end_pts + 0, # audio_timebase_num + 1, # audio_timebase_den + ) + _vframes, vframe_pts, vtimebase, vfps, vduration, \ + _aframes, aframe_pts, atimebase, asample_rate, aduration = ( + result + ) + info = _fill_info(vtimebase, vfps, vduration, atimebase, asample_rate, aduration) + + vframe_pts = vframe_pts.numpy().tolist() + aframe_pts = aframe_pts.numpy().tolist() + return vframe_pts, aframe_pts, info + + +def _probe_video_from_memory(video_data): + # type: (torch.Tensor) -> VideoMetaData + """ + Probe a video in memory and return VideoMetaData with info about the video + This function is torchscriptable + """ + if not isinstance(video_data, torch.Tensor): + video_data = torch.from_numpy(np.frombuffer(video_data, dtype=np.uint8)) + result = torch.ops.video_reader.probe_video_from_memory(video_data) + vtimebase, vfps, vduration, atimebase, asample_rate, aduration = result + info = _fill_info(vtimebase, vfps, vduration, atimebase, asample_rate, aduration) + return info + + +def _read_video(filename, start_pts=0, end_pts=None, pts_unit="pts"): + if end_pts is None: + end_pts = float("inf") + + if pts_unit == "pts": + warnings.warn( + "The pts_unit 'pts' gives wrong results and will be removed in a " + + "follow-up version. Please use pts_unit 'sec'." + ) + + info = _probe_video_from_file(filename) + + has_video = info.has_video + has_audio = info.has_audio + + def get_pts(time_base): + start_offset = start_pts + end_offset = end_pts + if pts_unit == "sec": + start_offset = int(math.floor(start_pts * (1 / time_base))) + if end_offset != float("inf"): + end_offset = int(math.ceil(end_pts * (1 / time_base))) + if end_offset == float("inf"): + end_offset = -1 + return start_offset, end_offset + + video_pts_range = (0, -1) + video_timebase = default_timebase + if has_video: + video_timebase = Fraction( + info.video_timebase.numerator, info.video_timebase.denominator + ) + video_pts_range = get_pts(video_timebase) + + audio_pts_range = (0, -1) + audio_timebase = default_timebase + if has_audio: + audio_timebase = Fraction( + info.audio_timebase.numerator, info.audio_timebase.denominator + ) + audio_pts_range = get_pts(audio_timebase) + + vframes, aframes, info = _read_video_from_file( + filename, + read_video_stream=True, + video_pts_range=video_pts_range, + video_timebase=video_timebase, + read_audio_stream=True, + audio_pts_range=audio_pts_range, + audio_timebase=audio_timebase, + ) + _info = {} + if has_video: + _info["video_fps"] = info.video_fps + if has_audio: + _info["audio_fps"] = info.audio_sample_rate + + return vframes, aframes, _info + + +def _read_video_timestamps(filename, pts_unit="pts"): + if pts_unit == "pts": + warnings.warn( + "The pts_unit 'pts' gives wrong results and will be removed in a " + + "follow-up version. Please use pts_unit 'sec'." 
+ ) + + pts, _, info = _read_video_timestamps_from_file(filename) + + if pts_unit == "sec": + video_time_base = Fraction( + info.video_timebase.numerator, info.video_timebase.denominator + ) + pts = [x * video_time_base for x in pts] + + video_fps = info.video_fps if info.has_video else None + + return pts, video_fps diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/video.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/video.py new file mode 100644 index 0000000000..40d1cfeed8 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/video.py @@ -0,0 +1,349 @@ +import gc +import math +import re +import warnings +from typing import Tuple, List + +import numpy as np +import torch + +from . import _video_opt +from ._video_opt import VideoMetaData + + +try: + import av + + av.logging.set_level(av.logging.ERROR) + if not hasattr(av.video.frame.VideoFrame, "pict_type"): + av = ImportError( + """\ +Your version of PyAV is too old for the necessary video operations in torchvision. +If you are on Python 3.5, you will have to build from source (the conda-forge +packages are not up-to-date). See +https://github.com/mikeboers/PyAV#installation for instructions on how to +install PyAV on your system. +""" + ) +except ImportError: + av = ImportError( + """\ +PyAV is not installed, and is necessary for the video operations in torchvision. +See https://github.com/mikeboers/PyAV#installation for instructions on how to +install PyAV on your system. +""" + ) + + +def _check_av_available(): + if isinstance(av, Exception): + raise av + + +def _av_available(): + return not isinstance(av, Exception) + + +# PyAV has some reference cycles +_CALLED_TIMES = 0 +_GC_COLLECTION_INTERVAL = 10 + + +def write_video(filename, video_array, fps, video_codec="libx264", options=None): + """ + Writes a 4d tensor in [T, H, W, C] format in a video file + + Parameters + ---------- + filename : str + path where the video will be saved + video_array : Tensor[T, H, W, C] + tensor containing the individual frames, as a uint8 tensor in [T, H, W, C] format + fps : Number + frames per second + """ + _check_av_available() + video_array = torch.as_tensor(video_array, dtype=torch.uint8).numpy() + + container = av.open(filename, mode="w") + + stream = container.add_stream(video_codec, rate=fps) + stream.width = video_array.shape[2] + stream.height = video_array.shape[1] + stream.pix_fmt = "yuv420p" if video_codec != "libx264rgb" else "rgb24" + stream.options = options or {} + + for img in video_array: + frame = av.VideoFrame.from_ndarray(img, format="rgb24") + frame.pict_type = "NONE" + for packet in stream.encode(frame): + container.mux(packet) + + # Flush stream + for packet in stream.encode(): + container.mux(packet) + + # Close the file + container.close() + + +def _read_from_stream( + container, start_offset, end_offset, pts_unit, stream, stream_name +): + global _CALLED_TIMES, _GC_COLLECTION_INTERVAL + _CALLED_TIMES += 1 + if _CALLED_TIMES % _GC_COLLECTION_INTERVAL == _GC_COLLECTION_INTERVAL - 1: + gc.collect() + + if pts_unit == "sec": + start_offset = int(math.floor(start_offset * (1 / stream.time_base))) + if end_offset != float("inf"): + end_offset = int(math.ceil(end_offset * (1 / stream.time_base))) + else: + warnings.warn( + "The pts_unit 'pts' gives wrong results and will be removed in a " + + "follow-up version. Please use pts_unit 'sec'." 
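
A quick usage sketch for `write_video`: it needs PyAV and a uint8 `[T, H, W, C]` tensor, and the output path here is hypothetical.

```python
import torch
from torchvision.io import write_video   # requires PyAV to be installed

frames = (torch.rand(16, 240, 320, 3) * 255).to(torch.uint8)   # 16 random RGB frames
write_video("random.mp4", frames, fps=25)                      # encoded with libx264 by default
```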
+ ) + + frames = {} + should_buffer = False + max_buffer_size = 5 + if stream.type == "video": + # DivX-style packed B-frames can have out-of-order pts (2 frames in a single pkt) + # so need to buffer some extra frames to sort everything + # properly + extradata = stream.codec_context.extradata + # overly complicated way of finding if `divx_packed` is set, following + # https://github.com/FFmpeg/FFmpeg/commit/d5a21172283572af587b3d939eba0091484d3263 + if extradata and b"DivX" in extradata: + # can't use regex directly because of some weird characters sometimes... + pos = extradata.find(b"DivX") + d = extradata[pos:] + o = re.search(br"DivX(\d+)Build(\d+)(\w)", d) + if o is None: + o = re.search(br"DivX(\d+)b(\d+)(\w)", d) + if o is not None: + should_buffer = o.group(3) == b"p" + seek_offset = start_offset + # some files don't seek to the right location, so better be safe here + seek_offset = max(seek_offset - 1, 0) + if should_buffer: + # FIXME this is kind of a hack, but we will jump to the previous keyframe + # so this will be safe + seek_offset = max(seek_offset - max_buffer_size, 0) + try: + # TODO check if stream needs to always be the video stream here or not + container.seek(seek_offset, any_frame=False, backward=True, stream=stream) + except av.AVError: + # TODO add some warnings in this case + # print("Corrupted file?", container.name) + return [] + buffer_count = 0 + try: + for _idx, frame in enumerate(container.decode(**stream_name)): + frames[frame.pts] = frame + if frame.pts >= end_offset: + if should_buffer and buffer_count < max_buffer_size: + buffer_count += 1 + continue + break + except av.AVError: + # TODO add a warning + pass + # ensure that the results are sorted wrt the pts + result = [ + frames[i] for i in sorted(frames) if start_offset <= frames[i].pts <= end_offset + ] + if len(frames) > 0 and start_offset > 0 and start_offset not in frames: + # if there is no frame that exactly matches the pts of start_offset + # add the last frame smaller than start_offset, to guarantee that + # we will have all the necessary data. This is most useful for audio + preceding_frames = [i for i in frames if i < start_offset] + if len(preceding_frames) > 0: + first_frame_pts = max(preceding_frames) + result.insert(0, frames[first_frame_pts]) + return result + + +def _align_audio_frames(aframes, audio_frames, ref_start, ref_end): + start, end = audio_frames[0].pts, audio_frames[-1].pts + total_aframes = aframes.shape[1] + step_per_aframe = (end - start + 1) / total_aframes + s_idx = 0 + e_idx = total_aframes + if start < ref_start: + s_idx = int((ref_start - start) / step_per_aframe) + if end > ref_end: + e_idx = int((ref_end - end) / step_per_aframe) + return aframes[:, s_idx:e_idx] + + +def read_video(filename, start_pts=0, end_pts=None, pts_unit="pts"): + """ + Reads a video from a file, returning both the video frames as well as + the audio frames + + Parameters + ---------- + filename : str + path to the video file + start_pts : int if pts_unit = 'pts', optional + float / Fraction if pts_unit = 'sec', optional + the start presentation time of the video + end_pts : int if pts_unit = 'pts', optional + float / Fraction if pts_unit = 'sec', optional + the end presentation time + pts_unit : str, optional + unit in which start_pts and end_pts values will be interpreted, either 'pts' or 'sec'. Defaults to 'pts'. 
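
Usage sketch for `read_video` with the recommended `pts_unit='sec'` (the file name is hypothetical; with the pyav backend only the requested window is decoded):

```python
from torchvision.io import read_video

vframes, aframes, info = read_video("clip.mp4", start_pts=0, end_pts=2.0, pts_unit="sec")
print(vframes.shape, aframes.shape, info)   # Tensor[T, H, W, C], Tensor[K, L], {'video_fps': ...}
```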
+ + Returns + ------- + vframes : Tensor[T, H, W, C] + the `T` video frames + aframes : Tensor[K, L] + the audio frames, where `K` is the number of channels and `L` is the + number of points + info : Dict + metadata for the video and audio. Can contain the fields video_fps (float) + and audio_fps (int) + """ + + from torchvision import get_video_backend + + if get_video_backend() != "pyav": + return _video_opt._read_video(filename, start_pts, end_pts, pts_unit) + + _check_av_available() + + if end_pts is None: + end_pts = float("inf") + + if end_pts < start_pts: + raise ValueError( + "end_pts should be larger than start_pts, got " + "start_pts={} and end_pts={}".format(start_pts, end_pts) + ) + + info = {} + video_frames = [] + audio_frames = [] + + try: + container = av.open(filename, metadata_errors="ignore") + except av.AVError: + # TODO raise a warning? + pass + else: + if container.streams.video: + video_frames = _read_from_stream( + container, + start_pts, + end_pts, + pts_unit, + container.streams.video[0], + {"video": 0}, + ) + video_fps = container.streams.video[0].average_rate + # guard against potentially corrupted files + if video_fps is not None: + info["video_fps"] = float(video_fps) + + if container.streams.audio: + audio_frames = _read_from_stream( + container, + start_pts, + end_pts, + pts_unit, + container.streams.audio[0], + {"audio": 0}, + ) + info["audio_fps"] = container.streams.audio[0].rate + + container.close() + + vframes = [frame.to_rgb().to_ndarray() for frame in video_frames] + aframes = [frame.to_ndarray() for frame in audio_frames] + + if vframes: + vframes = torch.as_tensor(np.stack(vframes)) + else: + vframes = torch.empty((0, 1, 1, 3), dtype=torch.uint8) + + if aframes: + aframes = np.concatenate(aframes, 1) + aframes = torch.as_tensor(aframes) + aframes = _align_audio_frames(aframes, audio_frames, start_pts, end_pts) + else: + aframes = torch.empty((1, 0), dtype=torch.float32) + + return vframes, aframes, info + + +def _can_read_timestamps_from_packets(container): + extradata = container.streams[0].codec_context.extradata + if extradata is None: + return False + if b"Lavc" in extradata: + return True + return False + + +def read_video_timestamps(filename, pts_unit="pts"): + """ + List the video frames timestamps. + + Note that the function decodes the whole video frame-by-frame. + + Parameters + ---------- + filename : str + path to the video file + pts_unit : str, optional + unit in which timestamp values will be returned either 'pts' or 'sec'. Defaults to 'pts'. + + Returns + ------- + pts : List[int] if pts_unit = 'pts' + List[Fraction] if pts_unit = 'sec' + presentation timestamps for each one of the frames in the video. 
+ video_fps : int + the frame rate for the video + + """ + from torchvision import get_video_backend + + if get_video_backend() != "pyav": + return _video_opt._read_video_timestamps(filename, pts_unit) + + _check_av_available() + + video_frames = [] + video_fps = None + + try: + container = av.open(filename, metadata_errors="ignore") + except av.AVError: + # TODO add a warning + pass + else: + if container.streams.video: + video_stream = container.streams.video[0] + video_time_base = video_stream.time_base + if _can_read_timestamps_from_packets(container): + # fast path + video_frames = [ + x for x in container.demux(video=0) if x.pts is not None + ] + else: + video_frames = _read_from_stream( + container, 0, float("inf"), pts_unit, video_stream, {"video": 0} + ) + video_fps = float(video_stream.average_rate) + container.close() + + pts = [x.pts for x in video_frames] + + if pts_unit == "sec": + pts = [x * video_time_base for x in pts] + + return pts, video_fps diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/__init__.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/__init__.py new file mode 100644 index 0000000000..283e544e98 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/__init__.py @@ -0,0 +1,14 @@ +from .alexnet import * +from .resnet import * +from .vgg import * +from .squeezenet import * +from .inception import * +from .densenet import * +from .googlenet import * +from .mobilenet import * +from .mnasnet import * +from .shufflenetv2 import * +from . import segmentation +from . import detection +from . import video +from . import quantization diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils.py new file mode 100644 index 0000000000..f4e1cd8450 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils.py @@ -0,0 +1,83 @@ +from collections import OrderedDict +from typing import Dict, Optional + +from torch import nn + + +class IntermediateLayerGetter(nn.ModuleDict): + """ + Module wrapper that returns intermediate layers from a model + + It has a strong assumption that the modules have been registered + into the model in the same order as they are used. + This means that one should **not** reuse the same nn.Module + twice in the forward if you want this to work. + + Additionally, it is only able to query submodules that are directly + assigned to the model. So if `model` is passed, `model.feature1` can + be returned, but not `model.feature1.layer2`. + + Args: + model (nn.Module): model on which we will extract the features + return_layers (Dict[name, new_name]): a dict containing the names + of the modules for which the activations will be returned as + the key of the dict, and the value of the dict is the name + of the returned activation (which the user can specify). 
+ + Examples:: + + >>> m = torchvision.models.resnet18(pretrained=True) + >>> # extract layer1 and layer3, giving as names `feat1` and feat2` + >>> new_m = torchvision.models._utils.IntermediateLayerGetter(m, + >>> {'layer1': 'feat1', 'layer3': 'feat2'}) + >>> out = new_m(torch.rand(1, 3, 224, 224)) + >>> print([(k, v.shape) for k, v in out.items()]) + >>> [('feat1', torch.Size([1, 64, 56, 56])), + >>> ('feat2', torch.Size([1, 256, 14, 14]))] + """ + + _version = 2 + __annotations__ = { + "return_layers": Dict[str, str], + } + + def __init__(self, model: nn.Module, return_layers: Dict[str, str]) -> None: + if not set(return_layers).issubset([name for name, _ in model.named_children()]): + raise ValueError("return_layers are not present in model") + orig_return_layers = return_layers + return_layers = {str(k): str(v) for k, v in return_layers.items()} + layers = OrderedDict() + for name, module in model.named_children(): + layers[name] = module + if name in return_layers: + del return_layers[name] + if not return_layers: + break + + super().__init__(layers) + self.return_layers = orig_return_layers + + def forward(self, x): + out = OrderedDict() + for name, module in self.items(): + x = module(x) + if name in self.return_layers: + out_name = self.return_layers[name] + out[out_name] = x + return out + + +def _make_divisible(v: float, divisor: int, min_value: Optional[int] = None) -> int: + """ + This function is taken from the original tf repo. + It ensures that all layers have a channel number that is divisible by 8 + It can be seen here: + https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py + """ + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_v < 0.9 * v: + new_v += divisor + return new_v diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils_origin.py new file mode 100644 index 0000000000..291041d7b5 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils_origin.py @@ -0,0 +1,67 @@ +from collections import OrderedDict + +import torch +from torch import nn +from torch.jit.annotations import Dict + + +class IntermediateLayerGetter(nn.ModuleDict): + """ + Module wrapper that returns intermediate layers from a model + + It has a strong assumption that the modules have been registered + into the model in the same order as they are used. + This means that one should **not** reuse the same nn.Module + twice in the forward if you want this to work. + + Additionally, it is only able to query submodules that are directly + assigned to the model. So if `model` is passed, `model.feature1` can + be returned, but not `model.feature1.layer2`. + + Arguments: + model (nn.Module): model on which we will extract the features + return_layers (Dict[name, new_name]): a dict containing the names + of the modules for which the activations will be returned as + the key of the dict, and the value of the dict is the name + of the returned activation (which the user can specify). 
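
`_make_divisible` rounds a channel count to a multiple of the divisor while never shrinking it by more than 10%, bumping up by one divisor when plain rounding would lose too much. A few spot checks:

```python
assert _make_divisible(32 * 0.75, 8) == 24    # already close to a multiple of 8
assert _make_divisible(91, 8) == 88           # rounds down, still within 10% of 91
assert _make_divisible(20, 16) == 32          # 16 would lose more than 10%, so bump to 32
```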
+ + Examples:: + + >>> m = torchvision.models.resnet18(pretrained=True) + >>> # extract layer1 and layer3, giving as names `feat1` and feat2` + >>> new_m = torchvision.models._utils.IntermediateLayerGetter(m, + >>> {'layer1': 'feat1', 'layer3': 'feat2'}) + >>> out = new_m(torch.rand(1, 3, 224, 224)) + >>> print([(k, v.shape) for k, v in out.items()]) + >>> [('feat1', torch.Size([1, 64, 56, 56])), + >>> ('feat2', torch.Size([1, 256, 14, 14]))] + """ + _version = 2 + __annotations__ = { + "return_layers": Dict[str, str], + } + + def __init__(self, model, return_layers): + if not set(return_layers).issubset([name for name, _ in model.named_children()]): + raise ValueError("return_layers are not present in model") + orig_return_layers = return_layers + return_layers = {str(k): str(v) for k, v in return_layers.items()} + layers = OrderedDict() + for name, module in model.named_children(): + layers[name] = module + if name in return_layers: + del return_layers[name] + if not return_layers: + break + + super(IntermediateLayerGetter, self).__init__(layers) + self.return_layers = orig_return_layers + + def forward(self, x): + out = OrderedDict() + for name, module in self.items(): + x = module(x) + if name in self.return_layers: + out_name = self.return_layers[name] + out[out_name] = x + return out diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/alexnet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/alexnet.py new file mode 100644 index 0000000000..a0126312d1 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/alexnet.py @@ -0,0 +1,65 @@ +import torch +import torch.nn as nn +from .utils import load_state_dict_from_url + + +__all__ = ['AlexNet', 'alexnet'] + + +model_urls = { + 'alexnet': 'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth', +} + + +class AlexNet(nn.Module): + + def __init__(self, num_classes=1000): + super(AlexNet, self).__init__() + self.features = nn.Sequential( + nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + nn.Conv2d(64, 192, kernel_size=5, padding=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + nn.Conv2d(192, 384, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(384, 256, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(256, 256, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + ) + self.avgpool = nn.AdaptiveAvgPool2d((6, 6)) + self.classifier = nn.Sequential( + nn.Dropout(), + nn.Linear(256 * 6 * 6, 4096), + nn.ReLU(inplace=True), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(inplace=True), + nn.Linear(4096, num_classes), + ) + + def forward(self, x): + x = self.features(x) + x = self.avgpool(x) + x = torch.flatten(x, 1) + x = self.classifier(x) + return x + + +def alexnet(pretrained=False, progress=True, **kwargs): + r"""AlexNet model architecture from the + `"One weird trick..." `_ paper. 
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + model = AlexNet(**kwargs) + if pretrained: + state_dict = load_state_dict_from_url(model_urls['alexnet'], + progress=progress) + model.load_state_dict(state_dict) + return model diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/densenet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/densenet.py new file mode 100644 index 0000000000..822dde0925 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/densenet.py @@ -0,0 +1,279 @@ +import re +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint as cp +from collections import OrderedDict +from .utils import load_state_dict_from_url +from torch import Tensor +from torch.jit.annotations import List + + +__all__ = ['DenseNet', 'densenet121', 'densenet169', 'densenet201', 'densenet161'] + +model_urls = { + 'densenet121': 'https://download.pytorch.org/models/densenet121-a639ec97.pth', + 'densenet169': 'https://download.pytorch.org/models/densenet169-b2777c0a.pth', + 'densenet201': 'https://download.pytorch.org/models/densenet201-c1103571.pth', + 'densenet161': 'https://download.pytorch.org/models/densenet161-8d451a50.pth', +} + + +class _DenseLayer(nn.Module): + def __init__(self, num_input_features, growth_rate, bn_size, drop_rate, memory_efficient=False): + super(_DenseLayer, self).__init__() + self.add_module('norm1', nn.BatchNorm2d(num_input_features)), + self.add_module('relu1', nn.ReLU(inplace=True)), + self.add_module('conv1', nn.Conv2d(num_input_features, bn_size * + growth_rate, kernel_size=1, stride=1, + bias=False)), + self.add_module('norm2', nn.BatchNorm2d(bn_size * growth_rate)), + self.add_module('relu2', nn.ReLU(inplace=True)), + self.add_module('conv2', nn.Conv2d(bn_size * growth_rate, growth_rate, + kernel_size=3, stride=1, padding=1, + bias=False)), + self.drop_rate = float(drop_rate) + self.memory_efficient = memory_efficient + + def bn_function(self, inputs): + # type: (List[Tensor]) -> Tensor + concated_features = torch.cat(inputs, 1) + bottleneck_output = self.conv1(self.relu1(self.norm1(concated_features))) # noqa: T484 + return bottleneck_output + + # todo: rewrite when torchscript supports any + def any_requires_grad(self, input): + # type: (List[Tensor]) -> bool + for tensor in input: + if tensor.requires_grad: + return True + return False + + @torch.jit.unused # noqa: T484 + def call_checkpoint_bottleneck(self, input): + # type: (List[Tensor]) -> Tensor + def closure(*inputs): + return self.bn_function(*inputs) + + return cp.checkpoint(closure, input) + + @torch.jit._overload_method # noqa: F811 + def forward(self, input): + # type: (List[Tensor]) -> (Tensor) + pass + + @torch.jit._overload_method # noqa: F811 + def forward(self, input): + # type: (Tensor) -> (Tensor) + pass + + # torchscript does not yet support *args, so we overload method + # allowing it to take either a List[Tensor] or single Tensor + def forward(self, input): # noqa: F811 + if isinstance(input, Tensor): + prev_features = [input] + else: + prev_features = input + + if self.memory_efficient and self.any_requires_grad(prev_features): + if torch.jit.is_scripting(): + raise Exception("Memory Efficient not supported in JIT") + + bottleneck_output = self.call_checkpoint_bottleneck(prev_features) + else: + bottleneck_output = self.bn_function(prev_features) + + 
new_features = self.conv2(self.relu2(self.norm2(bottleneck_output))) + if self.drop_rate > 0: + new_features = F.dropout(new_features, p=self.drop_rate, + training=self.training) + return new_features + + +class _DenseBlock(nn.ModuleDict): + _version = 2 + + def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate, memory_efficient=False): + super(_DenseBlock, self).__init__() + for i in range(num_layers): + layer = _DenseLayer( + num_input_features + i * growth_rate, + growth_rate=growth_rate, + bn_size=bn_size, + drop_rate=drop_rate, + memory_efficient=memory_efficient, + ) + self.add_module('denselayer%d' % (i + 1), layer) + + def forward(self, init_features): + features = [init_features] + for name, layer in self.items(): + new_features = layer(features) + features.append(new_features) + return torch.cat(features, 1) + + +class _Transition(nn.Sequential): + def __init__(self, num_input_features, num_output_features): + super(_Transition, self).__init__() + self.add_module('norm', nn.BatchNorm2d(num_input_features)) + self.add_module('relu', nn.ReLU(inplace=True)) + self.add_module('conv', nn.Conv2d(num_input_features, num_output_features, + kernel_size=1, stride=1, bias=False)) + self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2)) + + +class DenseNet(nn.Module): + r"""Densenet-BC model class, based on + `"Densely Connected Convolutional Networks" `_ + + Args: + growth_rate (int) - how many filters to add each layer (`k` in paper) + block_config (list of 4 ints) - how many layers in each pooling block + num_init_features (int) - the number of filters to learn in the first convolution layer + bn_size (int) - multiplicative factor for number of bottle neck layers + (i.e. bn_size * k features in the bottleneck layer) + drop_rate (float) - dropout rate after each dense layer + num_classes (int) - number of classification classes + memory_efficient (bool) - If True, uses checkpointing. Much more memory efficient, + but slower. Default: *False*. See `"paper" `_ + """ + + def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16), + num_init_features=64, bn_size=4, drop_rate=0, num_classes=1000, memory_efficient=False): + + super(DenseNet, self).__init__() + + # First convolution + self.features = nn.Sequential(OrderedDict([ + ('conv0', nn.Conv2d(3, num_init_features, kernel_size=7, stride=2, + padding=3, bias=False)), + ('norm0', nn.BatchNorm2d(num_init_features)), + ('relu0', nn.ReLU(inplace=True)), + ('pool0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)), + ])) + + # Each denseblock + num_features = num_init_features + for i, num_layers in enumerate(block_config): + block = _DenseBlock( + num_layers=num_layers, + num_input_features=num_features, + bn_size=bn_size, + growth_rate=growth_rate, + drop_rate=drop_rate, + memory_efficient=memory_efficient + ) + self.features.add_module('denseblock%d' % (i + 1), block) + num_features = num_features + num_layers * growth_rate + if i != len(block_config) - 1: + trans = _Transition(num_input_features=num_features, + num_output_features=num_features // 2) + self.features.add_module('transition%d' % (i + 1), trans) + num_features = num_features // 2 + + # Final batch norm + self.features.add_module('norm5', nn.BatchNorm2d(num_features)) + + # Linear layer + self.classifier = nn.Linear(num_features, num_classes) + + # Official init from torch repo. 
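
The channel bookkeeping in `DenseNet.__init__` is worth tracing once: every dense layer concatenates `growth_rate` new channels and every transition halves the running total. For the densenet121 configuration this ends at the 1024 features consumed by the classifier. A quick check under those assumptions:

```python
# densenet121: growth_rate=32, block_config=(6, 12, 24, 16), num_init_features=64
num_features = 64
for i, num_layers in enumerate((6, 12, 24, 16)):
    num_features += num_layers * 32        # each dense layer adds 32 channels
    if i != 3:
        num_features //= 2                 # transition layer halves the channel count
print(num_features)                        # 1024 -> in_features of the final nn.Linear
```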
+ for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + nn.init.constant_(m.bias, 0) + + def forward(self, x): + features = self.features(x) + out = F.relu(features, inplace=True) + out = F.adaptive_avg_pool2d(out, (1, 1)) + out = torch.flatten(out, 1) + out = self.classifier(out) + return out + + +def _load_state_dict(model, model_url, progress): + # '.'s are no longer allowed in module names, but previous _DenseLayer + # has keys 'norm.1', 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'. + # They are also in the checkpoints in model_urls. This pattern is used + # to find such keys. + pattern = re.compile( + r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$') + + state_dict = load_state_dict_from_url(model_url, progress=progress) + for key in list(state_dict.keys()): + res = pattern.match(key) + if res: + new_key = res.group(1) + res.group(2) + state_dict[new_key] = state_dict[key] + del state_dict[key] + model.load_state_dict(state_dict) + + +def _densenet(arch, growth_rate, block_config, num_init_features, pretrained, progress, + **kwargs): + model = DenseNet(growth_rate, block_config, num_init_features, **kwargs) + if pretrained: + _load_state_dict(model, model_urls[arch], progress) + return model + + +def densenet121(pretrained=False, progress=True, **kwargs): + r"""Densenet-121 model from + `"Densely Connected Convolutional Networks" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + memory_efficient (bool) - If True, uses checkpointing. Much more memory efficient, + but slower. Default: *False*. See `"paper" `_ + """ + return _densenet('densenet121', 32, (6, 12, 24, 16), 64, pretrained, progress, + **kwargs) + + +def densenet161(pretrained=False, progress=True, **kwargs): + r"""Densenet-161 model from + `"Densely Connected Convolutional Networks" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + memory_efficient (bool) - If True, uses checkpointing. Much more memory efficient, + but slower. Default: *False*. See `"paper" `_ + """ + return _densenet('densenet161', 48, (6, 12, 36, 24), 96, pretrained, progress, + **kwargs) + + +def densenet169(pretrained=False, progress=True, **kwargs): + r"""Densenet-169 model from + `"Densely Connected Convolutional Networks" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + memory_efficient (bool) - If True, uses checkpointing. Much more memory efficient, + but slower. Default: *False*. See `"paper" `_ + """ + return _densenet('densenet169', 32, (6, 12, 32, 32), 64, pretrained, progress, + **kwargs) + + +def densenet201(pretrained=False, progress=True, **kwargs): + r"""Densenet-201 model from + `"Densely Connected Convolutional Networks" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + memory_efficient (bool) - If True, uses checkpointing. Much more memory efficient, + but slower. Default: *False*. 
See `"paper" `_ + """ + return _densenet('densenet201', 32, (6, 12, 48, 32), 64, pretrained, progress, + **kwargs) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/__init__.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/__init__.py new file mode 100644 index 0000000000..cdfb6cf23c --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/__init__.py @@ -0,0 +1,5 @@ +from .faster_rcnn import * +from .mask_rcnn import * +from .keypoint_rcnn import * +from .ssd import * +from .ssdlite import * diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils.py new file mode 100644 index 0000000000..ce70d93be0 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils.py @@ -0,0 +1,406 @@ +import math +from collections import OrderedDict +from typing import List, Tuple + +import numpy as np +import torch +from torch import Tensor, nn +from torchvision.ops.misc import FrozenBatchNorm2d +import pdb + + +class BalancedPositiveNegativeSampler: + """ + This class samples batches, ensuring that they contain a fixed proportion of positives + """ + + def __init__(self, batch_size_per_image: int, positive_fraction: float) -> None: + """ + Args: + batch_size_per_image (int): number of elements to be selected per image + positive_fraction (float): percentage of positive elements per batch + """ + self.batch_size_per_image = batch_size_per_image + self.positive_fraction = positive_fraction + + def __call__(self, matched_idxs: List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]: + """ + Args: + matched idxs: list of tensors containing -1, 0 or positive values. + Each tensor corresponds to a specific image. + -1 values are ignored, 0 are considered as negatives and > 0 as + positives. + + Returns: + pos_idx (list[tensor]) + neg_idx (list[tensor]) + + Returns two lists of binary masks for each image. + The first list contains the positive elements that were selected, + and the second list the negative example. 
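+
+        Example (illustrative sketch; the counts are deterministic even though
+        the selection itself is random)::
+
+            >>> sampler = BalancedPositiveNegativeSampler(batch_size_per_image=4, positive_fraction=0.5)
+            >>> matched_idxs = [torch.tensor([1, 0, 0, -1, 2, 0])]
+            >>> pos_masks, neg_masks = sampler(matched_idxs)
+            >>> int(pos_masks[0].sum()), int(neg_masks[0].sum())
+            (2, 2)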
+ """ + pos_idx = [] + neg_idx = [] + for matched_idxs_per_image in matched_idxs: + positive = torch.where(matched_idxs_per_image >= 1)[0] + negative = torch.where(matched_idxs_per_image == 0)[0] + + num_pos = int(self.batch_size_per_image * self.positive_fraction) + # protect against not enough positive examples + num_pos = min(positive.numel(), num_pos) + num_neg = self.batch_size_per_image - num_pos + # protect against not enough negative examples + num_neg = min(negative.numel(), num_neg) + + # randomly select positive and negative examples + perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] + perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] + + pos_idx_per_image = positive[perm1] + neg_idx_per_image = negative[perm2] + + # create binary mask from indices + pos_idx_per_image_mask = torch.zeros_like(matched_idxs_per_image, dtype=torch.uint8) + neg_idx_per_image_mask = torch.zeros_like(matched_idxs_per_image, dtype=torch.uint8) + + pos_idx_per_image_mask[pos_idx_per_image] = 1 + neg_idx_per_image_mask[neg_idx_per_image] = 1 + + pos_idx.append(pos_idx_per_image_mask) + neg_idx.append(neg_idx_per_image_mask) + + return pos_idx, neg_idx + + +# @torch.jit._script_if_tracing +def encode_boxes(reference_boxes: Tensor, proposals: Tensor, weights: Tensor) -> Tensor: + """ + Encode a set of proposals with respect to some + reference boxes + + Args: + reference_boxes (Tensor): reference boxes + proposals (Tensor): boxes to be encoded + weights (Tensor[4]): the weights for ``(x, y, w, h)`` + """ + + # perform some unpacking to make it JIT-fusion friendly + wx = weights[0] + wy = weights[1] + ww = weights[2] + wh = weights[3] + + proposals_x1 = proposals[:, 0].unsqueeze(1) + proposals_y1 = proposals[:, 1].unsqueeze(1) + proposals_x2 = proposals[:, 2].unsqueeze(1) + proposals_y2 = proposals[:, 3].unsqueeze(1) + + reference_boxes_x1 = reference_boxes[:, 0].unsqueeze(1) + reference_boxes_y1 = reference_boxes[:, 1].unsqueeze(1) + reference_boxes_x2 = reference_boxes[:, 2].unsqueeze(1) + reference_boxes_y2 = reference_boxes[:, 3].unsqueeze(1) + + # implementation starts here + ex_widths = proposals_x2 - proposals_x1 + ex_heights = proposals_y2 - proposals_y1 + ex_ctr_x = proposals_x1 + 0.5 * ex_widths + ex_ctr_y = proposals_y1 + 0.5 * ex_heights + # ex_widths += 1e-32 + # ex_heights += 1e-32 + + gt_widths = reference_boxes_x2 - reference_boxes_x1 + gt_widths_mask = gt_widths == 0 + gt_widths_temp = gt_widths_mask * ex_widths + gt_widths = gt_widths + gt_widths_temp + + gt_heights = reference_boxes_y2 - reference_boxes_y1 + gt_heights_mask = gt_heights == 0 + gt_heights_temp = gt_heights_mask * ex_heights + gt_heights = gt_heights + gt_heights_temp + + gt_ctr_x = reference_boxes_x1 + 0.5 * gt_widths + gt_ctr_y = reference_boxes_y1 + 0.5 * gt_heights + + targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths + targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights + + targets_dw = ww * torch.log(gt_widths / ex_widths) + targets_dh = wh * torch.log(gt_heights / ex_heights) + + targets = torch.cat((targets_dx, targets_dy, targets_dw, targets_dh), dim=1) + return targets + + +class BoxCoder: + """ + This class encodes and decodes a set of bounding boxes into + the representation used for training the regressors. 
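+
+    Round-trip example (illustrative)::
+
+        >>> coder = BoxCoder(weights=(1.0, 1.0, 1.0, 1.0))
+        >>> anchors = torch.tensor([[0.0, 0.0, 10.0, 10.0]])
+        >>> gt = torch.tensor([[1.0, 1.0, 9.0, 9.0]])
+        >>> codes = coder.encode_single(gt, anchors)
+        >>> torch.allclose(coder.decode(codes, [anchors]).reshape(-1, 4), gt)
+        True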
+ """ + + def __init__( + self, weights: Tuple[float, float, float, float], bbox_xform_clip: float = math.log(1000.0 / 16) + ) -> None: + """ + Args: + weights (4-element tuple) + bbox_xform_clip (float) + """ + self.weights = weights + self.bbox_xform_clip = bbox_xform_clip + + def encode(self, reference_boxes: List[Tensor], proposals: List[Tensor]) -> List[Tensor]: + boxes_per_image = [len(b) for b in reference_boxes] + reference_boxes = torch.cat(reference_boxes, dim=0) + proposals = torch.cat(proposals, dim=0) + targets = self.encode_single(reference_boxes, proposals) + return targets.split(boxes_per_image, 0) + + def encode_single(self, reference_boxes: Tensor, proposals: Tensor) -> Tensor: + """ + Encode a set of proposals with respect to some + reference boxes + + Args: + reference_boxes (Tensor): reference boxes + proposals (Tensor): boxes to be encoded + """ + dtype = reference_boxes.dtype + device = reference_boxes.device + weights = torch.as_tensor(self.weights, dtype=dtype, device=device) + targets = encode_boxes(reference_boxes, proposals, weights) + + return targets + + def decode(self, rel_codes: Tensor, boxes: List[Tensor]) -> Tensor: + assert isinstance(boxes, (list, tuple)) + assert isinstance(rel_codes, torch.Tensor) + boxes_per_image = [b.size(0) for b in boxes] + concat_boxes = torch.cat(boxes, dim=0) + box_sum = 0 + for val in boxes_per_image: + box_sum += val + if box_sum > 0: + rel_codes = rel_codes.reshape(box_sum, -1) + pred_boxes = self.decode_single(rel_codes, concat_boxes) + if box_sum > 0: + pred_boxes = pred_boxes.reshape(box_sum, -1, 4) + return pred_boxes + + def decode_single(self, rel_codes: Tensor, boxes: Tensor) -> Tensor: + """ + From a set of original boxes and encoded relative box offsets, + get the decoded boxes. + + Args: + rel_codes (Tensor): encoded boxes + boxes (Tensor): reference boxes. + """ + + boxes = boxes.to(rel_codes.dtype) + + widths = boxes[:, 2] - boxes[:, 0] + heights = boxes[:, 3] - boxes[:, 1] + ctr_x = boxes[:, 0] + 0.5 * widths + ctr_y = boxes[:, 1] + 0.5 * heights + + wx, wy, ww, wh = self.weights + dx = rel_codes[:, 0::4] / wx + dy = rel_codes[:, 1::4] / wy + dw = rel_codes[:, 2::4] / ww + dh = rel_codes[:, 3::4] / wh + + # Prevent sending too large values into torch.exp() + dw = torch.clamp(dw, max=self.bbox_xform_clip) + dh = torch.clamp(dh, max=self.bbox_xform_clip) + + pred_ctr_x = dx * widths[:, None] + ctr_x[:, None] + pred_ctr_y = dy * heights[:, None] + ctr_y[:, None] + pred_w = torch.exp(dw) * widths[:, None] + pred_h = torch.exp(dh) * heights[:, None] + + # Distance from center to box's corner. + c_to_c_h = torch.tensor(0.5, dtype=pred_ctr_y.dtype, device=pred_h.device) * pred_h + c_to_c_w = torch.tensor(0.5, dtype=pred_ctr_x.dtype, device=pred_w.device) * pred_w + + pred_boxes1 = pred_ctr_x - c_to_c_w + pred_boxes2 = pred_ctr_y - c_to_c_h + pred_boxes3 = pred_ctr_x + c_to_c_w + pred_boxes4 = pred_ctr_y + c_to_c_h + pred_boxes = torch.stack((pred_boxes1, pred_boxes2, pred_boxes3, pred_boxes4), dim=2).flatten(1) + return pred_boxes + + +class Matcher: + """ + This class assigns to each predicted "element" (e.g., a box) a ground-truth + element. Each predicted element will have exactly zero or one matches; each + ground-truth element may be assigned to zero or more predicted elements. + + Matching is based on the MxN match_quality_matrix, that characterizes how well + each (ground-truth, predicted)-pair match. For example, if the elements are + boxes, the matrix may contain box IoU overlap values. 
+ + The matcher returns a tensor of size N containing the index of the ground-truth + element m that matches to prediction n. If there is no match, a negative value + is returned. + """ + + BELOW_LOW_THRESHOLD = -1 + BETWEEN_THRESHOLDS = -2 + + __annotations__ = { + "BELOW_LOW_THRESHOLD": int, + "BETWEEN_THRESHOLDS": int, + } + + def __init__(self, high_threshold: float, low_threshold: float, allow_low_quality_matches: bool = False) -> None: + """ + Args: + high_threshold (float): quality values greater than or equal to + this value are candidate matches. + low_threshold (float): a lower quality threshold used to stratify + matches into three levels: + 1) matches >= high_threshold + 2) BETWEEN_THRESHOLDS matches in [low_threshold, high_threshold) + 3) BELOW_LOW_THRESHOLD matches in [0, low_threshold) + allow_low_quality_matches (bool): if True, produce additional matches + for predictions that have only low-quality match candidates. See + set_low_quality_matches_ for more details. + """ + self.BELOW_LOW_THRESHOLD = -1 + self.BETWEEN_THRESHOLDS = -2 + assert low_threshold <= high_threshold + self.high_threshold = high_threshold + self.low_threshold = low_threshold + self.allow_low_quality_matches = allow_low_quality_matches + + def __call__(self, match_quality_matrix: Tensor) -> Tensor: + """ + Args: + match_quality_matrix (Tensor[float]): an MxN tensor, containing the + pairwise quality between M ground-truth elements and N predicted elements. + + Returns: + matches (Tensor[int64]): an N tensor where N[i] is a matched gt in + [0, M - 1] or a negative value indicating that prediction i could not + be matched. + """ + if match_quality_matrix.numel() == 0: + # empty targets or proposals not supported during training + if match_quality_matrix.shape[0] == 0: + raise ValueError("No ground-truth boxes available for one of the images during training") + else: + raise ValueError("No proposal boxes available for one of the images during training") + + # match_quality_matrix is M (gt) x N (predicted) + # Max over gt elements (dim 0) to find best gt candidate for each prediction + matched_vals, matches = match_quality_matrix.max(dim=0) + if self.allow_low_quality_matches: + all_matches = matches.clone() + else: + all_matches = None # type: ignore[assignment] + + # Assign candidate matches with low quality to negative (unassigned) values + below_low_threshold = matched_vals < self.low_threshold + between_thresholds = (matched_vals >= self.low_threshold) & (matched_vals < self.high_threshold) + matches[below_low_threshold] = self.BELOW_LOW_THRESHOLD + matches[between_thresholds] = self.BETWEEN_THRESHOLDS + + if self.allow_low_quality_matches: + assert all_matches is not None + self.set_low_quality_matches_(matches, all_matches, match_quality_matrix) + + return matches + + def set_low_quality_matches_(self, matches: Tensor, all_matches: Tensor, match_quality_matrix: Tensor) -> None: + """ + Produce additional matches for predictions that have only low-quality matches. + Specifically, for each ground-truth find the set of predictions that have + maximum overlap with it (including ties); for each prediction in that set, if + it is unmatched, then match it to the ground-truth with which it has the highest + quality value. 
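+
+        Illustration: if a ground-truth box overlaps every prediction with IoU 0.3
+        and ``low_threshold`` is 0.4, the thresholding in ``__call__`` marks all of
+        them ``BELOW_LOW_THRESHOLD``; this method then restores the prediction(s)
+        tied for that ground-truth's best IoU, so every ground-truth keeps at least
+        one match.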
+ """ + # For each gt, find the prediction with which it has highest quality + highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1) + # Find highest quality match available, even if it is low, including ties + gt_pred_pairs_of_highest_quality = torch.where(match_quality_matrix == highest_quality_foreach_gt[:, None]) + # Example gt_pred_pairs_of_highest_quality: + # tensor([[ 0, 39796], + # [ 1, 32055], + # [ 1, 32070], + # [ 2, 39190], + # [ 2, 40255], + # [ 3, 40390], + # [ 3, 41455], + # [ 4, 45470], + # [ 5, 45325], + # [ 5, 46390]]) + # Each row is a (gt index, prediction index) + # Note how gt items 1, 2, 3, and 5 each have two ties + + pred_inds_to_update = gt_pred_pairs_of_highest_quality[1] + matches[pred_inds_to_update] = all_matches[pred_inds_to_update] + + +class SSDMatcher(Matcher): + def __init__(self, threshold: float) -> None: + super().__init__(threshold, threshold, allow_low_quality_matches=False) + + def __call__(self, match_quality_matrix: Tensor) -> Tensor: + matches = super().__call__(match_quality_matrix) + + # For each gt, find the prediction with which it has the highest quality + _, highest_quality_pred_foreach_gt = match_quality_matrix.max(dim=1) + matches[highest_quality_pred_foreach_gt] = torch.arange( + highest_quality_pred_foreach_gt.size(0), dtype=torch.int64, device=highest_quality_pred_foreach_gt.device + ) + + return matches + + +def overwrite_eps(model: nn.Module, eps: float) -> None: + """ + This method overwrites the default eps values of all the + FrozenBatchNorm2d layers of the model with the provided value. + This is necessary to address the BC-breaking change introduced + by the bug-fix at pytorch/vision#2933. The overwrite is applied + only when the pretrained weights are loaded to maintain compatibility + with previous versions. + + Args: + model (nn.Module): The model on which we perform the overwrite. + eps (float): The new value of eps. + """ + for module in model.modules(): + if isinstance(module, FrozenBatchNorm2d): + module.eps = eps + + +def retrieve_out_channels(model: nn.Module, size: Tuple[int, int]) -> List[int]: + """ + This method retrieves the number of output channels of a specific model. + + Args: + model (nn.Module): The model for which we estimate the out_channels. + It should return a single Tensor or an OrderedDict[Tensor]. + size (Tuple[int, int]): The size (wxh) of the input. + + Returns: + out_channels (List[int]): A list of the output channels of the model. 
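+
+    Example (illustrative)::
+
+        >>> backbone = nn.Sequential(nn.Conv2d(3, 16, 3, stride=2, padding=1), nn.ReLU())
+        >>> retrieve_out_channels(backbone, (320, 320))
+        [16]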
+ """ + in_training = model.training + model.eval() + + with torch.no_grad(): + # Use dummy data to retrieve the feature map sizes to avoid hard-coding their values + device = next(model.parameters()).device + tmp_img = torch.zeros((1, 3, size[1], size[0]), device=device) + features = model(tmp_img) + if isinstance(features, torch.Tensor): + features = OrderedDict([("0", features)]) + out_channels = [x.size(1) for x in features.values()] + + if in_training: + model.train() + + return out_channels diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils_origin.py new file mode 100644 index 0000000000..c48576328d --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils_origin.py @@ -0,0 +1,348 @@ +import math + +import torch +from torch.jit.annotations import List, Tuple +from torch import Tensor +import torchvision + + +# TODO: https://github.com/pytorch/pytorch/issues/26727 +def zeros_like(tensor, dtype): + # type: (Tensor, int) -> Tensor + return torch.zeros_like(tensor, dtype=dtype, layout=tensor.layout, + device=tensor.device, pin_memory=tensor.is_pinned()) + + +@torch.jit.script +class BalancedPositiveNegativeSampler(object): + """ + This class samples batches, ensuring that they contain a fixed proportion of positives + """ + + def __init__(self, batch_size_per_image, positive_fraction): + # type: (int, float) + """ + Arguments: + batch_size_per_image (int): number of elements to be selected per image + positive_fraction (float): percentace of positive elements per batch + """ + self.batch_size_per_image = batch_size_per_image + self.positive_fraction = positive_fraction + + def __call__(self, matched_idxs): + # type: (List[Tensor]) + """ + Arguments: + matched idxs: list of tensors containing -1, 0 or positive values. + Each tensor corresponds to a specific image. + -1 values are ignored, 0 are considered as negatives and > 0 as + positives. + + Returns: + pos_idx (list[tensor]) + neg_idx (list[tensor]) + + Returns two lists of binary masks for each image. + The first list contains the positive elements that were selected, + and the second list the negative example. 
+ """ + pos_idx = [] + neg_idx = [] + for matched_idxs_per_image in matched_idxs: + positive = torch.nonzero(matched_idxs_per_image >= 1).squeeze(1) + negative = torch.nonzero(matched_idxs_per_image == 0).squeeze(1) + + num_pos = int(self.batch_size_per_image * self.positive_fraction) + # protect against not enough positive examples + num_pos = min(positive.numel(), num_pos) + num_neg = self.batch_size_per_image - num_pos + # protect against not enough negative examples + num_neg = min(negative.numel(), num_neg) + + # randomly select positive and negative examples + perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] + perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] + + pos_idx_per_image = positive[perm1] + neg_idx_per_image = negative[perm2] + + # create binary mask from indices + pos_idx_per_image_mask = zeros_like( + matched_idxs_per_image, dtype=torch.uint8 + ) + neg_idx_per_image_mask = zeros_like( + matched_idxs_per_image, dtype=torch.uint8 + ) + + pos_idx_per_image_mask[pos_idx_per_image] = torch.tensor(1, dtype=torch.uint8) + neg_idx_per_image_mask[neg_idx_per_image] = torch.tensor(1, dtype=torch.uint8) + + pos_idx.append(pos_idx_per_image_mask) + neg_idx.append(neg_idx_per_image_mask) + + return pos_idx, neg_idx + + +@torch.jit.script +def encode_boxes(reference_boxes, proposals, weights): + # type: (torch.Tensor, torch.Tensor, torch.Tensor) -> torch.Tensor + """ + Encode a set of proposals with respect to some + reference boxes + + Arguments: + reference_boxes (Tensor): reference boxes + proposals (Tensor): boxes to be encoded + """ + + # perform some unpacking to make it JIT-fusion friendly + wx = weights[0] + wy = weights[1] + ww = weights[2] + wh = weights[3] + + proposals_x1 = proposals[:, 0].unsqueeze(1) + proposals_y1 = proposals[:, 1].unsqueeze(1) + proposals_x2 = proposals[:, 2].unsqueeze(1) + proposals_y2 = proposals[:, 3].unsqueeze(1) + + reference_boxes_x1 = reference_boxes[:, 0].unsqueeze(1) + reference_boxes_y1 = reference_boxes[:, 1].unsqueeze(1) + reference_boxes_x2 = reference_boxes[:, 2].unsqueeze(1) + reference_boxes_y2 = reference_boxes[:, 3].unsqueeze(1) + + # implementation starts here + ex_widths = proposals_x2 - proposals_x1 + ex_heights = proposals_y2 - proposals_y1 + ex_ctr_x = proposals_x1 + 0.5 * ex_widths + ex_ctr_y = proposals_y1 + 0.5 * ex_heights + + gt_widths = reference_boxes_x2 - reference_boxes_x1 + gt_heights = reference_boxes_y2 - reference_boxes_y1 + gt_ctr_x = reference_boxes_x1 + 0.5 * gt_widths + gt_ctr_y = reference_boxes_y1 + 0.5 * gt_heights + + targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths + targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights + targets_dw = ww * torch.log(gt_widths / ex_widths) + targets_dh = wh * torch.log(gt_heights / ex_heights) + + targets = torch.cat((targets_dx, targets_dy, targets_dw, targets_dh), dim=1) + return targets + + +@torch.jit.script +class BoxCoder(object): + """ + This class encodes and decodes a set of bounding boxes into + the representation used for training the regressors. + """ + + def __init__(self, weights, bbox_xform_clip=math.log(1000. 
/ 16)): + # type: (Tuple[float, float, float, float], float) + """ + Arguments: + weights (4-element tuple) + bbox_xform_clip (float) + """ + self.weights = weights + self.bbox_xform_clip = bbox_xform_clip + + def encode(self, reference_boxes, proposals): + # type: (List[Tensor], List[Tensor]) + boxes_per_image = [len(b) for b in reference_boxes] + reference_boxes = torch.cat(reference_boxes, dim=0) + proposals = torch.cat(proposals, dim=0) + targets = self.encode_single(reference_boxes, proposals) + return targets.split(boxes_per_image, 0) + + def encode_single(self, reference_boxes, proposals): + """ + Encode a set of proposals with respect to some + reference boxes + + Arguments: + reference_boxes (Tensor): reference boxes + proposals (Tensor): boxes to be encoded + """ + dtype = reference_boxes.dtype + device = reference_boxes.device + weights = torch.as_tensor(self.weights, dtype=dtype, device=device) + targets = encode_boxes(reference_boxes, proposals, weights) + + return targets + + def decode(self, rel_codes, boxes): + # type: (Tensor, List[Tensor]) + assert isinstance(boxes, (list, tuple)) + assert isinstance(rel_codes, torch.Tensor) + boxes_per_image = [b.size(0) for b in boxes] + concat_boxes = torch.cat(boxes, dim=0) + box_sum = 0 + for val in boxes_per_image: + box_sum += val + pred_boxes = self.decode_single( + rel_codes.reshape(box_sum, -1), concat_boxes + ) + return pred_boxes.reshape(box_sum, -1, 4) + + def decode_single(self, rel_codes, boxes): + """ + From a set of original boxes and encoded relative box offsets, + get the decoded boxes. + + Arguments: + rel_codes (Tensor): encoded boxes + boxes (Tensor): reference boxes. + """ + + boxes = boxes.to(rel_codes.dtype) + + widths = boxes[:, 2] - boxes[:, 0] + heights = boxes[:, 3] - boxes[:, 1] + ctr_x = boxes[:, 0] + 0.5 * widths + ctr_y = boxes[:, 1] + 0.5 * heights + + wx, wy, ww, wh = self.weights + dx = rel_codes[:, 0::4] / wx + dy = rel_codes[:, 1::4] / wy + dw = rel_codes[:, 2::4] / ww + dh = rel_codes[:, 3::4] / wh + + # Prevent sending too large values into torch.exp() + dw = torch.clamp(dw, max=self.bbox_xform_clip) + dh = torch.clamp(dh, max=self.bbox_xform_clip) + + pred_ctr_x = dx * widths[:, None] + ctr_x[:, None] + pred_ctr_y = dy * heights[:, None] + ctr_y[:, None] + pred_w = torch.exp(dw) * widths[:, None] + pred_h = torch.exp(dh) * heights[:, None] + + pred_boxes1 = pred_ctr_x - torch.tensor(0.5, dtype=pred_ctr_x.dtype, device=pred_w.device) * pred_w + pred_boxes2 = pred_ctr_y - torch.tensor(0.5, dtype=pred_ctr_y.dtype, device=pred_h.device) * pred_h + pred_boxes3 = pred_ctr_x + torch.tensor(0.5, dtype=pred_ctr_x.dtype, device=pred_w.device) * pred_w + pred_boxes4 = pred_ctr_y + torch.tensor(0.5, dtype=pred_ctr_y.dtype, device=pred_h.device) * pred_h + pred_boxes = torch.stack((pred_boxes1, pred_boxes2, pred_boxes3, pred_boxes4), dim=2).flatten(1) + return pred_boxes + + +@torch.jit.script +class Matcher(object): + """ + This class assigns to each predicted "element" (e.g., a box) a ground-truth + element. Each predicted element will have exactly zero or one matches; each + ground-truth element may be assigned to zero or more predicted elements. + + Matching is based on the MxN match_quality_matrix, that characterizes how well + each (ground-truth, predicted)-pair match. For example, if the elements are + boxes, the matrix may contain box IoU overlap values. + + The matcher returns a tensor of size N containing the index of the ground-truth + element m that matches to prediction n. 
If there is no match, a negative value + is returned. + """ + + BELOW_LOW_THRESHOLD = -1 + BETWEEN_THRESHOLDS = -2 + + __annotations__ = { + 'BELOW_LOW_THRESHOLD': int, + 'BETWEEN_THRESHOLDS': int, + } + + def __init__(self, high_threshold, low_threshold, allow_low_quality_matches=False): + # type: (float, float, bool) + """ + Args: + high_threshold (float): quality values greater than or equal to + this value are candidate matches. + low_threshold (float): a lower quality threshold used to stratify + matches into three levels: + 1) matches >= high_threshold + 2) BETWEEN_THRESHOLDS matches in [low_threshold, high_threshold) + 3) BELOW_LOW_THRESHOLD matches in [0, low_threshold) + allow_low_quality_matches (bool): if True, produce additional matches + for predictions that have only low-quality match candidates. See + set_low_quality_matches_ for more details. + """ + self.BELOW_LOW_THRESHOLD = -1 + self.BETWEEN_THRESHOLDS = -2 + assert low_threshold <= high_threshold + self.high_threshold = high_threshold + self.low_threshold = low_threshold + self.allow_low_quality_matches = allow_low_quality_matches + + def __call__(self, match_quality_matrix): + """ + Args: + match_quality_matrix (Tensor[float]): an MxN tensor, containing the + pairwise quality between M ground-truth elements and N predicted elements. + + Returns: + matches (Tensor[int64]): an N tensor where N[i] is a matched gt in + [0, M - 1] or a negative value indicating that prediction i could not + be matched. + """ + if match_quality_matrix.numel() == 0: + # empty targets or proposals not supported during training + if match_quality_matrix.shape[0] == 0: + raise ValueError( + "No ground-truth boxes available for one of the images " + "during training") + else: + raise ValueError( + "No proposal boxes available for one of the images " + "during training") + + # match_quality_matrix is M (gt) x N (predicted) + # Max over gt elements (dim 0) to find best gt candidate for each prediction + matched_vals, matches = match_quality_matrix.max(dim=0) + if self.allow_low_quality_matches: + all_matches = matches.clone() + else: + all_matches = None + + # Assign candidate matches with low quality to negative (unassigned) values + below_low_threshold = matched_vals < self.low_threshold + between_thresholds = (matched_vals >= self.low_threshold) & ( + matched_vals < self.high_threshold + ) + matches[below_low_threshold] = torch.tensor(self.BELOW_LOW_THRESHOLD) + matches[between_thresholds] = torch.tensor(self.BETWEEN_THRESHOLDS) + + if self.allow_low_quality_matches: + assert all_matches is not None + self.set_low_quality_matches_(matches, all_matches, match_quality_matrix) + + return matches + + def set_low_quality_matches_(self, matches, all_matches, match_quality_matrix): + """ + Produce additional matches for predictions that have only low-quality matches. + Specifically, for each ground-truth find the set of predictions that have + maximum overlap with it (including ties); for each prediction in that set, if + it is unmatched, then match it to the ground-truth with which it has the highest + quality value. 
+ """ + # For each gt, find the prediction with which it has highest quality + highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1) + # Find highest quality match available, even if it is low, including ties + gt_pred_pairs_of_highest_quality = torch.nonzero( + match_quality_matrix == highest_quality_foreach_gt[:, None] + ) + # Example gt_pred_pairs_of_highest_quality: + # tensor([[ 0, 39796], + # [ 1, 32055], + # [ 1, 32070], + # [ 2, 39190], + # [ 2, 40255], + # [ 3, 40390], + # [ 3, 41455], + # [ 4, 45470], + # [ 5, 45325], + # [ 5, 46390]]) + # Each row is a (gt index, prediction index) + # Note how gt items 1, 2, 3, and 5 each have two ties + + pred_inds_to_update = gt_pred_pairs_of_highest_quality[:, 1] + matches[pred_inds_to_update] = all_matches[pred_inds_to_update] diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils.py new file mode 100644 index 0000000000..1d6298eabe --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils.py @@ -0,0 +1,279 @@ +import math +from typing import List, Optional + +import torch +from torch import nn, Tensor + +from .image_list import ImageList +import numpy as np + +class AnchorGenerator(nn.Module): + """ + Module that generates anchors for a set of feature maps and + image sizes. + + The module support computing anchors at multiple sizes and aspect ratios + per feature map. This module assumes aspect ratio = height / width for + each anchor. + + sizes and aspect_ratios should have the same number of elements, and it should + correspond to the number of feature maps. + + sizes[i] and aspect_ratios[i] can have an arbitrary number of elements, + and AnchorGenerator will output a set of sizes[i] * aspect_ratios[i] anchors + per spatial location for feature map i. + + Args: + sizes (Tuple[Tuple[int]]): + aspect_ratios (Tuple[Tuple[float]]): + """ + + __annotations__ = { + "cell_anchors": List[torch.Tensor], + } + + def __init__( + self, + sizes=((128, 256, 512),), + aspect_ratios=((0.5, 1.0, 2.0),), + ): + super().__init__() + + if not isinstance(sizes[0], (list, tuple)): + # TODO change this + sizes = tuple((s,) for s in sizes) + if not isinstance(aspect_ratios[0], (list, tuple)): + aspect_ratios = (aspect_ratios,) * len(sizes) + + assert len(sizes) == len(aspect_ratios) + + self.sizes = sizes + self.aspect_ratios = aspect_ratios + self.cell_anchors = [ + self.generate_anchors(size, aspect_ratio) for size, aspect_ratio in zip(sizes, aspect_ratios) + ] + + # TODO: https://github.com/pytorch/pytorch/issues/26792 + # For every (aspect_ratios, scales) combination, output a zero-centered anchor with those values. + # (scales, aspect_ratios) are usually an element of zip(self.scales, self.aspect_ratios) + # This method assumes aspect ratio = height / width for an anchor. 
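+    # Worked example (editor's sketch): scales=(128,) with aspect_ratios=(1.0, 2.0)
+    # yields, after rounding,
+    #     [[-64., -64.,  64.,  64.],   # 128x128 box, h/w = 1.0
+    #      [-45., -91.,  45.,  91.]]   # ~90x182 box, h/w ~= 2.0, same area as 128^2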
+ def generate_anchors( + self, + scales: List[int], + aspect_ratios: List[float], + dtype: torch.dtype = torch.float32, + device: torch.device = torch.device("cpu"), + ): + scales = torch.as_tensor(scales, dtype=dtype, device=device) + aspect_ratios = torch.as_tensor(aspect_ratios, dtype=dtype, device=device) + h_ratios = torch.sqrt(aspect_ratios) + w_ratios = 1 / h_ratios + + ws = (w_ratios[:, None] * scales[None, :]).view(-1) + hs = (h_ratios[:, None] * scales[None, :]).view(-1) + + base_anchors = torch.stack([-ws, -hs, ws, hs], dim=1) / 2 + return base_anchors.round() + + def set_cell_anchors(self, dtype: torch.dtype, device: torch.device): + self.cell_anchors = [cell_anchor.to(dtype=dtype, device=device) for cell_anchor in self.cell_anchors] + + def num_anchors_per_location(self): + return [len(s) * len(a) for s, a in zip(self.sizes, self.aspect_ratios)] + + # For every combination of (a, (g, s), i) in (self.cell_anchors, zip(grid_sizes, strides), 0:2), + # output g[i] anchors that are s[i] distance apart in direction i, with the same dimensions as a. + def grid_anchors(self, grid_sizes: List[List[int]], strides: List[List[Tensor]]) -> List[Tensor]: + anchors = [] + cell_anchors = self.cell_anchors + assert cell_anchors is not None + + if not (len(grid_sizes) == len(strides) == len(cell_anchors)): + raise ValueError( + "Anchors should be Tuple[Tuple[int]] because each feature " + "map could potentially have different sizes and aspect ratios. " + "There needs to be a match between the number of " + "feature maps passed and the number of sizes / aspect ratios specified." + ) + + for size, stride, base_anchors in zip(grid_sizes, strides, cell_anchors): + grid_height, grid_width = size + stride_height, stride_width = stride + device = base_anchors.device + + # For output anchor, compute [x_center, y_center, x_center, y_center] + shifts_x = torch.arange(0, grid_width, dtype=torch.int32, device=device) * stride_width + shifts_y = torch.arange(0, grid_height, dtype=torch.int32, device=device) * stride_height + # shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x, indexing="ij") + shift_y, shift_x = np.meshgrid(shifts_y, shifts_x, indexing="ij") + shift_y = torch.from_numpy(shift_y) + shift_x = torch.from_numpy(shift_x) + + shift_x = shift_x.reshape(-1) + shift_y = shift_y.reshape(-1) + shifts = torch.stack((shift_x, shift_y, shift_x, shift_y), dim=1) + + # For every (base anchor, output anchor) pair, + # offset each zero-centered base anchor by the center of the output anchor. 
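+            # Shape note: shifts is (H*W, 4) and base_anchors is (A, 4); viewing
+            # them as (H*W, 1, 4) and (1, A, 4) broadcasts to (H*W, A, 4), which
+            # is flattened below to the (H*W*A, 4) anchors of this feature map.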
+ anchors.append((shifts.view(-1, 1, 4) + base_anchors.view(1, -1, 4)).reshape(-1, 4)) + + return anchors + + def forward(self, image_list: ImageList, feature_maps: List[Tensor]) -> List[Tensor]: + grid_sizes = [feature_map.shape[-2:] for feature_map in feature_maps] + image_size = image_list.tensors.shape[-2:] + dtype, device = feature_maps[0].dtype, feature_maps[0].device + strides = [ + [ + torch.tensor(image_size[0] // g[0], dtype=torch.int64, device=device), + torch.tensor(image_size[1] // g[1], dtype=torch.int64, device=device), + ] + for g in grid_sizes + ] + self.set_cell_anchors(dtype, device) + anchors_over_all_feature_maps = self.grid_anchors(grid_sizes, strides) + anchors: List[List[torch.Tensor]] = [] + for _ in range(len(image_list.image_sizes)): + anchors_in_image = [anchors_per_feature_map for anchors_per_feature_map in anchors_over_all_feature_maps] + anchors.append(anchors_in_image) + anchors = [torch.cat(anchors_per_image) for anchors_per_image in anchors] + return anchors + + +class DefaultBoxGenerator(nn.Module): + """ + This module generates the default boxes of SSD for a set of feature maps and image sizes. + + Args: + aspect_ratios (List[List[int]]): A list with all the aspect ratios used in each feature map. + min_ratio (float): The minimum scale :math:`\text{s}_{\text{min}}` of the default boxes used in the estimation + of the scales of each feature map. It is used only if the ``scales`` parameter is not provided. + max_ratio (float): The maximum scale :math:`\text{s}_{\text{max}}` of the default boxes used in the estimation + of the scales of each feature map. It is used only if the ``scales`` parameter is not provided. + scales (List[float]], optional): The scales of the default boxes. If not provided it will be estimated using + the ``min_ratio`` and ``max_ratio`` parameters. + steps (List[int]], optional): It's a hyper-parameter that affects the tiling of defalt boxes. If not provided + it will be estimated from the data. + clip (bool): Whether the standardized values of default boxes should be clipped between 0 and 1. The clipping + is applied while the boxes are encoded in format ``(cx, cy, w, h)``. 
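+
+    Example (illustrative): with six feature maps, ``min_ratio=0.15``, ``max_ratio=0.9``
+    and no explicit ``scales``, the estimated scales come out to (approximately)
+    ``[0.15, 0.3, 0.45, 0.6, 0.75, 0.9, 1.0]``; the trailing 1.0 is appended only so
+    that ``sqrt(scales[k] * scales[k + 1])`` is defined for the last feature map.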
+ """ + + def __init__( + self, + aspect_ratios: List[List[int]], + min_ratio: float = 0.15, + max_ratio: float = 0.9, + scales: Optional[List[float]] = None, + steps: Optional[List[int]] = None, + clip: bool = True, + ): + super().__init__() + if steps is not None: + assert len(aspect_ratios) == len(steps) + self.aspect_ratios = aspect_ratios + self.steps = steps + self.clip = clip + num_outputs = len(aspect_ratios) + + # Estimation of default boxes scales + if scales is None: + if num_outputs > 1: + range_ratio = max_ratio - min_ratio + self.scales = [min_ratio + range_ratio * k / (num_outputs - 1.0) for k in range(num_outputs)] + self.scales.append(1.0) + else: + self.scales = [min_ratio, max_ratio] + else: + self.scales = scales + + self._wh_pairs = self._generate_wh_pairs(num_outputs) + + def _generate_wh_pairs( + self, num_outputs: int, dtype: torch.dtype = torch.float32, device: torch.device = torch.device("cpu") + ) -> List[Tensor]: + _wh_pairs: List[Tensor] = [] + for k in range(num_outputs): + # Adding the 2 default width-height pairs for aspect ratio 1 and scale s'k + s_k = self.scales[k] + s_prime_k = math.sqrt(self.scales[k] * self.scales[k + 1]) + wh_pairs = [[s_k, s_k], [s_prime_k, s_prime_k]] + + # Adding 2 pairs for each aspect ratio of the feature map k + for ar in self.aspect_ratios[k]: + sq_ar = math.sqrt(ar) + w = self.scales[k] * sq_ar + h = self.scales[k] / sq_ar + wh_pairs.extend([[w, h], [h, w]]) + + _wh_pairs.append(torch.as_tensor(wh_pairs, dtype=dtype, device=device)) + return _wh_pairs + + def num_anchors_per_location(self): + # Estimate num of anchors based on aspect ratios: 2 default boxes + 2 * ratios of feaure map. + return [2 + 2 * len(r) for r in self.aspect_ratios] + + # Default Boxes calculation based on page 6 of SSD paper + def _grid_default_boxes( + self, grid_sizes: List[List[int]], image_size: List[int], dtype: torch.dtype = torch.float32 + ) -> Tensor: + default_boxes = [] + for k, f_k in enumerate(grid_sizes): + # Now add the default boxes for each width-height pair + if self.steps is not None: + x_f_k = image_size[0] / self.steps[k] + y_f_k = image_size[1] / self.steps[k] + else: + y_f_k, x_f_k = f_k + + shifts_x = ((torch.arange(0, f_k[1]) + 0.5) / x_f_k).to(dtype=dtype) + shifts_y = ((torch.arange(0, f_k[0]) + 0.5) / y_f_k).to(dtype=dtype) + # shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x, indexing="ij") #origin + shift_y, shift_x = np.meshgrid(shifts_y, shifts_x, indexing="ij") + shift_y = torch.from_numpy(shift_y) + shift_x = torch.from_numpy(shift_x) + + shift_x = shift_x.reshape(-1) + shift_y = shift_y.reshape(-1) + + shifts = torch.stack((shift_x, shift_y) * len(self._wh_pairs[k]), dim=-1).reshape(-1, 2) + # Clipping the default boxes while the boxes are encoded in format (cx, cy, w, h) + _wh_pair = self._wh_pairs[k].clamp(min=0, max=1) if self.clip else self._wh_pairs[k] + wh_pairs = _wh_pair.repeat((f_k[0] * f_k[1]), 1) + + # default_box = torch.cat((shifts, wh_pairs), dim=1) + default_box = torch.cat((shifts, wh_pairs.half()), dim=1) + + default_boxes.append(default_box) + + return torch.cat(default_boxes, dim=0) + + def __repr__(self) -> str: + s = self.__class__.__name__ + "(" + s += "aspect_ratios={aspect_ratios}" + s += ", clip={clip}" + s += ", scales={scales}" + s += ", steps={steps}" + s += ")" + return s.format(**self.__dict__) + + def forward(self, image_list: ImageList, feature_maps: List[Tensor]) -> List[Tensor]: + grid_sizes = [feature_map.shape[-2:] for feature_map in feature_maps] + image_size = 
image_list.tensors.shape[-2:] + dtype, device = feature_maps[0].dtype, feature_maps[0].device + default_boxes = self._grid_default_boxes(grid_sizes, image_size, dtype=dtype) + default_boxes = default_boxes.to(device) + + dboxes = [] + for _ in image_list.image_sizes: + dboxes_in_image = default_boxes + dboxes_in_image = torch.cat( + [ + dboxes_in_image[:, :2] - 0.5 * dboxes_in_image[:, 2:], + dboxes_in_image[:, :2] + 0.5 * dboxes_in_image[:, 2:], + ], + -1, + ) + dboxes_in_image[:, 0::2] *= image_size[1] + dboxes_in_image[:, 1::2] *= image_size[0] + dboxes.append(dboxes_in_image) + return dboxes diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils_origin.py new file mode 100644 index 0000000000..bac7cb6c74 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils_origin.py @@ -0,0 +1,270 @@ +import math +from typing import List, Optional + +import torch +from torch import nn, Tensor + +from .image_list import ImageList + + +class AnchorGenerator(nn.Module): + """ + Module that generates anchors for a set of feature maps and + image sizes. + + The module support computing anchors at multiple sizes and aspect ratios + per feature map. This module assumes aspect ratio = height / width for + each anchor. + + sizes and aspect_ratios should have the same number of elements, and it should + correspond to the number of feature maps. + + sizes[i] and aspect_ratios[i] can have an arbitrary number of elements, + and AnchorGenerator will output a set of sizes[i] * aspect_ratios[i] anchors + per spatial location for feature map i. + + Args: + sizes (Tuple[Tuple[int]]): + aspect_ratios (Tuple[Tuple[float]]): + """ + + __annotations__ = { + "cell_anchors": List[torch.Tensor], + } + + def __init__( + self, + sizes=((128, 256, 512),), + aspect_ratios=((0.5, 1.0, 2.0),), + ): + super().__init__() + + if not isinstance(sizes[0], (list, tuple)): + # TODO change this + sizes = tuple((s,) for s in sizes) + if not isinstance(aspect_ratios[0], (list, tuple)): + aspect_ratios = (aspect_ratios,) * len(sizes) + + assert len(sizes) == len(aspect_ratios) + + self.sizes = sizes + self.aspect_ratios = aspect_ratios + self.cell_anchors = [ + self.generate_anchors(size, aspect_ratio) for size, aspect_ratio in zip(sizes, aspect_ratios) + ] + + # TODO: https://github.com/pytorch/pytorch/issues/26792 + # For every (aspect_ratios, scales) combination, output a zero-centered anchor with those values. + # (scales, aspect_ratios) are usually an element of zip(self.scales, self.aspect_ratios) + # This method assumes aspect ratio = height / width for an anchor. 
+ def generate_anchors( + self, + scales: List[int], + aspect_ratios: List[float], + dtype: torch.dtype = torch.float32, + device: torch.device = torch.device("cpu"), + ): + scales = torch.as_tensor(scales, dtype=dtype, device=device) + aspect_ratios = torch.as_tensor(aspect_ratios, dtype=dtype, device=device) + h_ratios = torch.sqrt(aspect_ratios) + w_ratios = 1 / h_ratios + + ws = (w_ratios[:, None] * scales[None, :]).view(-1) + hs = (h_ratios[:, None] * scales[None, :]).view(-1) + + base_anchors = torch.stack([-ws, -hs, ws, hs], dim=1) / 2 + return base_anchors.round() + + def set_cell_anchors(self, dtype: torch.dtype, device: torch.device): + self.cell_anchors = [cell_anchor.to(dtype=dtype, device=device) for cell_anchor in self.cell_anchors] + + def num_anchors_per_location(self): + return [len(s) * len(a) for s, a in zip(self.sizes, self.aspect_ratios)] + + # For every combination of (a, (g, s), i) in (self.cell_anchors, zip(grid_sizes, strides), 0:2), + # output g[i] anchors that are s[i] distance apart in direction i, with the same dimensions as a. + def grid_anchors(self, grid_sizes: List[List[int]], strides: List[List[Tensor]]) -> List[Tensor]: + anchors = [] + cell_anchors = self.cell_anchors + assert cell_anchors is not None + + if not (len(grid_sizes) == len(strides) == len(cell_anchors)): + raise ValueError( + "Anchors should be Tuple[Tuple[int]] because each feature " + "map could potentially have different sizes and aspect ratios. " + "There needs to be a match between the number of " + "feature maps passed and the number of sizes / aspect ratios specified." + ) + + for size, stride, base_anchors in zip(grid_sizes, strides, cell_anchors): + grid_height, grid_width = size + stride_height, stride_width = stride + device = base_anchors.device + + # For output anchor, compute [x_center, y_center, x_center, y_center] + shifts_x = torch.arange(0, grid_width, dtype=torch.int32, device=device) * stride_width + shifts_y = torch.arange(0, grid_height, dtype=torch.int32, device=device) * stride_height + shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x, indexing="ij") + shift_x = shift_x.reshape(-1) + shift_y = shift_y.reshape(-1) + shifts = torch.stack((shift_x, shift_y, shift_x, shift_y), dim=1) + + # For every (base anchor, output anchor) pair, + # offset each zero-centered base anchor by the center of the output anchor. + anchors.append((shifts.view(-1, 1, 4) + base_anchors.view(1, -1, 4)).reshape(-1, 4)) + + return anchors + + def forward(self, image_list: ImageList, feature_maps: List[Tensor]) -> List[Tensor]: + grid_sizes = [feature_map.shape[-2:] for feature_map in feature_maps] + image_size = image_list.tensors.shape[-2:] + dtype, device = feature_maps[0].dtype, feature_maps[0].device + strides = [ + [ + torch.tensor(image_size[0] // g[0], dtype=torch.int64, device=device), + torch.tensor(image_size[1] // g[1], dtype=torch.int64, device=device), + ] + for g in grid_sizes + ] + self.set_cell_anchors(dtype, device) + anchors_over_all_feature_maps = self.grid_anchors(grid_sizes, strides) + anchors: List[List[torch.Tensor]] = [] + for _ in range(len(image_list.image_sizes)): + anchors_in_image = [anchors_per_feature_map for anchors_per_feature_map in anchors_over_all_feature_maps] + anchors.append(anchors_in_image) + anchors = [torch.cat(anchors_per_image) for anchors_per_image in anchors] + return anchors + + +class DefaultBoxGenerator(nn.Module): + """ + This module generates the default boxes of SSD for a set of feature maps and image sizes. 
+ + Args: + aspect_ratios (List[List[int]]): A list with all the aspect ratios used in each feature map. + min_ratio (float): The minimum scale :math:`\text{s}_{\text{min}}` of the default boxes used in the estimation + of the scales of each feature map. It is used only if the ``scales`` parameter is not provided. + max_ratio (float): The maximum scale :math:`\text{s}_{\text{max}}` of the default boxes used in the estimation + of the scales of each feature map. It is used only if the ``scales`` parameter is not provided. + scales (List[float]], optional): The scales of the default boxes. If not provided it will be estimated using + the ``min_ratio`` and ``max_ratio`` parameters. + steps (List[int]], optional): It's a hyper-parameter that affects the tiling of defalt boxes. If not provided + it will be estimated from the data. + clip (bool): Whether the standardized values of default boxes should be clipped between 0 and 1. The clipping + is applied while the boxes are encoded in format ``(cx, cy, w, h)``. + """ + + def __init__( + self, + aspect_ratios: List[List[int]], + min_ratio: float = 0.15, + max_ratio: float = 0.9, + scales: Optional[List[float]] = None, + steps: Optional[List[int]] = None, + clip: bool = True, + ): + super().__init__() + if steps is not None: + assert len(aspect_ratios) == len(steps) + self.aspect_ratios = aspect_ratios + self.steps = steps + self.clip = clip + num_outputs = len(aspect_ratios) + + # Estimation of default boxes scales + if scales is None: + if num_outputs > 1: + range_ratio = max_ratio - min_ratio + self.scales = [min_ratio + range_ratio * k / (num_outputs - 1.0) for k in range(num_outputs)] + self.scales.append(1.0) + else: + self.scales = [min_ratio, max_ratio] + else: + self.scales = scales + + self._wh_pairs = self._generate_wh_pairs(num_outputs) + + def _generate_wh_pairs( + self, num_outputs: int, dtype: torch.dtype = torch.float32, device: torch.device = torch.device("cpu") + ) -> List[Tensor]: + _wh_pairs: List[Tensor] = [] + for k in range(num_outputs): + # Adding the 2 default width-height pairs for aspect ratio 1 and scale s'k + s_k = self.scales[k] + s_prime_k = math.sqrt(self.scales[k] * self.scales[k + 1]) + wh_pairs = [[s_k, s_k], [s_prime_k, s_prime_k]] + + # Adding 2 pairs for each aspect ratio of the feature map k + for ar in self.aspect_ratios[k]: + sq_ar = math.sqrt(ar) + w = self.scales[k] * sq_ar + h = self.scales[k] / sq_ar + wh_pairs.extend([[w, h], [h, w]]) + + _wh_pairs.append(torch.as_tensor(wh_pairs, dtype=dtype, device=device)) + return _wh_pairs + + def num_anchors_per_location(self): + # Estimate num of anchors based on aspect ratios: 2 default boxes + 2 * ratios of feaure map. 
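+        # e.g. aspect_ratios=[[2, 3]] -> 2 + 2 * 2 = 6 default boxes per location.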
+ return [2 + 2 * len(r) for r in self.aspect_ratios] + + # Default Boxes calculation based on page 6 of SSD paper + def _grid_default_boxes( + self, grid_sizes: List[List[int]], image_size: List[int], dtype: torch.dtype = torch.float32 + ) -> Tensor: + default_boxes = [] + for k, f_k in enumerate(grid_sizes): + # Now add the default boxes for each width-height pair + if self.steps is not None: + x_f_k = image_size[0] / self.steps[k] + y_f_k = image_size[1] / self.steps[k] + else: + y_f_k, x_f_k = f_k + + shifts_x = ((torch.arange(0, f_k[1]) + 0.5) / x_f_k).to(dtype=dtype) + shifts_y = ((torch.arange(0, f_k[0]) + 0.5) / y_f_k).to(dtype=dtype) + shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x, indexing="ij") + shift_x = shift_x.reshape(-1) + shift_y = shift_y.reshape(-1) + + shifts = torch.stack((shift_x, shift_y) * len(self._wh_pairs[k]), dim=-1).reshape(-1, 2) + # Clipping the default boxes while the boxes are encoded in format (cx, cy, w, h) + _wh_pair = self._wh_pairs[k].clamp(min=0, max=1) if self.clip else self._wh_pairs[k] + wh_pairs = _wh_pair.repeat((f_k[0] * f_k[1]), 1) + + default_box = torch.cat((shifts, wh_pairs), dim=1) + + default_boxes.append(default_box) + + return torch.cat(default_boxes, dim=0) + + def __repr__(self) -> str: + s = self.__class__.__name__ + "(" + s += "aspect_ratios={aspect_ratios}" + s += ", clip={clip}" + s += ", scales={scales}" + s += ", steps={steps}" + s += ")" + return s.format(**self.__dict__) + + def forward(self, image_list: ImageList, feature_maps: List[Tensor]) -> List[Tensor]: + grid_sizes = [feature_map.shape[-2:] for feature_map in feature_maps] + image_size = image_list.tensors.shape[-2:] + dtype, device = feature_maps[0].dtype, feature_maps[0].device + default_boxes = self._grid_default_boxes(grid_sizes, image_size, dtype=dtype) + default_boxes = default_boxes.to(device) + + dboxes = [] + for _ in image_list.image_sizes: + dboxes_in_image = default_boxes + dboxes_in_image = torch.cat( + [ + dboxes_in_image[:, :2] - 0.5 * dboxes_in_image[:, 2:], + dboxes_in_image[:, :2] + 0.5 * dboxes_in_image[:, 2:], + ], + -1, + ) + dboxes_in_image[:, 0::2] *= image_size[1] + dboxes_in_image[:, 1::2] *= image_size[0] + dboxes.append(dboxes_in_image) + return dboxes diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils.py new file mode 100644 index 0000000000..54fdc4c05c --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils.py @@ -0,0 +1,212 @@ +import warnings +from typing import Callable, Dict, Optional, List, Union + +from torch import nn, Tensor +from torchvision.ops import misc as misc_nn_ops +from torchvision.ops.feature_pyramid_network import FeaturePyramidNetwork, LastLevelMaxPool, ExtraFPNBlock + +from .. import mobilenet +from .. import resnet +from .._utils import IntermediateLayerGetter + + +class BackboneWithFPN(nn.Module): + """ + Adds a FPN on top of a model. + Internally, it uses torchvision.models._utils.IntermediateLayerGetter to + extract a submodel that returns the feature maps specified in return_layers. + The same limitations of IntermediateLayerGetter apply here. 
+ Args: + backbone (nn.Module) + return_layers (Dict[name, new_name]): a dict containing the names + of the modules for which the activations will be returned as + the key of the dict, and the value of the dict is the name + of the returned activation (which the user can specify). + in_channels_list (List[int]): number of channels for each feature map + that is returned, in the order they are present in the OrderedDict + out_channels (int): number of channels in the FPN. + Attributes: + out_channels (int): the number of channels in the FPN + """ + + def __init__( + self, + backbone: nn.Module, + return_layers: Dict[str, str], + in_channels_list: List[int], + out_channels: int, + extra_blocks: Optional[ExtraFPNBlock] = None, + ) -> None: + super().__init__() + + if extra_blocks is None: + extra_blocks = LastLevelMaxPool() + + self.body = IntermediateLayerGetter(backbone, return_layers=return_layers) + self.fpn = FeaturePyramidNetwork( + in_channels_list=in_channels_list, + out_channels=out_channels, + extra_blocks=extra_blocks, + ) + self.out_channels = out_channels + + def forward(self, x: Tensor) -> Dict[str, Tensor]: + x = self.body(x) + x = self.fpn(x) + return x + + +def resnet_fpn_backbone( + backbone_name: str, + pretrained: bool, + norm_layer: Callable[..., nn.Module] = misc_nn_ops.FrozenBatchNorm2d, + trainable_layers: int = 3, + returned_layers: Optional[List[int]] = None, + extra_blocks: Optional[ExtraFPNBlock] = None, +) -> BackboneWithFPN: + """ + Constructs a specified ResNet backbone with FPN on top. Freezes the specified number of layers in the backbone. + + Examples:: + + >>> from torchvision.models.detection.backbone_utils import resnet_fpn_backbone + >>> backbone = resnet_fpn_backbone('resnet50', pretrained=True, trainable_layers=3) + >>> # get some dummy image + >>> x = torch.rand(1,3,64,64) + >>> # compute the output + >>> output = backbone(x) + >>> print([(k, v.shape) for k, v in output.items()]) + >>> # returns + >>> [('0', torch.Size([1, 256, 16, 16])), + >>> ('1', torch.Size([1, 256, 8, 8])), + >>> ('2', torch.Size([1, 256, 4, 4])), + >>> ('3', torch.Size([1, 256, 2, 2])), + >>> ('pool', torch.Size([1, 256, 1, 1]))] + + Args: + backbone_name (string): resnet architecture. Possible values are 'ResNet', 'resnet18', 'resnet34', 'resnet50', + 'resnet101', 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d', 'wide_resnet50_2', 'wide_resnet101_2' + pretrained (bool): If True, returns a model with backbone pre-trained on Imagenet + norm_layer (callable): it is recommended to use the default value. For details visit: + (https://github.com/facebookresearch/maskrcnn-benchmark/issues/267) + trainable_layers (int): number of trainable (not frozen) resnet layers starting from final block. + Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable. + returned_layers (list of int): The layers of the network to return. Each entry must be in ``[1, 4]``. + By default all layers are returned. + extra_blocks (ExtraFPNBlock or None): if provided, extra operations will + be performed. It is expected to take the fpn features, the original + features and the names of the original features as input, and returns + a new list of feature maps and their corresponding names. By + default a ``LastLevelMaxPool`` is used. 
+ """ + backbone = resnet.__dict__[backbone_name](pretrained=pretrained, norm_layer=norm_layer) + return _resnet_fpn_extractor(backbone, trainable_layers, returned_layers, extra_blocks) + + +def _resnet_fpn_extractor( + backbone: resnet.ResNet, + trainable_layers: int, + returned_layers: Optional[List[int]] = None, + extra_blocks: Optional[ExtraFPNBlock] = None, +) -> BackboneWithFPN: + + # select layers that wont be frozen + assert 0 <= trainable_layers <= 5 + layers_to_train = ["layer4", "layer3", "layer2", "layer1", "conv1"][:trainable_layers] + if trainable_layers == 5: + layers_to_train.append("bn1") + for name, parameter in backbone.named_parameters(): + if all([not name.startswith(layer) for layer in layers_to_train]): + parameter.requires_grad_(False) + + if extra_blocks is None: + extra_blocks = LastLevelMaxPool() + + if returned_layers is None: + returned_layers = [1, 2, 3, 4] + assert min(returned_layers) > 0 and max(returned_layers) < 5 + return_layers = {f"layer{k}": str(v) for v, k in enumerate(returned_layers)} + + in_channels_stage2 = backbone.inplanes // 8 + in_channels_list = [in_channels_stage2 * 2 ** (i - 1) for i in returned_layers] + out_channels = 256 + return BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels, extra_blocks=extra_blocks) + + +def _validate_trainable_layers( + pretrained: bool, + trainable_backbone_layers: Optional[int], + max_value: int, + default_value: int, +) -> int: + # don't freeze any layers if pretrained model or backbone is not used + if not pretrained: + if trainable_backbone_layers is not None: + warnings.warn( + "Changing trainable_backbone_layers has not effect if " + "neither pretrained nor pretrained_backbone have been set to True, " + f"falling back to trainable_backbone_layers={max_value} so that all layers are trainable" + ) + trainable_backbone_layers = max_value + + # by default freeze first blocks + if trainable_backbone_layers is None: + trainable_backbone_layers = default_value + assert 0 <= trainable_backbone_layers <= max_value + return trainable_backbone_layers + + +def mobilenet_backbone( + backbone_name: str, + pretrained: bool, + fpn: bool, + norm_layer: Callable[..., nn.Module] = misc_nn_ops.FrozenBatchNorm2d, + trainable_layers: int = 2, + returned_layers: Optional[List[int]] = None, + extra_blocks: Optional[ExtraFPNBlock] = None, +) -> nn.Module: + backbone = mobilenet.__dict__[backbone_name](pretrained=pretrained, norm_layer=norm_layer) + return _mobilenet_extractor(backbone, fpn, trainable_layers, returned_layers, extra_blocks) + + +def _mobilenet_extractor( + backbone: Union[mobilenet.MobileNetV2, mobilenet.MobileNetV3], + fpn: bool, + trainable_layers, + returned_layers: Optional[List[int]] = None, + extra_blocks: Optional[ExtraFPNBlock] = None, +) -> nn.Module: + backbone = backbone.features + # Gather the indices of blocks which are strided. These are the locations of C1, ..., Cn-1 blocks. + # The first and last blocks are always included because they are the C0 (conv1) and Cn. 
+    stage_indices = [0] + [i for i, b in enumerate(backbone) if getattr(b, "_is_cn", False)] + [len(backbone) - 1]
+    num_stages = len(stage_indices)
+
+    # find the index of the layer from which we won't freeze
+    assert 0 <= trainable_layers <= num_stages
+    freeze_before = len(backbone) if trainable_layers == 0 else stage_indices[num_stages - trainable_layers]
+
+    for b in backbone[:freeze_before]:
+        for parameter in b.parameters():
+            parameter.requires_grad_(False)
+
+    out_channels = 256
+    if fpn:
+        if extra_blocks is None:
+            extra_blocks = LastLevelMaxPool()
+
+        if returned_layers is None:
+            returned_layers = [num_stages - 2, num_stages - 1]
+        assert min(returned_layers) >= 0 and max(returned_layers) < num_stages
+        return_layers = {f"{stage_indices[k]}": str(v) for v, k in enumerate(returned_layers)}
+
+        in_channels_list = [backbone[stage_indices[i]].out_channels for i in returned_layers]
+        return BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels, extra_blocks=extra_blocks)
+    else:
+        m = nn.Sequential(
+            backbone,
+            # depthwise linear combination of channels to reduce their size
+            nn.Conv2d(backbone[-1].out_channels, out_channels, 1),
+        )
+        m.out_channels = out_channels  # type: ignore[assignment]
+        return m
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils_origin.py
new file mode 100644
index 0000000000..f5335c451d
--- /dev/null
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils_origin.py
@@ -0,0 +1,63 @@
+from collections import OrderedDict
+from torch import nn
+from torchvision.ops.feature_pyramid_network import FeaturePyramidNetwork, LastLevelMaxPool
+
+from torchvision.ops import misc as misc_nn_ops
+from .._utils import IntermediateLayerGetter
+from .. import resnet
+
+
+class BackboneWithFPN(nn.Module):
+    """
+    Adds a FPN on top of a model.
+    Internally, it uses torchvision.models._utils.IntermediateLayerGetter to
+    extract a submodel that returns the feature maps specified in return_layers.
+    The same limitations of IntermediateLayerGetter apply here.
+    Arguments:
+        backbone (nn.Module)
+        return_layers (Dict[name, new_name]): a dict containing the names
+            of the modules for which the activations will be returned as
+            the key of the dict, and the value of the dict is the name
+            of the returned activation (which the user can specify).
+        in_channels_list (List[int]): number of channels for each feature map
+            that is returned, in the order they are present in the OrderedDict
+        out_channels (int): number of channels in the FPN.
+ Attributes: + out_channels (int): the number of channels in the FPN + """ + def __init__(self, backbone, return_layers, in_channels_list, out_channels): + super(BackboneWithFPN, self).__init__() + self.body = IntermediateLayerGetter(backbone, return_layers=return_layers) + self.fpn = FeaturePyramidNetwork( + in_channels_list=in_channels_list, + out_channels=out_channels, + extra_blocks=LastLevelMaxPool(), + ) + self.out_channels = out_channels + + def forward(self, x): + x = self.body(x) + x = self.fpn(x) + return x + + +def resnet_fpn_backbone(backbone_name, pretrained): + backbone = resnet.__dict__[backbone_name]( + pretrained=pretrained, + norm_layer=misc_nn_ops.FrozenBatchNorm2d) + # freeze layers + for name, parameter in backbone.named_parameters(): + if 'layer2' not in name and 'layer3' not in name and 'layer4' not in name: + parameter.requires_grad_(False) + + return_layers = {'layer1': '0', 'layer2': '1', 'layer3': '2', 'layer4': '3'} + + in_channels_stage2 = backbone.inplanes // 8 + in_channels_list = [ + in_channels_stage2, + in_channels_stage2 * 2, + in_channels_stage2 * 4, + in_channels_stage2 * 8, + ] + out_channels = 256 + return BackboneWithFPN(backbone, return_layers, in_channels_list, out_channels) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/faster_rcnn.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/faster_rcnn.py new file mode 100644 index 0000000000..92366352b9 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/faster_rcnn.py @@ -0,0 +1,355 @@ +from collections import OrderedDict + +import torch +from torch import nn +import torch.nn.functional as F + +from torchvision.ops import misc as misc_nn_ops +from torchvision.ops import MultiScaleRoIAlign + +from ..utils import load_state_dict_from_url + +from .generalized_rcnn import GeneralizedRCNN +from .rpn import AnchorGenerator, RPNHead, RegionProposalNetwork +from .roi_heads import RoIHeads +from .transform import GeneralizedRCNNTransform +from .backbone_utils import resnet_fpn_backbone + + +__all__ = [ + "FasterRCNN", "fasterrcnn_resnet50_fpn", +] + + +class FasterRCNN(GeneralizedRCNN): + """ + Implements Faster R-CNN. + + The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each + image, and should be in 0-1 range. Different images can have different sizes. + + The behavior of the model changes depending if it is in training or evaluation mode. + + During training, the model expects both the input tensors, as well as a targets (list of dictionary), + containing: + - boxes (FloatTensor[N, 4]): the ground-truth boxes in [x1, y1, x2, y2] format, with values of x + between 0 and W and values of y between 0 and H + - labels (Int64Tensor[N]): the class label for each ground-truth box + + The model returns a Dict[Tensor] during training, containing the classification and regression + losses for both the RPN and the R-CNN. + + During inference, the model requires only the input tensors, and returns the post-processed + predictions as a List[Dict[Tensor]], one for each input image. 
The fields of the Dict are as + follows: + - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values of x + between 0 and W and values of y between 0 and H + - labels (Int64Tensor[N]): the predicted labels for each image + - scores (Tensor[N]): the scores or each prediction + + Arguments: + backbone (nn.Module): the network used to compute the features for the model. + It should contain a out_channels attribute, which indicates the number of output + channels that each feature map has (and it should be the same for all feature maps). + The backbone should return a single Tensor or and OrderedDict[Tensor]. + num_classes (int): number of output classes of the model (including the background). + If box_predictor is specified, num_classes should be None. + min_size (int): minimum size of the image to be rescaled before feeding it to the backbone + max_size (int): maximum size of the image to be rescaled before feeding it to the backbone + image_mean (Tuple[float, float, float]): mean values used for input normalization. + They are generally the mean values of the dataset on which the backbone has been trained + on + image_std (Tuple[float, float, float]): std values used for input normalization. + They are generally the std values of the dataset on which the backbone has been trained on + rpn_anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature + maps. + rpn_head (nn.Module): module that computes the objectness and regression deltas from the RPN + rpn_pre_nms_top_n_train (int): number of proposals to keep before applying NMS during training + rpn_pre_nms_top_n_test (int): number of proposals to keep before applying NMS during testing + rpn_post_nms_top_n_train (int): number of proposals to keep after applying NMS during training + rpn_post_nms_top_n_test (int): number of proposals to keep after applying NMS during testing + rpn_nms_thresh (float): NMS threshold used for postprocessing the RPN proposals + rpn_fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be + considered as positive during training of the RPN. + rpn_bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be + considered as negative during training of the RPN. + rpn_batch_size_per_image (int): number of anchors that are sampled during training of the RPN + for computing the loss + rpn_positive_fraction (float): proportion of positive anchors in a mini-batch during training + of the RPN + box_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in + the locations indicated by the bounding boxes + box_head (nn.Module): module that takes the cropped feature maps as input + box_predictor (nn.Module): module that takes the output of box_head and returns the + classification logits and box regression deltas. + box_score_thresh (float): during inference, only return proposals with a classification score + greater than box_score_thresh + box_nms_thresh (float): NMS threshold for the prediction head. Used during inference + box_detections_per_img (int): maximum number of detections per image, for all classes. 
+ box_fg_iou_thresh (float): minimum IoU between the proposals and the GT box so that they can be + considered as positive during training of the classification head + box_bg_iou_thresh (float): maximum IoU between the proposals and the GT box so that they can be + considered as negative during training of the classification head + box_batch_size_per_image (int): number of proposals that are sampled during training of the + classification head + box_positive_fraction (float): proportion of positive proposals in a mini-batch during training + of the classification head + bbox_reg_weights (Tuple[float, float, float, float]): weights for the encoding/decoding of the + bounding boxes + + Example:: + + >>> import torch + >>> import torchvision + >>> from torchvision.models.detection import FasterRCNN + >>> from torchvision.models.detection.rpn import AnchorGenerator + >>> # load a pre-trained model for classification and return + >>> # only the features + >>> backbone = torchvision.models.mobilenet_v2(pretrained=True).features + >>> # FasterRCNN needs to know the number of + >>> # output channels in a backbone. For mobilenet_v2, it's 1280 + >>> # so we need to add it here + >>> backbone.out_channels = 1280 + >>> + >>> # let's make the RPN generate 5 x 3 anchors per spatial + >>> # location, with 5 different sizes and 3 different aspect + >>> # ratios. We have a Tuple[Tuple[int]] because each feature + >>> # map could potentially have different sizes and + >>> # aspect ratios + >>> anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),), + >>> aspect_ratios=((0.5, 1.0, 2.0),)) + >>> + >>> # let's define what are the feature maps that we will + >>> # use to perform the region of interest cropping, as well as + >>> # the size of the crop after rescaling. + >>> # if your backbone returns a Tensor, featmap_names is expected to + >>> # be ['0']. More generally, the backbone should return an + >>> # OrderedDict[Tensor], and in featmap_names you can choose which + >>> # feature maps to use. 
+ >>> roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'], + >>> output_size=7, + >>> sampling_ratio=2) + >>> + >>> # put the pieces together inside a FasterRCNN model + >>> model = FasterRCNN(backbone, + >>> num_classes=2, + >>> rpn_anchor_generator=anchor_generator, + >>> box_roi_pool=roi_pooler) + >>> model.eval() + >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)] + >>> predictions = model(x) + """ + + def __init__(self, backbone, num_classes=None, + # transform parameters + min_size=800, max_size=1333, + image_mean=None, image_std=None, + # RPN parameters + rpn_anchor_generator=None, rpn_head=None, + rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000, + rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000, + rpn_nms_thresh=0.7, + rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3, + rpn_batch_size_per_image=256, rpn_positive_fraction=0.5, + # Box parameters + box_roi_pool=None, box_head=None, box_predictor=None, + box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100, + box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5, + box_batch_size_per_image=512, box_positive_fraction=0.25, + bbox_reg_weights=None): + + if not hasattr(backbone, "out_channels"): + raise ValueError( + "backbone should contain an attribute out_channels " + "specifying the number of output channels (assumed to be the " + "same for all the levels)") + + assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None))) + assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None))) + + if num_classes is not None: + if box_predictor is not None: + raise ValueError("num_classes should be None when box_predictor is specified") + else: + if box_predictor is None: + raise ValueError("num_classes should not be None when box_predictor " + "is not specified") + + out_channels = backbone.out_channels + + if rpn_anchor_generator is None: + anchor_sizes = ((32,), (64,), (128,), (256,), (512,)) + aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes) + rpn_anchor_generator = AnchorGenerator( + anchor_sizes, aspect_ratios + ) + if rpn_head is None: + rpn_head = RPNHead( + out_channels, rpn_anchor_generator.num_anchors_per_location()[0] + ) + + rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test) + rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test) + + rpn = RegionProposalNetwork( + rpn_anchor_generator, rpn_head, + rpn_fg_iou_thresh, rpn_bg_iou_thresh, + rpn_batch_size_per_image, rpn_positive_fraction, + rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh) + + if box_roi_pool is None: + box_roi_pool = MultiScaleRoIAlign( + featmap_names=['0', '1', '2', '3'], + output_size=7, + sampling_ratio=2) + + if box_head is None: + resolution = box_roi_pool.output_size[0] + representation_size = 1024 + box_head = TwoMLPHead( + out_channels * resolution ** 2, + representation_size) + + if box_predictor is None: + representation_size = 1024 + box_predictor = FastRCNNPredictor( + representation_size, + num_classes) + + roi_heads = RoIHeads( + # Box + box_roi_pool, box_head, box_predictor, + box_fg_iou_thresh, box_bg_iou_thresh, + box_batch_size_per_image, box_positive_fraction, + bbox_reg_weights, + box_score_thresh, box_nms_thresh, box_detections_per_img) + + if image_mean is None: + image_mean = [0.485, 0.456, 0.406] + if image_std is None: + image_std = [0.229, 0.224, 0.225] + transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std) + + super(FasterRCNN, self).__init__(backbone, rpn, 
roi_heads, transform) + + +class TwoMLPHead(nn.Module): + """ + Standard heads for FPN-based models + + Arguments: + in_channels (int): number of input channels + representation_size (int): size of the intermediate representation + """ + + def __init__(self, in_channels, representation_size): + super(TwoMLPHead, self).__init__() + + self.fc6 = nn.Linear(in_channels, representation_size) + self.fc7 = nn.Linear(representation_size, representation_size) + + def forward(self, x): + x = x.flatten(start_dim=1) + + x = F.relu(self.fc6(x)) + x = F.relu(self.fc7(x)) + + return x + + +class FastRCNNPredictor(nn.Module): + """ + Standard classification + bounding box regression layers + for Fast R-CNN. + + Arguments: + in_channels (int): number of input channels + num_classes (int): number of output classes (including background) + """ + + def __init__(self, in_channels, num_classes): + super(FastRCNNPredictor, self).__init__() + self.cls_score = nn.Linear(in_channels, num_classes) + self.bbox_pred = nn.Linear(in_channels, num_classes * 4) + + def forward(self, x): + if x.dim() == 4: + assert list(x.shape[2:]) == [1, 1] + x = x.flatten(start_dim=1) + scores = self.cls_score(x) + bbox_deltas = self.bbox_pred(x) + + return scores, bbox_deltas + + +model_urls = { + 'fasterrcnn_resnet50_fpn_coco': + 'https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth', +} + + +def fasterrcnn_resnet50_fpn(pretrained=False, progress=True, + num_classes=91, pretrained_backbone=True, **kwargs): + """ + Constructs a Faster R-CNN model with a ResNet-50-FPN backbone. + + The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each + image, and should be in ``0-1`` range. Different images can have different sizes. + + The behavior of the model changes depending if it is in training or evaluation mode. + + During training, the model expects both the input tensors, as well as a targets (list of dictionary), + containing: + - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with values of ``x`` + between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H`` + - labels (``Int64Tensor[N]``): the class label for each ground-truth box + + The model returns a ``Dict[Tensor]`` during training, containing the classification and regression + losses for both the RPN and the R-CNN. + + During inference, the model requires only the input tensors, and returns the post-processed + predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as + follows: + - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with values of ``x`` + between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H`` + - labels (``Int64Tensor[N]``): the predicted labels for each image + - scores (``Tensor[N]``): the scores or each prediction + + Faster R-CNN is exportable to ONNX for a fixed batch size with inputs images of fixed size. 
+ + Example:: + + >>> model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True) + >>> # For training + >>> images, boxes = torch.rand(4, 3, 600, 1200), torch.rand(4, 11, 4) + >>> labels = torch.randint(1, 91, (4, 11)) + >>> images = list(image for image in images) + >>> targets = [] + >>> for i in range(len(images)): + >>> d = {} + >>> d['boxes'] = boxes[i] + >>> d['labels'] = labels[i] + >>> targets.append(d) + >>> output = model(images, targets) + >>> # For inference + >>> model.eval() + >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)] + >>> predictions = model(x) + >>> + >>> # optionally, if you want to export the model to ONNX: + >>> torch.onnx.export(model, x, "faster_rcnn.onnx", opset_version = 11) + + Arguments: + pretrained (bool): If True, returns a model pre-trained on COCO train2017 + progress (bool): If True, displays a progress bar of the download to stderr + """ + if pretrained: + # no need to download the backbone if pretrained is set + pretrained_backbone = False + backbone = resnet_fpn_backbone('resnet50', pretrained_backbone) + model = FasterRCNN(backbone, num_classes, **kwargs) + if pretrained: + state_dict = load_state_dict_from_url(model_urls['fasterrcnn_resnet50_fpn_coco'], + progress=progress) + model.load_state_dict(state_dict) + return model diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/generalized_rcnn.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/generalized_rcnn.py new file mode 100644 index 0000000000..50a25fb4f9 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/generalized_rcnn.py @@ -0,0 +1,84 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +""" +Implements the Generalized R-CNN framework +""" + +from collections import OrderedDict +import torch +from torch import nn +import warnings +from torch.jit.annotations import Tuple, List, Dict, Optional +from torch import Tensor + + +class GeneralizedRCNN(nn.Module): + """ + Main class for Generalized R-CNN. + + Arguments: + backbone (nn.Module): + rpn (nn.Module): + heads (nn.Module): takes the features + the proposals from the RPN and computes + detections / masks from it. + transform (nn.Module): performs the data transformation from the inputs to feed into + the model + """ + + def __init__(self, backbone, rpn, roi_heads, transform): + super(GeneralizedRCNN, self).__init__() + self.transform = transform + self.backbone = backbone + self.rpn = rpn + self.roi_heads = roi_heads + # used only on torchscript mode + self._has_warned = False + + @torch.jit.unused + def eager_outputs(self, losses, detections): + # type: (Dict[str, Tensor], List[Dict[str, Tensor]]) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]] + if self.training: + return losses + + return detections + + def forward(self, images, targets=None): + # type: (List[Tensor], Optional[List[Dict[str, Tensor]]]) + """ + Arguments: + images (list[Tensor]): images to be processed + targets (list[Dict[Tensor]]): ground-truth boxes present in the image (optional) + + Returns: + result (list[BoxList] or dict[Tensor]): the output from the model. + During training, it returns a dict[Tensor] which contains the losses. + During testing, it returns list[BoxList] contains additional fields + like `scores`, `labels` and `mask` (for Mask R-CNN models). 
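+
+        Example (a sketch using one of the concrete subclasses; any detection model
+        built on GeneralizedRCNN follows the same calling convention)::
+
+            >>> import torch
+            >>> import torchvision
+            >>> model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
+            >>>     pretrained=False, pretrained_backbone=False, num_classes=2)
+            >>> images = [torch.rand(3, 300, 400)]
+            >>> targets = [{'boxes': torch.tensor([[10., 20., 100., 200.]]),
+            >>>             'labels': torch.tensor([1])}]
+            >>> loss_dict = model(images, targets)   # train mode: dict of losses
+            >>> model.eval()
+            >>> detections = model(images)           # eval mode: list of dicts with boxes, labels, scores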
+ + """ + if self.training and targets is None: + raise ValueError("In training mode, targets should be passed") + original_image_sizes = torch.jit.annotate(List[Tuple[int, int]], []) + for img in images: + val = img.shape[-2:] + assert len(val) == 2 + original_image_sizes.append((val[0], val[1])) + + images, targets = self.transform(images, targets) + features = self.backbone(images.tensors) + if isinstance(features, torch.Tensor): + features = OrderedDict([('0', features)]) + proposals, proposal_losses = self.rpn(images, features, targets) + detections, detector_losses = self.roi_heads(features, proposals, images.image_sizes, targets) + detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes) + + losses = {} + losses.update(detector_losses) + losses.update(proposal_losses) + + if torch.jit.is_scripting(): + if not self._has_warned: + warnings.warn("RCNN always returns a (Losses, Detections) tuple in scripting") + self._has_warned = True + return (losses, detections) + else: + return self.eager_outputs(losses, detections) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/image_list.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/image_list.py new file mode 100644 index 0000000000..583866557e --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/image_list.py @@ -0,0 +1,25 @@ +from typing import List, Tuple + +import torch +from torch import Tensor + + +class ImageList: + """ + Structure that holds a list of images (of possibly + varying sizes) as a single tensor. + This works by padding the images to the same size, + and storing in a field the original sizes of each image + + Args: + tensors (tensor): Tensor containing images. + image_sizes (list[tuple[int, int]]): List of Tuples each containing size of images. + """ + + def __init__(self, tensors: Tensor, image_sizes: List[Tuple[int, int]]) -> None: + self.tensors = tensors + self.image_sizes = image_sizes + + def to(self, device: torch.device) -> "ImageList": + cast_tensor = self.tensors.to(device) + return ImageList(cast_tensor, self.image_sizes) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/keypoint_rcnn.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/keypoint_rcnn.py new file mode 100644 index 0000000000..aeee558ca2 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/keypoint_rcnn.py @@ -0,0 +1,330 @@ +import torch +from torch import nn + +from torchvision.ops import misc as misc_nn_ops + +from torchvision.ops import MultiScaleRoIAlign + +from ..utils import load_state_dict_from_url + +from .faster_rcnn import FasterRCNN +from .backbone_utils import resnet_fpn_backbone + + +__all__ = [ + "KeypointRCNN", "keypointrcnn_resnet50_fpn" +] + + +class KeypointRCNN(FasterRCNN): + """ + Implements Keypoint R-CNN. + + The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each + image, and should be in 0-1 range. Different images can have different sizes. + + The behavior of the model changes depending if it is in training or evaluation mode. 
+ + During training, the model expects both the input tensors, as well as a targets (list of dictionary), + containing: + - boxes (FloatTensor[N, 4]): the ground-truth boxes in [x1, y1, x2, y2] format, with values of x + between 0 and W and values of y between 0 and H + - labels (Int64Tensor[N]): the class label for each ground-truth box + - keypoints (FloatTensor[N, K, 3]): the K keypoints location for each of the N instances, in the + format [x, y, visibility], where visibility=0 means that the keypoint is not visible. + + The model returns a Dict[Tensor] during training, containing the classification and regression + losses for both the RPN and the R-CNN, and the keypoint loss. + + During inference, the model requires only the input tensors, and returns the post-processed + predictions as a List[Dict[Tensor]], one for each input image. The fields of the Dict are as + follows: + - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values of x + between 0 and W and values of y between 0 and H + - labels (Int64Tensor[N]): the predicted labels for each image + - scores (Tensor[N]): the scores or each prediction + - keypoints (FloatTensor[N, K, 3]): the locations of the predicted keypoints, in [x, y, v] format. + + Arguments: + backbone (nn.Module): the network used to compute the features for the model. + It should contain a out_channels attribute, which indicates the number of output + channels that each feature map has (and it should be the same for all feature maps). + The backbone should return a single Tensor or and OrderedDict[Tensor]. + num_classes (int): number of output classes of the model (including the background). + If box_predictor is specified, num_classes should be None. + min_size (int): minimum size of the image to be rescaled before feeding it to the backbone + max_size (int): maximum size of the image to be rescaled before feeding it to the backbone + image_mean (Tuple[float, float, float]): mean values used for input normalization. + They are generally the mean values of the dataset on which the backbone has been trained + on + image_std (Tuple[float, float, float]): std values used for input normalization. + They are generally the std values of the dataset on which the backbone has been trained on + rpn_anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature + maps. + rpn_head (nn.Module): module that computes the objectness and regression deltas from the RPN + rpn_pre_nms_top_n_train (int): number of proposals to keep before applying NMS during training + rpn_pre_nms_top_n_test (int): number of proposals to keep before applying NMS during testing + rpn_post_nms_top_n_train (int): number of proposals to keep after applying NMS during training + rpn_post_nms_top_n_test (int): number of proposals to keep after applying NMS during testing + rpn_nms_thresh (float): NMS threshold used for postprocessing the RPN proposals + rpn_fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be + considered as positive during training of the RPN. + rpn_bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be + considered as negative during training of the RPN. 
+ rpn_batch_size_per_image (int): number of anchors that are sampled during training of the RPN + for computing the loss + rpn_positive_fraction (float): proportion of positive anchors in a mini-batch during training + of the RPN + box_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in + the locations indicated by the bounding boxes + box_head (nn.Module): module that takes the cropped feature maps as input + box_predictor (nn.Module): module that takes the output of box_head and returns the + classification logits and box regression deltas. + box_score_thresh (float): during inference, only return proposals with a classification score + greater than box_score_thresh + box_nms_thresh (float): NMS threshold for the prediction head. Used during inference + box_detections_per_img (int): maximum number of detections per image, for all classes. + box_fg_iou_thresh (float): minimum IoU between the proposals and the GT box so that they can be + considered as positive during training of the classification head + box_bg_iou_thresh (float): maximum IoU between the proposals and the GT box so that they can be + considered as negative during training of the classification head + box_batch_size_per_image (int): number of proposals that are sampled during training of the + classification head + box_positive_fraction (float): proportion of positive proposals in a mini-batch during training + of the classification head + bbox_reg_weights (Tuple[float, float, float, float]): weights for the encoding/decoding of the + bounding boxes + keypoint_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in + the locations indicated by the bounding boxes, which will be used for the keypoint head. + keypoint_head (nn.Module): module that takes the cropped feature maps as input + keypoint_predictor (nn.Module): module that takes the output of the keypoint_head and returns the + heatmap logits + + Example:: + + >>> import torch + >>> import torchvision + >>> from torchvision.models.detection import KeypointRCNN + >>> from torchvision.models.detection.rpn import AnchorGenerator + >>> + >>> # load a pre-trained model for classification and return + >>> # only the features + >>> backbone = torchvision.models.mobilenet_v2(pretrained=True).features + >>> # KeypointRCNN needs to know the number of + >>> # output channels in a backbone. For mobilenet_v2, it's 1280 + >>> # so we need to add it here + >>> backbone.out_channels = 1280 + >>> + >>> # let's make the RPN generate 5 x 3 anchors per spatial + >>> # location, with 5 different sizes and 3 different aspect + >>> # ratios. We have a Tuple[Tuple[int]] because each feature + >>> # map could potentially have different sizes and + >>> # aspect ratios + >>> anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),), + >>> aspect_ratios=((0.5, 1.0, 2.0),)) + >>> + >>> # let's define what are the feature maps that we will + >>> # use to perform the region of interest cropping, as well as + >>> # the size of the crop after rescaling. + >>> # if your backbone returns a Tensor, featmap_names is expected to + >>> # be ['0']. More generally, the backbone should return an + >>> # OrderedDict[Tensor], and in featmap_names you can choose which + >>> # feature maps to use. 
+ >>> roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'], + >>> output_size=7, + >>> sampling_ratio=2) + >>> + >>> keypoint_roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'], + >>> output_size=14, + >>> sampling_ratio=2) + >>> # put the pieces together inside a KeypointRCNN model + >>> model = KeypointRCNN(backbone, + >>> num_classes=2, + >>> rpn_anchor_generator=anchor_generator, + >>> box_roi_pool=roi_pooler, + >>> keypoint_roi_pool=keypoint_roi_pooler) + >>> model.eval() + >>> model.eval() + >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)] + >>> predictions = model(x) + """ + def __init__(self, backbone, num_classes=None, + # transform parameters + min_size=None, max_size=1333, + image_mean=None, image_std=None, + # RPN parameters + rpn_anchor_generator=None, rpn_head=None, + rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000, + rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000, + rpn_nms_thresh=0.7, + rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3, + rpn_batch_size_per_image=256, rpn_positive_fraction=0.5, + # Box parameters + box_roi_pool=None, box_head=None, box_predictor=None, + box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100, + box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5, + box_batch_size_per_image=512, box_positive_fraction=0.25, + bbox_reg_weights=None, + # keypoint parameters + keypoint_roi_pool=None, keypoint_head=None, keypoint_predictor=None, + num_keypoints=17): + + assert isinstance(keypoint_roi_pool, (MultiScaleRoIAlign, type(None))) + if min_size is None: + min_size = (640, 672, 704, 736, 768, 800) + + if num_classes is not None: + if keypoint_predictor is not None: + raise ValueError("num_classes should be None when keypoint_predictor is specified") + + out_channels = backbone.out_channels + + if keypoint_roi_pool is None: + keypoint_roi_pool = MultiScaleRoIAlign( + featmap_names=['0', '1', '2', '3'], + output_size=14, + sampling_ratio=2) + + if keypoint_head is None: + keypoint_layers = tuple(512 for _ in range(8)) + keypoint_head = KeypointRCNNHeads(out_channels, keypoint_layers) + + if keypoint_predictor is None: + keypoint_dim_reduced = 512 # == keypoint_layers[-1] + keypoint_predictor = KeypointRCNNPredictor(keypoint_dim_reduced, num_keypoints) + + super(KeypointRCNN, self).__init__( + backbone, num_classes, + # transform parameters + min_size, max_size, + image_mean, image_std, + # RPN-specific parameters + rpn_anchor_generator, rpn_head, + rpn_pre_nms_top_n_train, rpn_pre_nms_top_n_test, + rpn_post_nms_top_n_train, rpn_post_nms_top_n_test, + rpn_nms_thresh, + rpn_fg_iou_thresh, rpn_bg_iou_thresh, + rpn_batch_size_per_image, rpn_positive_fraction, + # Box parameters + box_roi_pool, box_head, box_predictor, + box_score_thresh, box_nms_thresh, box_detections_per_img, + box_fg_iou_thresh, box_bg_iou_thresh, + box_batch_size_per_image, box_positive_fraction, + bbox_reg_weights) + + self.roi_heads.keypoint_roi_pool = keypoint_roi_pool + self.roi_heads.keypoint_head = keypoint_head + self.roi_heads.keypoint_predictor = keypoint_predictor + + +class KeypointRCNNHeads(nn.Sequential): + def __init__(self, in_channels, layers): + d = [] + next_feature = in_channels + for l in layers: + d.append(misc_nn_ops.Conv2d(next_feature, l, 3, stride=1, padding=1)) + d.append(nn.ReLU(inplace=True)) + next_feature = l + super(KeypointRCNNHeads, self).__init__(*d) + for m in self.children(): + if isinstance(m, misc_nn_ops.Conv2d): + nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") + 
nn.init.constant_(m.bias, 0) + + +class KeypointRCNNPredictor(nn.Module): + def __init__(self, in_channels, num_keypoints): + super(KeypointRCNNPredictor, self).__init__() + input_features = in_channels + deconv_kernel = 4 + self.kps_score_lowres = misc_nn_ops.ConvTranspose2d( + input_features, + num_keypoints, + deconv_kernel, + stride=2, + padding=deconv_kernel // 2 - 1, + ) + nn.init.kaiming_normal_( + self.kps_score_lowres.weight, mode="fan_out", nonlinearity="relu" + ) + nn.init.constant_(self.kps_score_lowres.bias, 0) + self.up_scale = 2 + self.out_channels = num_keypoints + + def forward(self, x): + x = self.kps_score_lowres(x) + x = misc_nn_ops.interpolate( + x, scale_factor=float(self.up_scale), mode="bilinear", align_corners=False + ) + return x + + +model_urls = { + # legacy model for BC reasons, see https://github.com/pytorch/vision/issues/1606 + 'keypointrcnn_resnet50_fpn_coco_legacy': + 'https://download.pytorch.org/models/keypointrcnn_resnet50_fpn_coco-9f466800.pth', + 'keypointrcnn_resnet50_fpn_coco': + 'https://download.pytorch.org/models/keypointrcnn_resnet50_fpn_coco-fc266e95.pth', +} + + +def keypointrcnn_resnet50_fpn(pretrained=False, progress=True, + num_classes=2, num_keypoints=17, + pretrained_backbone=True, **kwargs): + """ + Constructs a Keypoint R-CNN model with a ResNet-50-FPN backbone. + + The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each + image, and should be in ``0-1`` range. Different images can have different sizes. + + The behavior of the model changes depending if it is in training or evaluation mode. + + During training, the model expects both the input tensors, as well as a targets (list of dictionary), + containing: + - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with values of ``x`` + between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H`` + - labels (``Int64Tensor[N]``): the class label for each ground-truth box + - keypoints (``FloatTensor[N, K, 3]``): the ``K`` keypoints location for each of the ``N`` instances, in the + format ``[x, y, visibility]``, where ``visibility=0`` means that the keypoint is not visible. + + The model returns a ``Dict[Tensor]`` during training, containing the classification and regression + losses for both the RPN and the R-CNN, and the keypoint loss. + + During inference, the model requires only the input tensors, and returns the post-processed + predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as + follows: + - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with values of ``x`` + between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H`` + - labels (``Int64Tensor[N]``): the predicted labels for each image + - scores (``Tensor[N]``): the scores or each prediction + - keypoints (``FloatTensor[N, K, 3]``): the locations of the predicted keypoints, in ``[x, y, v]`` format. + + Keypoint R-CNN is exportable to ONNX for a fixed batch size with inputs images of fixed size. 
+ + Example:: + + >>> model = torchvision.models.detection.keypointrcnn_resnet50_fpn(pretrained=True) + >>> model.eval() + >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)] + >>> predictions = model(x) + >>> + >>> # optionally, if you want to export the model to ONNX: + >>> torch.onnx.export(model, x, "keypoint_rcnn.onnx", opset_version = 11) + + Arguments: + pretrained (bool): If True, returns a model pre-trained on COCO train2017 + progress (bool): If True, displays a progress bar of the download to stderr + """ + if pretrained: + # no need to download the backbone if pretrained is set + pretrained_backbone = False + backbone = resnet_fpn_backbone('resnet50', pretrained_backbone) + model = KeypointRCNN(backbone, num_classes, num_keypoints=num_keypoints, **kwargs) + if pretrained: + key = 'keypointrcnn_resnet50_fpn_coco' + if pretrained == 'legacy': + key += '_legacy' + state_dict = load_state_dict_from_url(model_urls[key], + progress=progress) + model.load_state_dict(state_dict) + return model diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/mask_rcnn.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/mask_rcnn.py new file mode 100644 index 0000000000..a8a980fa3c --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/mask_rcnn.py @@ -0,0 +1,323 @@ +from collections import OrderedDict + +import torch +from torch import nn +import torch.nn.functional as F + +from torchvision.ops import misc as misc_nn_ops +from torchvision.ops import MultiScaleRoIAlign + +from ..utils import load_state_dict_from_url + +from .faster_rcnn import FasterRCNN +from .backbone_utils import resnet_fpn_backbone + +__all__ = [ + "MaskRCNN", "maskrcnn_resnet50_fpn", +] + + +class MaskRCNN(FasterRCNN): + """ + Implements Mask R-CNN. + + The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each + image, and should be in 0-1 range. Different images can have different sizes. + + The behavior of the model changes depending if it is in training or evaluation mode. + + During training, the model expects both the input tensors, as well as a targets (list of dictionary), + containing: + - boxes (FloatTensor[N, 4]): the ground-truth boxes in [x1, y1, x2, y2] format, with values of x + between 0 and W and values of y between 0 and H + - labels (Int64Tensor[N]): the class label for each ground-truth box + - masks (UInt8Tensor[N, H, W]): the segmentation binary masks for each instance + + The model returns a Dict[Tensor] during training, containing the classification and regression + losses for both the RPN and the R-CNN, and the mask loss. + + During inference, the model requires only the input tensors, and returns the post-processed + predictions as a List[Dict[Tensor]], one for each input image. The fields of the Dict are as + follows: + - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values of x + between 0 and W and values of y between 0 and H + - labels (Int64Tensor[N]): the predicted labels for each image + - scores (Tensor[N]): the scores or each prediction + - masks (UInt8Tensor[N, 1, H, W]): the predicted masks for each instance, in 0-1 range. In order to + obtain the final segmentation masks, the soft masks can be thresholded, generally + with a value of 0.5 (mask >= 0.5) + + Arguments: + backbone (nn.Module): the network used to compute the features for the model. 
+ It should contain a out_channels attribute, which indicates the number of output + channels that each feature map has (and it should be the same for all feature maps). + The backbone should return a single Tensor or and OrderedDict[Tensor]. + num_classes (int): number of output classes of the model (including the background). + If box_predictor is specified, num_classes should be None. + min_size (int): minimum size of the image to be rescaled before feeding it to the backbone + max_size (int): maximum size of the image to be rescaled before feeding it to the backbone + image_mean (Tuple[float, float, float]): mean values used for input normalization. + They are generally the mean values of the dataset on which the backbone has been trained + on + image_std (Tuple[float, float, float]): std values used for input normalization. + They are generally the std values of the dataset on which the backbone has been trained on + rpn_anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature + maps. + rpn_head (nn.Module): module that computes the objectness and regression deltas from the RPN + rpn_pre_nms_top_n_train (int): number of proposals to keep before applying NMS during training + rpn_pre_nms_top_n_test (int): number of proposals to keep before applying NMS during testing + rpn_post_nms_top_n_train (int): number of proposals to keep after applying NMS during training + rpn_post_nms_top_n_test (int): number of proposals to keep after applying NMS during testing + rpn_nms_thresh (float): NMS threshold used for postprocessing the RPN proposals + rpn_fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be + considered as positive during training of the RPN. + rpn_bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be + considered as negative during training of the RPN. + rpn_batch_size_per_image (int): number of anchors that are sampled during training of the RPN + for computing the loss + rpn_positive_fraction (float): proportion of positive anchors in a mini-batch during training + of the RPN + box_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in + the locations indicated by the bounding boxes + box_head (nn.Module): module that takes the cropped feature maps as input + box_predictor (nn.Module): module that takes the output of box_head and returns the + classification logits and box regression deltas. + box_score_thresh (float): during inference, only return proposals with a classification score + greater than box_score_thresh + box_nms_thresh (float): NMS threshold for the prediction head. Used during inference + box_detections_per_img (int): maximum number of detections per image, for all classes. 
+ box_fg_iou_thresh (float): minimum IoU between the proposals and the GT box so that they can be + considered as positive during training of the classification head + box_bg_iou_thresh (float): maximum IoU between the proposals and the GT box so that they can be + considered as negative during training of the classification head + box_batch_size_per_image (int): number of proposals that are sampled during training of the + classification head + box_positive_fraction (float): proportion of positive proposals in a mini-batch during training + of the classification head + bbox_reg_weights (Tuple[float, float, float, float]): weights for the encoding/decoding of the + bounding boxes + mask_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in + the locations indicated by the bounding boxes, which will be used for the mask head. + mask_head (nn.Module): module that takes the cropped feature maps as input + mask_predictor (nn.Module): module that takes the output of the mask_head and returns the + segmentation mask logits + + Example:: + + >>> import torch + >>> import torchvision + >>> from torchvision.models.detection import MaskRCNN + >>> from torchvision.models.detection.rpn import AnchorGenerator + >>> + >>> # load a pre-trained model for classification and return + >>> # only the features + >>> backbone = torchvision.models.mobilenet_v2(pretrained=True).features + >>> # MaskRCNN needs to know the number of + >>> # output channels in a backbone. For mobilenet_v2, it's 1280 + >>> # so we need to add it here + >>> backbone.out_channels = 1280 + >>> + >>> # let's make the RPN generate 5 x 3 anchors per spatial + >>> # location, with 5 different sizes and 3 different aspect + >>> # ratios. We have a Tuple[Tuple[int]] because each feature + >>> # map could potentially have different sizes and + >>> # aspect ratios + >>> anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),), + >>> aspect_ratios=((0.5, 1.0, 2.0),)) + >>> + >>> # let's define what are the feature maps that we will + >>> # use to perform the region of interest cropping, as well as + >>> # the size of the crop after rescaling. + >>> # if your backbone returns a Tensor, featmap_names is expected to + >>> # be ['0']. More generally, the backbone should return an + >>> # OrderedDict[Tensor], and in featmap_names you can choose which + >>> # feature maps to use. 
+ >>> roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'], + >>> output_size=7, + >>> sampling_ratio=2) + >>> + >>> mask_roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'], + >>> output_size=14, + >>> sampling_ratio=2) + >>> # put the pieces together inside a MaskRCNN model + >>> model = MaskRCNN(backbone, + >>> num_classes=2, + >>> rpn_anchor_generator=anchor_generator, + >>> box_roi_pool=roi_pooler, + >>> mask_roi_pool=mask_roi_pooler) + >>> model.eval() + >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)] + >>> predictions = model(x) + """ + def __init__(self, backbone, num_classes=None, + # transform parameters + min_size=800, max_size=1333, + image_mean=None, image_std=None, + # RPN parameters + rpn_anchor_generator=None, rpn_head=None, + rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000, + rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000, + rpn_nms_thresh=0.7, + rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3, + rpn_batch_size_per_image=256, rpn_positive_fraction=0.5, + # Box parameters + box_roi_pool=None, box_head=None, box_predictor=None, + box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100, + box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5, + box_batch_size_per_image=512, box_positive_fraction=0.25, + bbox_reg_weights=None, + # Mask parameters + mask_roi_pool=None, mask_head=None, mask_predictor=None): + + assert isinstance(mask_roi_pool, (MultiScaleRoIAlign, type(None))) + + if num_classes is not None: + if mask_predictor is not None: + raise ValueError("num_classes should be None when mask_predictor is specified") + + out_channels = backbone.out_channels + + if mask_roi_pool is None: + mask_roi_pool = MultiScaleRoIAlign( + featmap_names=['0', '1', '2', '3'], + output_size=14, + sampling_ratio=2) + + if mask_head is None: + mask_layers = (256, 256, 256, 256) + mask_dilation = 1 + mask_head = MaskRCNNHeads(out_channels, mask_layers, mask_dilation) + + if mask_predictor is None: + mask_predictor_in_channels = 256 # == mask_layers[-1] + mask_dim_reduced = 256 + mask_predictor = MaskRCNNPredictor(mask_predictor_in_channels, + mask_dim_reduced, num_classes) + + super(MaskRCNN, self).__init__( + backbone, num_classes, + # transform parameters + min_size, max_size, + image_mean, image_std, + # RPN-specific parameters + rpn_anchor_generator, rpn_head, + rpn_pre_nms_top_n_train, rpn_pre_nms_top_n_test, + rpn_post_nms_top_n_train, rpn_post_nms_top_n_test, + rpn_nms_thresh, + rpn_fg_iou_thresh, rpn_bg_iou_thresh, + rpn_batch_size_per_image, rpn_positive_fraction, + # Box parameters + box_roi_pool, box_head, box_predictor, + box_score_thresh, box_nms_thresh, box_detections_per_img, + box_fg_iou_thresh, box_bg_iou_thresh, + box_batch_size_per_image, box_positive_fraction, + bbox_reg_weights) + + self.roi_heads.mask_roi_pool = mask_roi_pool + self.roi_heads.mask_head = mask_head + self.roi_heads.mask_predictor = mask_predictor + + +class MaskRCNNHeads(nn.Sequential): + def __init__(self, in_channels, layers, dilation): + """ + Arguments: + in_channels (int): number of input channels + layers (list): feature dimensions of each FCN layer + dilation (int): dilation rate of kernel + """ + d = OrderedDict() + next_feature = in_channels + for layer_idx, layer_features in enumerate(layers, 1): + d["mask_fcn{}".format(layer_idx)] = misc_nn_ops.Conv2d( + next_feature, layer_features, kernel_size=3, + stride=1, padding=dilation, dilation=dilation) + d["relu{}".format(layer_idx)] = nn.ReLU(inplace=True) + next_feature = 
layer_features + + super(MaskRCNNHeads, self).__init__(d) + for name, param in self.named_parameters(): + if "weight" in name: + nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") + # elif "bias" in name: + # nn.init.constant_(param, 0) + + +class MaskRCNNPredictor(nn.Sequential): + def __init__(self, in_channels, dim_reduced, num_classes): + super(MaskRCNNPredictor, self).__init__(OrderedDict([ + ("conv5_mask", misc_nn_ops.ConvTranspose2d(in_channels, dim_reduced, 2, 2, 0)), + ("relu", nn.ReLU(inplace=True)), + ("mask_fcn_logits", misc_nn_ops.Conv2d(dim_reduced, num_classes, 1, 1, 0)), + ])) + + for name, param in self.named_parameters(): + if "weight" in name: + nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") + # elif "bias" in name: + # nn.init.constant_(param, 0) + + +model_urls = { + 'maskrcnn_resnet50_fpn_coco': + 'https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth', +} + + +def maskrcnn_resnet50_fpn(pretrained=False, progress=True, + num_classes=91, pretrained_backbone=True, **kwargs): + """ + Constructs a Mask R-CNN model with a ResNet-50-FPN backbone. + + The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each + image, and should be in ``0-1`` range. Different images can have different sizes. + + The behavior of the model changes depending if it is in training or evaluation mode. + + During training, the model expects both the input tensors, as well as a targets (list of dictionary), + containing: + - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with values of ``x`` + between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H`` + - labels (``Int64Tensor[N]``): the class label for each ground-truth box + - masks (``UInt8Tensor[N, H, W]``): the segmentation binary masks for each instance + + The model returns a ``Dict[Tensor]`` during training, containing the classification and regression + losses for both the RPN and the R-CNN, and the mask loss. + + During inference, the model requires only the input tensors, and returns the post-processed + predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as + follows: + - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with values of ``x`` + between ``0`` and ``W`` and values of ``y`` between ``0`` and ``H`` + - labels (``Int64Tensor[N]``): the predicted labels for each image + - scores (``Tensor[N]``): the scores or each prediction + - masks (``UInt8Tensor[N, 1, H, W]``): the predicted masks for each instance, in ``0-1`` range. In order to + obtain the final segmentation masks, the soft masks can be thresholded, generally + with a value of 0.5 (``mask >= 0.5``) + + Mask R-CNN is exportable to ONNX for a fixed batch size with inputs images of fixed size. 
+ + Example:: + + >>> model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True) + >>> model.eval() + >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)] + >>> predictions = model(x) + >>> + >>> # optionally, if you want to export the model to ONNX: + >>> torch.onnx.export(model, x, "mask_rcnn.onnx", opset_version = 11) + + Arguments: + pretrained (bool): If True, returns a model pre-trained on COCO train2017 + progress (bool): If True, displays a progress bar of the download to stderr + """ + if pretrained: + # no need to download the backbone if pretrained is set + pretrained_backbone = False + backbone = resnet_fpn_backbone('resnet50', pretrained_backbone) + model = MaskRCNN(backbone, num_classes, **kwargs) + if pretrained: + state_dict = load_state_dict_from_url(model_urls['maskrcnn_resnet50_fpn_coco'], + progress=progress) + model.load_state_dict(state_dict) + return model diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/roi_heads.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/roi_heads.py new file mode 100644 index 0000000000..fd1334fbc2 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/roi_heads.py @@ -0,0 +1,870 @@ +import torch +import torchvision + +import torch.nn.functional as F +from torch import nn, Tensor + +from torchvision.ops import boxes as box_ops +from torchvision.ops import misc as misc_nn_ops + +from torchvision.ops import roi_align + +from . import _utils as det_utils + +from torch.jit.annotations import Optional, List, Dict, Tuple + + +def fastrcnn_loss(class_logits, box_regression, labels, regression_targets): + # type: (Tensor, Tensor, List[Tensor], List[Tensor]) + """ + Computes the loss for Faster R-CNN. + + Arguments: + class_logits (Tensor) + box_regression (Tensor) + labels (list[BoxList]) + regression_targets (Tensor) + + Returns: + classification_loss (Tensor) + box_loss (Tensor) + """ + + labels = torch.cat(labels, dim=0) + regression_targets = torch.cat(regression_targets, dim=0) + + classification_loss = F.cross_entropy(class_logits, labels) + + # get indices that correspond to the regression targets for + # the corresponding ground truth labels, to be used with + # advanced indexing + sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1) + labels_pos = labels[sampled_pos_inds_subset] + N, num_classes = class_logits.shape + box_regression = box_regression.reshape(N, -1, 4) + + box_loss = F.smooth_l1_loss( + box_regression[sampled_pos_inds_subset, labels_pos], + regression_targets[sampled_pos_inds_subset], + reduction="sum", + ) + box_loss = box_loss / labels.numel() + + return classification_loss, box_loss + + +def maskrcnn_inference(x, labels): + # type: (Tensor, List[Tensor]) + """ + From the results of the CNN, post process the masks + by taking the mask corresponding to the class with max + probability (which are of fixed size and directly output + by the CNN) and return the masks in the mask field of the BoxList. 
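+
+    Example (a shape sketch with made-up sizes: three detections over 91 classes,
+    split across two images)::
+
+        >>> import torch
+        >>> x = torch.randn(3, 91, 28, 28)
+        >>> labels = [torch.tensor([5, 17]), torch.tensor([23])]
+        >>> probs = maskrcnn_inference(x, labels)
+        >>> [p.shape for p in probs]
+        [torch.Size([2, 1, 28, 28]), torch.Size([1, 1, 28, 28])]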
+ + Arguments: + x (Tensor): the mask logits + labels (list[BoxList]): bounding boxes that are used as + reference, one for ech image + + Returns: + results (list[BoxList]): one BoxList for each image, containing + the extra field mask + """ + mask_prob = x.sigmoid() + + # select masks coresponding to the predicted classes + num_masks = x.shape[0] + boxes_per_image = [len(l) for l in labels] + labels = torch.cat(labels) + index = torch.arange(num_masks, device=labels.device) + mask_prob = mask_prob[index, labels][:, None] + + if len(boxes_per_image) == 1: + # TODO : remove when dynamic split supported in ONNX + # and remove assignment to mask_prob_list, just assign to mask_prob + mask_prob_list = [mask_prob] + else: + mask_prob_list = mask_prob.split(boxes_per_image, dim=0) + + return mask_prob_list + + +def project_masks_on_boxes(gt_masks, boxes, matched_idxs, M): + # type: (Tensor, Tensor, Tensor, int) + """ + Given segmentation masks and the bounding boxes corresponding + to the location of the masks in the image, this function + crops and resizes the masks in the position defined by the + boxes. This prepares the masks for them to be fed to the + loss computation as the targets. + """ + matched_idxs = matched_idxs.to(boxes) + rois = torch.cat([matched_idxs[:, None], boxes], dim=1) + gt_masks = gt_masks[:, None].to(rois) + return roi_align(gt_masks, rois, (M, M), 1.)[:, 0] + + +def maskrcnn_loss(mask_logits, proposals, gt_masks, gt_labels, mask_matched_idxs): + # type: (Tensor, List[Tensor], List[Tensor], List[Tensor], List[Tensor]) + """ + Arguments: + proposals (list[BoxList]) + mask_logits (Tensor) + targets (list[BoxList]) + + Return: + mask_loss (Tensor): scalar tensor containing the loss + """ + + discretization_size = mask_logits.shape[-1] + labels = [l[idxs] for l, idxs in zip(gt_labels, mask_matched_idxs)] + mask_targets = [ + project_masks_on_boxes(m, p, i, discretization_size) + for m, p, i in zip(gt_masks, proposals, mask_matched_idxs) + ] + + labels = torch.cat(labels, dim=0) + mask_targets = torch.cat(mask_targets, dim=0) + + # torch.mean (in binary_cross_entropy_with_logits) doesn't + # accept empty tensors, so handle it separately + if mask_targets.numel() == 0: + return mask_logits.sum() * 0 + + mask_loss = F.binary_cross_entropy_with_logits( + mask_logits[torch.arange(labels.shape[0], device=labels.device), labels], mask_targets + ) + return mask_loss + + +def keypoints_to_heatmap(keypoints, rois, heatmap_size): + # type: (Tensor, Tensor, int) + offset_x = rois[:, 0] + offset_y = rois[:, 1] + scale_x = heatmap_size / (rois[:, 2] - rois[:, 0]) + scale_y = heatmap_size / (rois[:, 3] - rois[:, 1]) + + offset_x = offset_x[:, None] + offset_y = offset_y[:, None] + scale_x = scale_x[:, None] + scale_y = scale_y[:, None] + + x = keypoints[..., 0] + y = keypoints[..., 1] + + x_boundary_inds = x == rois[:, 2][:, None] + y_boundary_inds = y == rois[:, 3][:, None] + + x = (x - offset_x) * scale_x + x = x.floor().long() + y = (y - offset_y) * scale_y + y = y.floor().long() + + x[x_boundary_inds] = torch.tensor(heatmap_size - 1) + y[y_boundary_inds] = torch.tensor(heatmap_size - 1) + + valid_loc = (x >= 0) & (y >= 0) & (x < heatmap_size) & (y < heatmap_size) + vis = keypoints[..., 2] > 0 + valid = (valid_loc & vis).long() + + lin_ind = y * heatmap_size + x + heatmaps = lin_ind * valid + + return heatmaps, valid + + +def _onnx_heatmaps_to_keypoints(maps, maps_i, roi_map_width, roi_map_height, + widths_i, heights_i, offset_x_i, offset_y_i): + num_keypoints = 
torch.scalar_tensor(maps.size(1), dtype=torch.int64) + + width_correction = widths_i / roi_map_width + height_correction = heights_i / roi_map_height + + roi_map = torch.nn.functional.interpolate( + maps_i[None], size=(int(roi_map_height), int(roi_map_width)), mode='bicubic', align_corners=False)[0] + + w = torch.scalar_tensor(roi_map.size(2), dtype=torch.int64) + pos = roi_map.reshape(num_keypoints, -1).argmax(dim=1) + + x_int = (pos % w) + y_int = ((pos - x_int) / w) + + x = (torch.tensor(0.5, dtype=torch.float32) + x_int.to(dtype=torch.float32)) * \ + width_correction.to(dtype=torch.float32) + y = (torch.tensor(0.5, dtype=torch.float32) + y_int.to(dtype=torch.float32)) * \ + height_correction.to(dtype=torch.float32) + + xy_preds_i_0 = x + offset_x_i.to(dtype=torch.float32) + xy_preds_i_1 = y + offset_y_i.to(dtype=torch.float32) + xy_preds_i_2 = torch.ones((xy_preds_i_1.shape), dtype=torch.float32) + xy_preds_i = torch.stack([xy_preds_i_0.to(dtype=torch.float32), + xy_preds_i_1.to(dtype=torch.float32), + xy_preds_i_2.to(dtype=torch.float32)], 0) + + # TODO: simplify when indexing without rank will be supported by ONNX + end_scores_i = roi_map.index_select(1, y_int.to(dtype=torch.int64)) \ + .index_select(2, x_int.to(dtype=torch.int64))[:num_keypoints, 0, 0] + return xy_preds_i, end_scores_i + + +@torch.jit.script +def _onnx_heatmaps_to_keypoints_loop(maps, rois, widths_ceil, heights_ceil, + widths, heights, offset_x, offset_y, num_keypoints): + xy_preds = torch.zeros((0, 3, int(num_keypoints)), dtype=torch.float32, device=maps.device) + end_scores = torch.zeros((0, int(num_keypoints)), dtype=torch.float32, device=maps.device) + + for i in range(int(rois.size(0))): + xy_preds_i, end_scores_i = _onnx_heatmaps_to_keypoints(maps, maps[i], + widths_ceil[i], heights_ceil[i], + widths[i], heights[i], + offset_x[i], offset_y[i]) + xy_preds = torch.cat((xy_preds.to(dtype=torch.float32), + xy_preds_i.unsqueeze(0).to(dtype=torch.float32)), 0) + end_scores = torch.cat((end_scores.to(dtype=torch.float32), + end_scores_i.to(dtype=torch.float32).unsqueeze(0)), 0) + return xy_preds, end_scores + + +def heatmaps_to_keypoints(maps, rois): + """Extract predicted keypoint locations from heatmaps. Output has shape + (#rois, 4, #keypoints) with the 4 rows corresponding to (x, y, logit, prob) + for each keypoint. + """ + # This function converts a discrete image coordinate in a HEATMAP_SIZE x + # HEATMAP_SIZE image to a continuous keypoint coordinate. We maintain + # consistency with keypoints_to_heatmap_labels by using the conversion from + # Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a + # continuous coordinate. 
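+    # Worked example of that conversion (illustrative numbers only, not from the source):
+    # for widths[i] = 55.3 the map is resized to roi_map_width = 56, giving
+    # width_correction = 55.3 / 56 ~= 0.987, and an argmax at column x_int = 20 becomes
+    # x = (20 + 0.5) * 0.987 ~= 20.2 px from the RoI's left edge before offset_x[i]
+    # shifts it back into image coordinates.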
+ offset_x = rois[:, 0] + offset_y = rois[:, 1] + + widths = rois[:, 2] - rois[:, 0] + heights = rois[:, 3] - rois[:, 1] + widths = widths.clamp(min=1) + heights = heights.clamp(min=1) + widths_ceil = widths.ceil() + heights_ceil = heights.ceil() + + num_keypoints = maps.shape[1] + + if torchvision._is_tracing(): + xy_preds, end_scores = _onnx_heatmaps_to_keypoints_loop(maps, rois, + widths_ceil, heights_ceil, widths, heights, + offset_x, offset_y, + torch.scalar_tensor(num_keypoints, dtype=torch.int64)) + return xy_preds.permute(0, 2, 1), end_scores + + xy_preds = torch.zeros((len(rois), 3, num_keypoints), dtype=torch.float32, device=maps.device) + end_scores = torch.zeros((len(rois), num_keypoints), dtype=torch.float32, device=maps.device) + for i in range(len(rois)): + roi_map_width = int(widths_ceil[i].item()) + roi_map_height = int(heights_ceil[i].item()) + width_correction = widths[i] / roi_map_width + height_correction = heights[i] / roi_map_height + roi_map = torch.nn.functional.interpolate( + maps[i][None], size=(roi_map_height, roi_map_width), mode='bicubic', align_corners=False)[0] + # roi_map_probs = scores_to_probs(roi_map.copy()) + w = roi_map.shape[2] + pos = roi_map.reshape(num_keypoints, -1).argmax(dim=1) + + x_int = pos % w + y_int = (pos - x_int) // w + # assert (roi_map_probs[k, y_int, x_int] == + # roi_map_probs[k, :, :].max()) + x = (x_int.float() + 0.5) * width_correction + y = (y_int.float() + 0.5) * height_correction + xy_preds[i, 0, :] = x + offset_x[i] + xy_preds[i, 1, :] = y + offset_y[i] + xy_preds[i, 2, :] = 1 + end_scores[i, :] = roi_map[torch.arange(num_keypoints), y_int, x_int] + + return xy_preds.permute(0, 2, 1), end_scores + + +def keypointrcnn_loss(keypoint_logits, proposals, gt_keypoints, keypoint_matched_idxs): + # type: (Tensor, List[Tensor], List[Tensor], List[Tensor]) + N, K, H, W = keypoint_logits.shape + assert H == W + discretization_size = H + heatmaps = [] + valid = [] + for proposals_per_image, gt_kp_in_image, midx in zip(proposals, gt_keypoints, keypoint_matched_idxs): + kp = gt_kp_in_image[midx] + heatmaps_per_image, valid_per_image = keypoints_to_heatmap( + kp, proposals_per_image, discretization_size + ) + heatmaps.append(heatmaps_per_image.view(-1)) + valid.append(valid_per_image.view(-1)) + + keypoint_targets = torch.cat(heatmaps, dim=0) + valid = torch.cat(valid, dim=0).to(dtype=torch.uint8) + valid = torch.nonzero(valid).squeeze(1) + + # torch.mean (in binary_cross_entropy_with_logits) does'nt + # accept empty tensors, so handle it sepaartely + if keypoint_targets.numel() == 0 or len(valid) == 0: + return keypoint_logits.sum() * 0 + + keypoint_logits = keypoint_logits.view(N * K, H * W) + + keypoint_loss = F.cross_entropy(keypoint_logits[valid], keypoint_targets[valid]) + return keypoint_loss + + +def keypointrcnn_inference(x, boxes): + # type: (Tensor, List[Tensor]) + kp_probs = [] + kp_scores = [] + + boxes_per_image = [box.size(0) for box in boxes] + + if len(boxes_per_image) == 1: + # TODO : remove when dynamic split supported in ONNX + kp_prob, scores = heatmaps_to_keypoints(x, boxes[0]) + return [kp_prob], [scores] + + x2 = x.split(boxes_per_image, dim=0) + + for xx, bb in zip(x2, boxes): + kp_prob, scores = heatmaps_to_keypoints(xx, bb) + kp_probs.append(kp_prob) + kp_scores.append(scores) + + return kp_probs, kp_scores + + +def _onnx_expand_boxes(boxes, scale): + # type: (Tensor, float) + w_half = (boxes[:, 2] - boxes[:, 0]) * .5 + h_half = (boxes[:, 3] - boxes[:, 1]) * .5 + x_c = (boxes[:, 2] + boxes[:, 0]) * .5 + y_c = 
(boxes[:, 3] + boxes[:, 1]) * .5 + + w_half = w_half.to(dtype=torch.float32) * scale + h_half = h_half.to(dtype=torch.float32) * scale + + boxes_exp0 = x_c - w_half + boxes_exp1 = y_c - h_half + boxes_exp2 = x_c + w_half + boxes_exp3 = y_c + h_half + boxes_exp = torch.stack((boxes_exp0, boxes_exp1, boxes_exp2, boxes_exp3), 1) + return boxes_exp + + +# the next two functions should be merged inside Masker +# but are kept here for the moment while we need them +# temporarily for paste_mask_in_image +def expand_boxes(boxes, scale): + # type: (Tensor, float) + if torchvision._is_tracing(): + return _onnx_expand_boxes(boxes, scale) + w_half = (boxes[:, 2] - boxes[:, 0]) * .5 + h_half = (boxes[:, 3] - boxes[:, 1]) * .5 + x_c = (boxes[:, 2] + boxes[:, 0]) * .5 + y_c = (boxes[:, 3] + boxes[:, 1]) * .5 + + w_half *= scale + h_half *= scale + + boxes_exp = torch.zeros_like(boxes) + boxes_exp[:, 0] = x_c - w_half + boxes_exp[:, 2] = x_c + w_half + boxes_exp[:, 1] = y_c - h_half + boxes_exp[:, 3] = y_c + h_half + return boxes_exp + + +@torch.jit.unused +def expand_masks_tracing_scale(M, padding): + # type: (int, int) -> float + return torch.tensor(M + 2 * padding).to(torch.float32) / torch.tensor(M).to(torch.float32) + + +def expand_masks(mask, padding): + # type: (Tensor, int) + M = mask.shape[-1] + if torch._C._get_tracing_state(): # could not import is_tracing(), not sure why + scale = expand_masks_tracing_scale(M, padding) + else: + scale = float(M + 2 * padding) / M + padded_mask = torch.nn.functional.pad(mask, (padding,) * 4) + return padded_mask, scale + + +def paste_mask_in_image(mask, box, im_h, im_w): + # type: (Tensor, Tensor, int, int) + TO_REMOVE = 1 + w = int(box[2] - box[0] + TO_REMOVE) + h = int(box[3] - box[1] + TO_REMOVE) + w = max(w, 1) + h = max(h, 1) + + # Set shape to [batchxCxHxW] + mask = mask.expand((1, 1, -1, -1)) + + # Resize mask + mask = misc_nn_ops.interpolate(mask, size=(h, w), mode='bilinear', align_corners=False) + mask = mask[0][0] + + im_mask = torch.zeros((im_h, im_w), dtype=mask.dtype, device=mask.device) + x_0 = max(box[0], 0) + x_1 = min(box[2] + 1, im_w) + y_0 = max(box[1], 0) + y_1 = min(box[3] + 1, im_h) + + im_mask[y_0:y_1, x_0:x_1] = mask[ + (y_0 - box[1]):(y_1 - box[1]), (x_0 - box[0]):(x_1 - box[0]) + ] + return im_mask + + +def _onnx_paste_mask_in_image(mask, box, im_h, im_w): + one = torch.ones(1, dtype=torch.int64) + zero = torch.zeros(1, dtype=torch.int64) + + w = (box[2] - box[0] + one) + h = (box[3] - box[1] + one) + w = torch.max(torch.cat((w, one))) + h = torch.max(torch.cat((h, one))) + + # Set shape to [batchxCxHxW] + mask = mask.expand((1, 1, mask.size(0), mask.size(1))) + + # Resize mask + mask = torch.nn.functional.interpolate(mask, size=(int(h), int(w)), mode='bilinear', align_corners=False) + mask = mask[0][0] + + x_0 = torch.max(torch.cat((box[0].unsqueeze(0), zero))) + x_1 = torch.min(torch.cat((box[2].unsqueeze(0) + one, im_w.unsqueeze(0)))) + y_0 = torch.max(torch.cat((box[1].unsqueeze(0), zero))) + y_1 = torch.min(torch.cat((box[3].unsqueeze(0) + one, im_h.unsqueeze(0)))) + + unpaded_im_mask = mask[(y_0 - box[1]):(y_1 - box[1]), + (x_0 - box[0]):(x_1 - box[0])] + + # TODO : replace below with a dynamic padding when support is added in ONNX + + # pad y + zeros_y0 = torch.zeros(y_0, unpaded_im_mask.size(1)) + zeros_y1 = torch.zeros(im_h - y_1, unpaded_im_mask.size(1)) + concat_0 = torch.cat((zeros_y0, + unpaded_im_mask.to(dtype=torch.float32), + zeros_y1), 0)[0:im_h, :] + # pad x + zeros_x0 = torch.zeros(concat_0.size(0), x_0) + zeros_x1 
= torch.zeros(concat_0.size(0), im_w - x_1) + im_mask = torch.cat((zeros_x0, + concat_0, + zeros_x1), 1)[:, :im_w] + return im_mask + + +@torch.jit.script +def _onnx_paste_masks_in_image_loop(masks, boxes, im_h, im_w): + res_append = torch.zeros(0, im_h, im_w) + for i in range(masks.size(0)): + mask_res = _onnx_paste_mask_in_image(masks[i][0], boxes[i], im_h, im_w) + mask_res = mask_res.unsqueeze(0) + res_append = torch.cat((res_append, mask_res)) + return res_append + + +def paste_masks_in_image(masks, boxes, img_shape, padding=1): + # type: (Tensor, Tensor, Tuple[int, int], int) + masks, scale = expand_masks(masks, padding=padding) + boxes = expand_boxes(boxes, scale).to(dtype=torch.int64) + im_h, im_w = img_shape + + if torchvision._is_tracing(): + return _onnx_paste_masks_in_image_loop(masks, boxes, + torch.scalar_tensor(im_h, dtype=torch.int64), + torch.scalar_tensor(im_w, dtype=torch.int64))[:, None] + res = [ + paste_mask_in_image(m[0], b, im_h, im_w) + for m, b in zip(masks, boxes) + ] + if len(res) > 0: + ret = torch.stack(res, dim=0)[:, None] + else: + ret = masks.new_empty((0, 1, im_h, im_w)) + return ret + + +class RoIHeads(torch.nn.Module): + __annotations__ = { + 'box_coder': det_utils.BoxCoder, + 'proposal_matcher': det_utils.Matcher, + 'fg_bg_sampler': det_utils.BalancedPositiveNegativeSampler, + } + + def __init__(self, + box_roi_pool, + box_head, + box_predictor, + # Faster R-CNN training + fg_iou_thresh, bg_iou_thresh, + batch_size_per_image, positive_fraction, + bbox_reg_weights, + # Faster R-CNN inference + score_thresh, + nms_thresh, + detections_per_img, + # Mask + mask_roi_pool=None, + mask_head=None, + mask_predictor=None, + keypoint_roi_pool=None, + keypoint_head=None, + keypoint_predictor=None, + ): + super(RoIHeads, self).__init__() + + self.box_similarity = box_ops.box_iou + # assign ground-truth boxes for each proposal + self.proposal_matcher = det_utils.Matcher( + fg_iou_thresh, + bg_iou_thresh, + allow_low_quality_matches=False) + + self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler( + batch_size_per_image, + positive_fraction) + + if bbox_reg_weights is None: + bbox_reg_weights = (10., 10., 5., 5.) 
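+        # The default (10., 10., 5., 5.) weights scale the encoded regression targets,
+        # roughly following the usual Faster R-CNN parameterization:
+        # dx = 10 * (x_gt - x_a) / w_a, dy = 10 * (y_gt - y_a) / h_a,
+        # dw = 5 * log(w_gt / w_a), dh = 5 * log(h_gt / h_a),
+        # as produced by det_utils.BoxCoder below.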
+ self.box_coder = det_utils.BoxCoder(bbox_reg_weights) + + self.box_roi_pool = box_roi_pool + self.box_head = box_head + self.box_predictor = box_predictor + + self.score_thresh = score_thresh + self.nms_thresh = nms_thresh + self.detections_per_img = detections_per_img + + self.mask_roi_pool = mask_roi_pool + self.mask_head = mask_head + self.mask_predictor = mask_predictor + + self.keypoint_roi_pool = keypoint_roi_pool + self.keypoint_head = keypoint_head + self.keypoint_predictor = keypoint_predictor + + def has_mask(self): + if self.mask_roi_pool is None: + return False + if self.mask_head is None: + return False + if self.mask_predictor is None: + return False + return True + + def has_keypoint(self): + if self.keypoint_roi_pool is None: + return False + if self.keypoint_head is None: + return False + if self.keypoint_predictor is None: + return False + return True + + def assign_targets_to_proposals(self, proposals, gt_boxes, gt_labels): + # type: (List[Tensor], List[Tensor], List[Tensor]) + matched_idxs = [] + labels = [] + for proposals_in_image, gt_boxes_in_image, gt_labels_in_image in zip(proposals, gt_boxes, gt_labels): + + if gt_boxes_in_image.numel() == 0: + # Background image + device = proposals_in_image.device + clamped_matched_idxs_in_image = torch.zeros( + (proposals_in_image.shape[0],), dtype=torch.int64, device=device + ) + labels_in_image = torch.zeros( + (proposals_in_image.shape[0],), dtype=torch.int64, device=device + ) + else: + # set to self.box_similarity when https://github.com/pytorch/pytorch/issues/27495 lands + match_quality_matrix = box_ops.box_iou(gt_boxes_in_image, proposals_in_image) + matched_idxs_in_image = self.proposal_matcher(match_quality_matrix) + + clamped_matched_idxs_in_image = matched_idxs_in_image.clamp(min=0) + + labels_in_image = gt_labels_in_image[clamped_matched_idxs_in_image] + labels_in_image = labels_in_image.to(dtype=torch.int64) + + # Label background (below the low threshold) + bg_inds = matched_idxs_in_image == self.proposal_matcher.BELOW_LOW_THRESHOLD + labels_in_image[bg_inds] = torch.tensor(0) + + # Label ignore proposals (between low and high thresholds) + ignore_inds = matched_idxs_in_image == self.proposal_matcher.BETWEEN_THRESHOLDS + labels_in_image[ignore_inds] = torch.tensor(-1) # -1 is ignored by sampler + + matched_idxs.append(clamped_matched_idxs_in_image) + labels.append(labels_in_image) + return matched_idxs, labels + + def subsample(self, labels): + # type: (List[Tensor]) + sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels) + sampled_inds = [] + for img_idx, (pos_inds_img, neg_inds_img) in enumerate( + zip(sampled_pos_inds, sampled_neg_inds) + ): + img_sampled_inds = torch.nonzero(pos_inds_img | neg_inds_img).squeeze(1) + sampled_inds.append(img_sampled_inds) + return sampled_inds + + def add_gt_proposals(self, proposals, gt_boxes): + # type: (List[Tensor], List[Tensor]) + proposals = [ + torch.cat((proposal, gt_box)) + for proposal, gt_box in zip(proposals, gt_boxes) + ] + + return proposals + + def DELTEME_all(self, the_list): + # type: (List[bool]) + for i in the_list: + if not i: + return False + return True + + def check_targets(self, targets): + # type: (Optional[List[Dict[str, Tensor]]]) + assert targets is not None + assert self.DELTEME_all(["boxes" in t for t in targets]) + assert self.DELTEME_all(["labels" in t for t in targets]) + if self.has_mask(): + assert self.DELTEME_all(["masks" in t for t in targets]) + + def select_training_samples(self, proposals, targets): + # type: (List[Tensor], 
Optional[List[Dict[str, Tensor]]]) + self.check_targets(targets) + assert targets is not None + dtype = proposals[0].dtype + device = proposals[0].device + + gt_boxes = [t["boxes"].to(dtype) for t in targets] + gt_labels = [t["labels"] for t in targets] + + # append ground-truth bboxes to propos + proposals = self.add_gt_proposals(proposals, gt_boxes) + + # get matching gt indices for each proposal + matched_idxs, labels = self.assign_targets_to_proposals(proposals, gt_boxes, gt_labels) + # sample a fixed proportion of positive-negative proposals + sampled_inds = self.subsample(labels) + matched_gt_boxes = [] + num_images = len(proposals) + for img_id in range(num_images): + img_sampled_inds = sampled_inds[img_id] + proposals[img_id] = proposals[img_id][img_sampled_inds] + labels[img_id] = labels[img_id][img_sampled_inds] + matched_idxs[img_id] = matched_idxs[img_id][img_sampled_inds] + + gt_boxes_in_image = gt_boxes[img_id] + if gt_boxes_in_image.numel() == 0: + gt_boxes_in_image = torch.zeros((1, 4), dtype=dtype, device=device) + matched_gt_boxes.append(gt_boxes_in_image[matched_idxs[img_id]]) + + regression_targets = self.box_coder.encode(matched_gt_boxes, proposals) + return proposals, matched_idxs, labels, regression_targets + + def postprocess_detections(self, class_logits, box_regression, proposals, image_shapes): + # type: (Tensor, Tensor, List[Tensor], List[Tuple[int, int]]) + device = class_logits.device + num_classes = class_logits.shape[-1] + + boxes_per_image = [boxes_in_image.shape[0] for boxes_in_image in proposals] + pred_boxes = self.box_coder.decode(box_regression, proposals) + + pred_scores = F.softmax(class_logits, -1) + + pred_boxes_list = pred_boxes.split(boxes_per_image, 0) + pred_scores_list = pred_scores.split(boxes_per_image, 0) + + all_boxes = [] + all_scores = [] + all_labels = [] + for boxes, scores, image_shape in zip(pred_boxes_list, pred_scores_list, image_shapes): + boxes = box_ops.clip_boxes_to_image(boxes, image_shape) + + # create labels for each prediction + labels = torch.arange(num_classes, device=device) + labels = labels.view(1, -1).expand_as(scores) + + # remove predictions with the background label + boxes = boxes[:, 1:] + scores = scores[:, 1:] + labels = labels[:, 1:] + + # batch everything, by making every class prediction be a separate instance + boxes = boxes.reshape(-1, 4) + scores = scores.reshape(-1) + labels = labels.reshape(-1) + + # remove low scoring boxes + inds = torch.nonzero(scores > self.score_thresh).squeeze(1) + boxes, scores, labels = boxes[inds], scores[inds], labels[inds] + + # remove empty boxes + keep = box_ops.remove_small_boxes(boxes, min_size=1e-2) + boxes, scores, labels = boxes[keep], scores[keep], labels[keep] + + # non-maximum suppression, independently done per class + keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh) + # keep only topk scoring predictions + keep = keep[:self.detections_per_img] + boxes, scores, labels = boxes[keep], scores[keep], labels[keep] + + all_boxes.append(boxes) + all_scores.append(scores) + all_labels.append(labels) + + return all_boxes, all_scores, all_labels + + def forward(self, features, proposals, image_shapes, targets=None): + # type: (Dict[str, Tensor], List[Tensor], List[Tuple[int, int]], Optional[List[Dict[str, Tensor]]]) + """ + Arguments: + features (List[Tensor]) + proposals (List[Tensor[N, 4]]) + image_shapes (List[Tuple[H, W]]) + targets (List[Dict]) + """ + if targets is not None: + for t in targets: + # TODO: https://github.com/pytorch/pytorch/issues/26731 
+ floating_point_types = (torch.float, torch.double, torch.half) + assert t["boxes"].dtype in floating_point_types, 'target boxes must be of float type' + assert t["labels"].dtype == torch.int64, 'target labels must be of int64 type' + if self.has_keypoint(): + assert t["keypoints"].dtype == torch.float32, 'target keypoints must be of float type' + + if self.training: + proposals, matched_idxs, labels, regression_targets = self.select_training_samples(proposals, targets) + else: + labels = None + regression_targets = None + matched_idxs = None + + box_features = self.box_roi_pool(features, proposals, image_shapes) + box_features = self.box_head(box_features) + class_logits, box_regression = self.box_predictor(box_features) + + result = torch.jit.annotate(List[Dict[str, torch.Tensor]], []) + losses = {} + if self.training: + assert labels is not None and regression_targets is not None + loss_classifier, loss_box_reg = fastrcnn_loss( + class_logits, box_regression, labels, regression_targets) + losses = { + "loss_classifier": loss_classifier, + "loss_box_reg": loss_box_reg + } + else: + boxes, scores, labels = self.postprocess_detections(class_logits, box_regression, proposals, image_shapes) + num_images = len(boxes) + for i in range(num_images): + result.append( + { + "boxes": boxes[i], + "labels": labels[i], + "scores": scores[i], + } + ) + + if self.has_mask(): + mask_proposals = [p["boxes"] for p in result] + if self.training: + assert matched_idxs is not None + # during training, only focus on positive boxes + num_images = len(proposals) + mask_proposals = [] + pos_matched_idxs = [] + for img_id in range(num_images): + pos = torch.nonzero(labels[img_id] > 0).squeeze(1) + mask_proposals.append(proposals[img_id][pos]) + pos_matched_idxs.append(matched_idxs[img_id][pos]) + else: + pos_matched_idxs = None + + if self.mask_roi_pool is not None: + mask_features = self.mask_roi_pool(features, mask_proposals, image_shapes) + mask_features = self.mask_head(mask_features) + mask_logits = self.mask_predictor(mask_features) + else: + mask_logits = torch.tensor(0) + raise Exception("Expected mask_roi_pool to be not None") + + loss_mask = {} + if self.training: + assert targets is not None + assert pos_matched_idxs is not None + assert mask_logits is not None + + gt_masks = [t["masks"] for t in targets] + gt_labels = [t["labels"] for t in targets] + rcnn_loss_mask = maskrcnn_loss( + mask_logits, mask_proposals, + gt_masks, gt_labels, pos_matched_idxs) + loss_mask = { + "loss_mask": rcnn_loss_mask + } + else: + labels = [r["labels"] for r in result] + masks_probs = maskrcnn_inference(mask_logits, labels) + for mask_prob, r in zip(masks_probs, result): + r["masks"] = mask_prob + + losses.update(loss_mask) + + # keep none checks in if conditional so torchscript will conditionally + # compile each branch + if self.keypoint_roi_pool is not None and self.keypoint_head is not None \ + and self.keypoint_predictor is not None: + keypoint_proposals = [p["boxes"] for p in result] + if self.training: + # during training, only focus on positive boxes + num_images = len(proposals) + keypoint_proposals = [] + pos_matched_idxs = [] + assert matched_idxs is not None + for img_id in range(num_images): + pos = torch.nonzero(labels[img_id] > 0).squeeze(1) + keypoint_proposals.append(proposals[img_id][pos]) + pos_matched_idxs.append(matched_idxs[img_id][pos]) + else: + pos_matched_idxs = None + + keypoint_features = self.keypoint_roi_pool(features, keypoint_proposals, image_shapes) + keypoint_features =

self.keypoint_head(keypoint_features) + keypoint_logits = self.keypoint_predictor(keypoint_features) + + loss_keypoint = {} + if self.training: + assert targets is not None + assert pos_matched_idxs is not None + + gt_keypoints = [t["keypoints"] for t in targets] + rcnn_loss_keypoint = keypointrcnn_loss( + keypoint_logits, keypoint_proposals, + gt_keypoints, pos_matched_idxs) + loss_keypoint = { + "loss_keypoint": rcnn_loss_keypoint + } + else: + assert keypoint_logits is not None + assert keypoint_proposals is not None + + keypoints_probs, kp_scores = keypointrcnn_inference(keypoint_logits, keypoint_proposals) + for keypoint_prob, kps, r in zip(keypoints_probs, kp_scores, result): + r["keypoints"] = keypoint_prob + r["keypoints_scores"] = kps + + losses.update(loss_keypoint) + + return result, losses diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/rpn.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/rpn.py new file mode 100644 index 0000000000..381bc77084 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/rpn.py @@ -0,0 +1,501 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import torch +from torch.nn import functional as F +from torch import nn, Tensor + +import torchvision +from torchvision.ops import boxes as box_ops + +from . import _utils as det_utils +from .image_list import ImageList + +from torch.jit.annotations import List, Optional, Dict, Tuple + + +@torch.jit.unused +def _onnx_get_num_anchors_and_pre_nms_top_n(ob, orig_pre_nms_top_n): + # type: (Tensor, int) -> Tuple[int, int] + from torch.onnx import operators + num_anchors = operators.shape_as_tensor(ob)[1].unsqueeze(0) + # TODO : remove cast to IntTensor/num_anchors.dtype when + # ONNX Runtime version is updated with ReduceMin int64 support + pre_nms_top_n = torch.min(torch.cat( + (torch.tensor([orig_pre_nms_top_n], dtype=num_anchors.dtype), + num_anchors), 0).to(torch.int32)).to(num_anchors.dtype) + + return num_anchors, pre_nms_top_n + + +class AnchorGenerator(nn.Module): + __annotations__ = { + "cell_anchors": Optional[List[torch.Tensor]], + "_cache": Dict[str, List[torch.Tensor]] + } + + """ + Module that generates anchors for a set of feature maps and + image sizes. + + The module support computing anchors at multiple sizes and aspect ratios + per feature map. + + sizes and aspect_ratios should have the same number of elements, and it should + correspond to the number of feature maps. + + sizes[i] and aspect_ratios[i] can have an arbitrary number of elements, + and AnchorGenerator will output a set of sizes[i] * aspect_ratios[i] anchors + per spatial location for feature map i. + + Arguments: + sizes (Tuple[Tuple[int]]): + aspect_ratios (Tuple[Tuple[float]]): + """ + + def __init__( + self, + sizes=(128, 256, 512), + aspect_ratios=(0.5, 1.0, 2.0), + ): + super(AnchorGenerator, self).__init__() + + if not isinstance(sizes[0], (list, tuple)): + # TODO change this + sizes = tuple((s,) for s in sizes) + if not isinstance(aspect_ratios[0], (list, tuple)): + aspect_ratios = (aspect_ratios,) * len(sizes) + + assert len(sizes) == len(aspect_ratios) + + self.sizes = sizes + self.aspect_ratios = aspect_ratios + self.cell_anchors = None + self._cache = {} + + # TODO: https://github.com/pytorch/pytorch/issues/26792 + # For every (aspect_ratios, scales) combination, output a zero-centered anchor with those values. 
+ # (scales, aspect_ratios) are usually an element of zip(self.scales, self.aspect_ratios) + def generate_anchors(self, scales, aspect_ratios, dtype=torch.float32, device="cpu"): + # type: (List[int], List[float], int, Device) # noqa: F821 + scales = torch.as_tensor(scales, dtype=dtype, device=device) + aspect_ratios = torch.as_tensor(aspect_ratios, dtype=dtype, device=device) + h_ratios = torch.sqrt(aspect_ratios) + w_ratios = 1 / h_ratios + + ws = (w_ratios[:, None] * scales[None, :]).view(-1) + hs = (h_ratios[:, None] * scales[None, :]).view(-1) + + base_anchors = torch.stack([-ws, -hs, ws, hs], dim=1) / 2 + return base_anchors.round() + + def set_cell_anchors(self, dtype, device): + # type: (int, Device) -> None # noqa: F821 + if self.cell_anchors is not None: + cell_anchors = self.cell_anchors + assert cell_anchors is not None + # suppose that all anchors have the same device + # which is a valid assumption in the current state of the codebase + if cell_anchors[0].device == device: + return + + cell_anchors = [ + self.generate_anchors( + sizes, + aspect_ratios, + dtype, + device + ) + for sizes, aspect_ratios in zip(self.sizes, self.aspect_ratios) + ] + self.cell_anchors = cell_anchors + + def num_anchors_per_location(self): + return [len(s) * len(a) for s, a in zip(self.sizes, self.aspect_ratios)] + + # For every combination of (a, (g, s), i) in (self.cell_anchors, zip(grid_sizes, strides), 0:2), + # output g[i] anchors that are s[i] distance apart in direction i, with the same dimensions as a. + def grid_anchors(self, grid_sizes, strides): + # type: (List[List[int]], List[List[Tensor]]) + anchors = [] + cell_anchors = self.cell_anchors + assert cell_anchors is not None + + for size, stride, base_anchors in zip( + grid_sizes, strides, cell_anchors + ): + grid_height, grid_width = size + stride_height, stride_width = stride + device = base_anchors.device + + # For output anchor, compute [x_center, y_center, x_center, y_center] + shifts_x = torch.arange( + 0, grid_width, dtype=torch.float32, device=device + ) * stride_width + shifts_y = torch.arange( + 0, grid_height, dtype=torch.float32, device=device + ) * stride_height + shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x) + shift_x = shift_x.reshape(-1) + shift_y = shift_y.reshape(-1) + shifts = torch.stack((shift_x, shift_y, shift_x, shift_y), dim=1) + + # For every (base anchor, output anchor) pair, + # offset each zero-centered base anchor by the center of the output anchor. 
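+            # Shape-wise: shifts is (H*W, 4) and base_anchors is (A, 4); the views below
+            # broadcast them to (H*W, A, 4) before reshaping to (H*W*A, 4), so the anchors
+            # come out location-major (all A anchors of the first cell, then the next cell, ...).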
+ anchors.append( + (shifts.view(-1, 1, 4) + base_anchors.view(1, -1, 4)).reshape(-1, 4) + ) + + return anchors + + def cached_grid_anchors(self, grid_sizes, strides): + # type: (List[List[int]], List[List[Tensor]]) + key = str(grid_sizes) + str(strides) + if key in self._cache: + return self._cache[key] + anchors = self.grid_anchors(grid_sizes, strides) + self._cache[key] = anchors + return anchors + + def forward(self, image_list, feature_maps): + # type: (ImageList, List[Tensor]) + grid_sizes = list([feature_map.shape[-2:] for feature_map in feature_maps]) + image_size = image_list.tensors.shape[-2:] + dtype, device = feature_maps[0].dtype, feature_maps[0].device + strides = [[torch.tensor(image_size[0] / g[0], dtype=torch.int64, device=device), + torch.tensor(image_size[1] / g[1], dtype=torch.int64, device=device)] for g in grid_sizes] + self.set_cell_anchors(dtype, device) + anchors_over_all_feature_maps = self.cached_grid_anchors(grid_sizes, strides) + anchors = torch.jit.annotate(List[List[torch.Tensor]], []) + for i, (image_height, image_width) in enumerate(image_list.image_sizes): + anchors_in_image = [] + for anchors_per_feature_map in anchors_over_all_feature_maps: + anchors_in_image.append(anchors_per_feature_map) + anchors.append(anchors_in_image) + anchors = [torch.cat(anchors_per_image) for anchors_per_image in anchors] + # Clear the cache in case that memory leaks. + self._cache.clear() + return anchors + + +class RPNHead(nn.Module): + """ + Adds a simple RPN Head with classification and regression heads + + Arguments: + in_channels (int): number of channels of the input feature + num_anchors (int): number of anchors to be predicted + """ + + def __init__(self, in_channels, num_anchors): + super(RPNHead, self).__init__() + self.conv = nn.Conv2d( + in_channels, in_channels, kernel_size=3, stride=1, padding=1 + ) + self.cls_logits = nn.Conv2d(in_channels, num_anchors, kernel_size=1, stride=1) + self.bbox_pred = nn.Conv2d( + in_channels, num_anchors * 4, kernel_size=1, stride=1 + ) + + for l in self.children(): + torch.nn.init.normal_(l.weight, std=0.01) + torch.nn.init.constant_(l.bias, 0) + + def forward(self, x): + # type: (List[Tensor]) + logits = [] + bbox_reg = [] + for feature in x: + t = F.relu(self.conv(feature)) + logits.append(self.cls_logits(t)) + bbox_reg.append(self.bbox_pred(t)) + return logits, bbox_reg + + +def permute_and_flatten(layer, N, A, C, H, W): + # type: (Tensor, int, int, int, int, int) + layer = layer.view(N, -1, C, H, W) + layer = layer.permute(0, 3, 4, 1, 2) + layer = layer.reshape(N, -1, C) + return layer + + +def concat_box_prediction_layers(box_cls, box_regression): + # type: (List[Tensor], List[Tensor]) + box_cls_flattened = [] + box_regression_flattened = [] + # for each feature level, permute the outputs to make them be in the + # same format as the labels. 
Note that the labels are computed for + # all feature levels concatenated, so we keep the same representation + # for the objectness and the box_regression + for box_cls_per_level, box_regression_per_level in zip( + box_cls, box_regression + ): + N, AxC, H, W = box_cls_per_level.shape + Ax4 = box_regression_per_level.shape[1] + A = Ax4 // 4 + C = AxC // A + box_cls_per_level = permute_and_flatten( + box_cls_per_level, N, A, C, H, W + ) + box_cls_flattened.append(box_cls_per_level) + + box_regression_per_level = permute_and_flatten( + box_regression_per_level, N, A, 4, H, W + ) + box_regression_flattened.append(box_regression_per_level) + # concatenate on the first dimension (representing the feature levels), to + # take into account the way the labels were generated (with all feature maps + # being concatenated as well) + box_cls = torch.cat(box_cls_flattened, dim=1).flatten(0, -2) + box_regression = torch.cat(box_regression_flattened, dim=1).reshape(-1, 4) + return box_cls, box_regression + + +class RegionProposalNetwork(torch.nn.Module): + """ + Implements Region Proposal Network (RPN). + + Arguments: + anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature + maps. + head (nn.Module): module that computes the objectness and regression deltas + fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be + considered as positive during training of the RPN. + bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be + considered as negative during training of the RPN. + batch_size_per_image (int): number of anchors that are sampled during training of the RPN + for computing the loss + positive_fraction (float): proportion of positive anchors in a mini-batch during training + of the RPN + pre_nms_top_n (Dict[int]): number of proposals to keep before applying NMS. It should + contain two fields: training and testing, to allow for different values depending + on training or evaluation + post_nms_top_n (Dict[int]): number of proposals to keep after applying NMS. 
It should + contain two fields: training and testing, to allow for different values depending + on training or evaluation + nms_thresh (float): NMS threshold used for postprocessing the RPN proposals + + """ + __annotations__ = { + 'box_coder': det_utils.BoxCoder, + 'proposal_matcher': det_utils.Matcher, + 'fg_bg_sampler': det_utils.BalancedPositiveNegativeSampler, + 'pre_nms_top_n': Dict[str, int], + 'post_nms_top_n': Dict[str, int], + } + + def __init__(self, + anchor_generator, + head, + # + fg_iou_thresh, bg_iou_thresh, + batch_size_per_image, positive_fraction, + # + pre_nms_top_n, post_nms_top_n, nms_thresh): + super(RegionProposalNetwork, self).__init__() + self.anchor_generator = anchor_generator + self.head = head + self.box_coder = det_utils.BoxCoder(weights=(1.0, 1.0, 1.0, 1.0)) + + # used during training + self.box_similarity = box_ops.box_iou + + self.proposal_matcher = det_utils.Matcher( + fg_iou_thresh, + bg_iou_thresh, + allow_low_quality_matches=True, + ) + + self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler( + batch_size_per_image, positive_fraction + ) + # used during testing + self._pre_nms_top_n = pre_nms_top_n + self._post_nms_top_n = post_nms_top_n + self.nms_thresh = nms_thresh + self.min_size = 1e-3 + + def pre_nms_top_n(self): + if self.training: + return self._pre_nms_top_n['training'] + return self._pre_nms_top_n['testing'] + + def post_nms_top_n(self): + if self.training: + return self._post_nms_top_n['training'] + return self._post_nms_top_n['testing'] + + def assign_targets_to_anchors(self, anchors, targets): + # type: (List[Tensor], List[Dict[str, Tensor]]) + labels = [] + matched_gt_boxes = [] + for anchors_per_image, targets_per_image in zip(anchors, targets): + gt_boxes = targets_per_image["boxes"] + + if gt_boxes.numel() == 0: + # Background image (negative example) + device = anchors_per_image.device + matched_gt_boxes_per_image = torch.zeros(anchors_per_image.shape, dtype=torch.float32, device=device) + labels_per_image = torch.zeros((anchors_per_image.shape[0],), dtype=torch.float32, device=device) + else: + match_quality_matrix = box_ops.box_iou(gt_boxes, anchors_per_image) + matched_idxs = self.proposal_matcher(match_quality_matrix) + # get the targets corresponding GT for each proposal + # NB: need to clamp the indices because we can have a single + # GT in the image, and matched_idxs can be -2, which goes + # out of bounds + matched_gt_boxes_per_image = gt_boxes[matched_idxs.clamp(min=0)] + + labels_per_image = matched_idxs >= 0 + labels_per_image = labels_per_image.to(dtype=torch.float32) + + # Background (negative examples) + bg_indices = matched_idxs == self.proposal_matcher.BELOW_LOW_THRESHOLD + labels_per_image[bg_indices] = torch.tensor(0.0) + + # discard indices that are between thresholds + inds_to_discard = matched_idxs == self.proposal_matcher.BETWEEN_THRESHOLDS + labels_per_image[inds_to_discard] = torch.tensor(-1.0) + + labels.append(labels_per_image) + matched_gt_boxes.append(matched_gt_boxes_per_image) + return labels, matched_gt_boxes + + def _get_top_n_idx(self, objectness, num_anchors_per_level): + # type: (Tensor, List[int]) + r = [] + offset = 0 + for ob in objectness.split(num_anchors_per_level, 1): + if torchvision._is_tracing(): + num_anchors, pre_nms_top_n = _onnx_get_num_anchors_and_pre_nms_top_n(ob, self.pre_nms_top_n()) + else: + num_anchors = ob.shape[1] + pre_nms_top_n = min(self.pre_nms_top_n(), num_anchors) + _, top_n_idx = ob.topk(pre_nms_top_n, dim=1) + r.append(top_n_idx + offset) + offset += 
num_anchors + return torch.cat(r, dim=1) + + def filter_proposals(self, proposals, objectness, image_shapes, num_anchors_per_level): + # type: (Tensor, Tensor, List[Tuple[int, int]], List[int]) + num_images = proposals.shape[0] + device = proposals.device + # do not backprop throught objectness + objectness = objectness.detach() + objectness = objectness.reshape(num_images, -1) + + levels = [ + torch.full((n,), idx, dtype=torch.int64, device=device) + for idx, n in enumerate(num_anchors_per_level) + ] + levels = torch.cat(levels, 0) + levels = levels.reshape(1, -1).expand_as(objectness) + + # select top_n boxes independently per level before applying nms + top_n_idx = self._get_top_n_idx(objectness, num_anchors_per_level) + + image_range = torch.arange(num_images, device=device) + batch_idx = image_range[:, None] + + objectness = objectness[batch_idx, top_n_idx] + levels = levels[batch_idx, top_n_idx] + proposals = proposals[batch_idx, top_n_idx] + + final_boxes = [] + final_scores = [] + for boxes, scores, lvl, img_shape in zip(proposals, objectness, levels, image_shapes): + boxes = box_ops.clip_boxes_to_image(boxes, img_shape) + keep = box_ops.remove_small_boxes(boxes, self.min_size) + boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep] + # non-maximum suppression, independently done per level + keep = box_ops.batched_nms(boxes, scores, lvl, self.nms_thresh) + # keep only topk scoring predictions + keep = keep[:self.post_nms_top_n()] + boxes, scores = boxes[keep], scores[keep] + final_boxes.append(boxes) + final_scores.append(scores) + return final_boxes, final_scores + + def compute_loss(self, objectness, pred_bbox_deltas, labels, regression_targets): + # type: (Tensor, Tensor, List[Tensor], List[Tensor]) + """ + Arguments: + objectness (Tensor) + pred_bbox_deltas (Tensor) + labels (List[Tensor]) + regression_targets (List[Tensor]) + + Returns: + objectness_loss (Tensor) + box_loss (Tensor) + """ + + sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels) + sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds, dim=0)).squeeze(1) + sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds, dim=0)).squeeze(1) + + sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0) + + objectness = objectness.flatten() + + labels = torch.cat(labels, dim=0) + regression_targets = torch.cat(regression_targets, dim=0) + + box_loss = F.l1_loss( + pred_bbox_deltas[sampled_pos_inds], + regression_targets[sampled_pos_inds], + reduction="sum", + ) / (sampled_inds.numel()) + + objectness_loss = F.binary_cross_entropy_with_logits( + objectness[sampled_inds], labels[sampled_inds] + ) + + return objectness_loss, box_loss + + def forward(self, images, features, targets=None): + # type: (ImageList, Dict[str, Tensor], Optional[List[Dict[str, Tensor]]]) + """ + Arguments: + images (ImageList): images for which we want to compute the predictions + features (List[Tensor]): features computed from the images that are + used for computing the predictions. Each tensor in the list + correspond to different feature levels + targets (List[Dict[Tensor]]): ground-truth boxes present in the image (optional). + If provided, each element in the dict should contain a field `boxes`, + with the locations of the ground-truth boxes. + + Returns: + boxes (List[Tensor]): the predicted boxes from the RPN, one Tensor per + image. + losses (Dict[Tensor]): the losses for the model during training. During + testing, it is an empty dict. 
+ """ + # RPN uses all feature maps that are available + features = list(features.values()) + objectness, pred_bbox_deltas = self.head(features) + anchors = self.anchor_generator(images, features) + + num_images = len(anchors) + num_anchors_per_level_shape_tensors = [o[0].shape for o in objectness] + num_anchors_per_level = [s[0] * s[1] * s[2] for s in num_anchors_per_level_shape_tensors] + objectness, pred_bbox_deltas = \ + concat_box_prediction_layers(objectness, pred_bbox_deltas) + # apply pred_bbox_deltas to anchors to obtain the decoded proposals + # note that we detach the deltas because Faster R-CNN do not backprop through + # the proposals + proposals = self.box_coder.decode(pred_bbox_deltas.detach(), anchors) + proposals = proposals.view(num_images, -1, 4) + boxes, scores = self.filter_proposals(proposals, objectness, images.image_sizes, num_anchors_per_level) + + losses = {} + if self.training: + assert targets is not None + labels, matched_gt_boxes = self.assign_targets_to_anchors(anchors, targets) + regression_targets = self.box_coder.encode(matched_gt_boxes, anchors) + loss_objectness, loss_rpn_box_reg = self.compute_loss( + objectness, pred_bbox_deltas, labels, regression_targets) + losses = { + "loss_objectness": loss_objectness, + "loss_rpn_box_reg": loss_rpn_box_reg, + } + return boxes, losses diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd.py new file mode 100644 index 0000000000..91a5ae5cda --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd.py @@ -0,0 +1,638 @@ +import warnings +from collections import OrderedDict +from typing import Any, Dict, List, Optional, Tuple +import pdb +import torch +import time +import torch.nn.functional as F +from torch import nn, Tensor +import numpy as np +import sys + +from ..._internally_replaced_utils import load_state_dict_from_url +from ...ops import boxes as box_ops +from ...utils import _log_api_usage_once +from .. import vgg +from . import _utils as det_utils +from .anchor_utils import DefaultBoxGenerator +from .backbone_utils import _validate_trainable_layers +from .transform import GeneralizedRCNNTransform + + +# for servers to immediately record the logs +def flush_print(func): + def new_print(*args, **kwargs): + func(*args, **kwargs) + sys.stdout.flush() + return new_print +print = flush_print(print) + + +__all__ = ["SSD", "ssd300_vgg16"] + +model_urls = { + "ssd300_vgg16_coco": "https://download.pytorch.org/models/ssd300_vgg16_coco-b556d3b4.pth", +} + +backbone_urls = { + # We port the features of a VGG16 backbone trained by amdegroot because unlike the one on TorchVision, it uses the + # same input standardization method as the paper. Ref: https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth + # Only the `features` weights have proper values, those on the `classifier` module are filled with nans. 
+ "vgg16_features": "https://download.pytorch.org/models/vgg16_features-amdegroot-88682ab5.pth" +} + + +def _xavier_init(conv: nn.Module): + for layer in conv.modules(): + if isinstance(layer, nn.Conv2d): + torch.nn.init.xavier_uniform_(layer.weight) + if layer.bias is not None: + torch.nn.init.constant_(layer.bias, 0.0) + + +class SSDHead(nn.Module): + def __init__(self, in_channels: List[int], num_anchors: List[int], num_classes: int): + super().__init__() + self.classification_head = SSDClassificationHead(in_channels, num_anchors, num_classes) + self.regression_head = SSDRegressionHead(in_channels, num_anchors) + + def forward(self, x: List[Tensor]) -> Dict[str, Tensor]: + return { + "bbox_regression": self.regression_head(x), + "cls_logits": self.classification_head(x), + } + + +class SSDScoringHead(nn.Module): + def __init__(self, module_list: nn.ModuleList, num_columns: int): + super().__init__() + self.module_list = module_list + self.num_columns = num_columns + + def _get_result_from_module_list(self, x: Tensor, idx: int) -> Tensor: + """ + This is equivalent to self.module_list[idx](x), + but torchscript doesn't support this yet + """ + num_blocks = len(self.module_list) + if idx < 0: + idx += num_blocks + out = x + for i, module in enumerate(self.module_list): + if i == idx: + out = module(x) + return out + + def forward(self, x: List[Tensor]) -> Tensor: + all_results = [] + + for i, features in enumerate(x): + results = self._get_result_from_module_list(features, i) + + # Permute output from (N, A * K, H, W) to (N, HWA, K). + N, _, H, W = results.shape + results = results.view(N, -1, self.num_columns, H, W) + results = results.permute(0, 3, 4, 1, 2) + results = results.reshape(N, -1, self.num_columns) # Size=(N, HWA, K) + + all_results.append(results) + + return torch.cat(all_results, dim=1) + + +class SSDClassificationHead(SSDScoringHead): + def __init__(self, in_channels: List[int], num_anchors: List[int], num_classes: int): + cls_logits = nn.ModuleList() + for channels, anchors in zip(in_channels, num_anchors): + cls_logits.append(nn.Conv2d(channels, num_classes * anchors, kernel_size=3, padding=1)) + _xavier_init(cls_logits) + super().__init__(cls_logits, num_classes) + + +class SSDRegressionHead(SSDScoringHead): + def __init__(self, in_channels: List[int], num_anchors: List[int]): + bbox_reg = nn.ModuleList() + for channels, anchors in zip(in_channels, num_anchors): + bbox_reg.append(nn.Conv2d(channels, 4 * anchors, kernel_size=3, padding=1)) + _xavier_init(bbox_reg) + super().__init__(bbox_reg, 4) + + +class SSD(nn.Module): + """ + Implements SSD architecture from `"SSD: Single Shot MultiBox Detector" `_. + + The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each + image, and should be in 0-1 range. Different images can have different sizes but they will be resized + to a fixed size before passing it to the backbone. + + The behavior of the model changes depending if it is in training or evaluation mode. + + During training, the model expects both the input tensors, as well as a targets (list of dictionary), + containing: + - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with + ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``. + - labels (Int64Tensor[N]): the class label for each ground-truth box + + The model returns a Dict[Tensor] during training, containing the classification and regression + losses. 
+ + During inference, the model requires only the input tensors, and returns the post-processed + predictions as a List[Dict[Tensor]], one for each input image. The fields of the Dict are as + follows, where ``N`` is the number of detections: + + - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with + ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``. + - labels (Int64Tensor[N]): the predicted labels for each detection + - scores (Tensor[N]): the scores for each detection + + Args: + backbone (nn.Module): the network used to compute the features for the model. + It should contain an out_channels attribute with the list of the output channels of + each feature map. The backbone should return a single Tensor or an OrderedDict[Tensor]. + anchor_generator (DefaultBoxGenerator): module that generates the default boxes for a + set of feature maps. + size (Tuple[int, int]): the width and height to which images will be rescaled before feeding them + to the backbone. + num_classes (int): number of output classes of the model (including the background). + image_mean (Tuple[float, float, float]): mean values used for input normalization. + They are generally the mean values of the dataset on which the backbone has been trained + on + image_std (Tuple[float, float, float]): std values used for input normalization. + They are generally the std values of the dataset on which the backbone has been trained on + head (nn.Module, optional): Module run on top of the backbone features. Defaults to a module containing + a classification and regression module. + score_thresh (float): Score threshold used for postprocessing the detections. + nms_thresh (float): NMS threshold used for postprocessing the detections. + detections_per_img (int): Number of best detections to keep after NMS. + iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be + considered as positive during training. + topk_candidates (int): Number of best detections to keep before NMS. + positive_fraction (float): a number between 0 and 1 which indicates the proportion of positive + proposals used during the training of the classification head. It is used to estimate the negative to + positive ratio. 
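+    Example (illustrative usage only; assumes the ``ssd300_vgg16`` constructor defined later in
+    this file is re-exported as ``torchvision.models.detection.ssd300_vgg16``)::
+
+        >>> model = torchvision.models.detection.ssd300_vgg16(pretrained=True)
+        >>> model.eval()
+        >>> x = [torch.rand(3, 300, 300), torch.rand(3, 500, 400)]
+        >>> predictions = model(x)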
+ """ + + __annotations__ = { + "box_coder": det_utils.BoxCoder, + "proposal_matcher": det_utils.Matcher, + } + + def __init__( + self, + backbone: nn.Module, + anchor_generator: DefaultBoxGenerator, + size: Tuple[int, int], + num_classes: int, + image_mean: Optional[List[float]] = None, + image_std: Optional[List[float]] = None, + head: Optional[nn.Module] = None, + score_thresh: float = 0.01, + nms_thresh: float = 0.45, + detections_per_img: int = 200, + iou_thresh: float = 0.5, + topk_candidates: int = 400, + positive_fraction: float = 0.25, + ): + super().__init__() + _log_api_usage_once(self) + + self.backbone = backbone + + self.anchor_generator = anchor_generator + + self.box_coder = det_utils.BoxCoder(weights=(10.0, 10.0, 5.0, 5.0)) + + if head is None: + if hasattr(backbone, "out_channels"): + out_channels = backbone.out_channels + else: + out_channels = det_utils.retrieve_out_channels(backbone, size) + + assert len(out_channels) == len(anchor_generator.aspect_ratios) + + num_anchors = self.anchor_generator.num_anchors_per_location() + head = SSDHead(out_channels, num_anchors, num_classes) + self.head = head + + self.proposal_matcher = det_utils.SSDMatcher(iou_thresh) + + if image_mean is None: + image_mean = [0.485, 0.456, 0.406] + if image_std is None: + image_std = [0.229, 0.224, 0.225] + self.transform = GeneralizedRCNNTransform( + min(size), max(size), image_mean, image_std, size_divisible=1, fixed_size=size + ) + + self.score_thresh = score_thresh + self.nms_thresh = nms_thresh + self.detections_per_img = detections_per_img + self.topk_candidates = topk_candidates + self.neg_to_pos_ratio = (1.0 - positive_fraction) / positive_fraction + + # used only on torchscript mode + self._has_warned = False + + @torch.jit.unused + def eager_outputs( + self, losses: Dict[str, Tensor], detections: List[Dict[str, Tensor]] + ) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]: + if self.training: + return losses + + return detections + + def compute_loss( + self, + targets: List[Dict[str, Tensor]], + head_outputs: Dict[str, Tensor], + anchors: List[Tensor], + matched_idxs: List[Tensor], + ) -> Dict[str, Tensor]: + bbox_regression = head_outputs["bbox_regression"] + cls_logits = head_outputs["cls_logits"] + + # Match original targets with default boxes + num_foreground = 0 + bbox_loss = [] + cls_targets = [] + for ( + targets_per_image, + bbox_regression_per_image, + cls_logits_per_image, + anchors_per_image, + matched_idxs_per_image, + ) in zip(targets, bbox_regression, cls_logits, anchors, matched_idxs): + + # produce the matching between boxes and targets + np_matched_idxs_per_image = matched_idxs_per_image.to('cpu').numpy() + np_foreground_idxs_per_image = np.where(np_matched_idxs_per_image>=0)[0] + boxes_num = len(np_foreground_idxs_per_image) + fix_boxes_num = 0 + if boxes_num <= 100: + fix_boxes_num = 100 + elif boxes_num <= 1000: + fix_boxes_num = 1000 + else: + fix_boxes_num = 3234 + + pad_idx = np.zeros(fix_boxes_num - boxes_num) + np_foreground_idxs_per_image = np.concatenate((np_foreground_idxs_per_image, pad_idx), axis=0) + foreground_idxs_per_image = torch.as_tensor(np_foreground_idxs_per_image, + dtype=matched_idxs_per_image.dtype, + device=matched_idxs_per_image.device) + foreground_matched_idxs_per_image = matched_idxs_per_image[foreground_idxs_per_image] + num_foreground += boxes_num + + # Calculate regression loss + matched_gt_boxes_per_image = targets_per_image["boxes"][foreground_matched_idxs_per_image] + bbox_regression_per_image = 
bbox_regression_per_image[foreground_idxs_per_image, :] + anchors_per_image = anchors_per_image[foreground_idxs_per_image, :] + target_regression = self.box_coder.encode_single(matched_gt_boxes_per_image, anchors_per_image) + bbox_regression_per_image[boxes_num:,:] = target_regression[boxes_num:,:] + bbox_loss.append( + torch.nn.functional.smooth_l1_loss(bbox_regression_per_image, target_regression, reduction="sum") + ) + + # Estimate ground truth for class targets + gt_classes_target = torch.zeros( + (cls_logits_per_image.size(0),), + dtype=targets_per_image["labels"].dtype, + device=targets_per_image["labels"].device, + ) + gt_classes_target[foreground_idxs_per_image] = targets_per_image["labels"][ + foreground_matched_idxs_per_image + ] + cls_targets.append(gt_classes_target) + + bbox_loss = torch.stack(bbox_loss) + cls_targets = torch.stack(cls_targets) + + # Calculate classification loss + num_classes = cls_logits.size(-1) + cls_loss = F.cross_entropy(cls_logits.view(-1, num_classes), cls_targets.view(-1), reduction="none").view( + cls_targets.size() + ) + + # Hard Negative Sampling + foreground_idxs = cls_targets > 0 + num_negative = self.neg_to_pos_ratio * foreground_idxs.sum(1, keepdim=True) + # num_negative[num_negative < self.neg_to_pos_ratio] = self.neg_to_pos_ratio + negative_loss = cls_loss.clone() + negative_loss[foreground_idxs] = -float(1) # use -inf to detect positive values that creeped in the sample + values, idx = negative_loss.sort(1, descending=True) + # background_idxs = torch.logical_and(idx.sort(1)[1] < num_negative, torch.isfinite(values)) + background_idxs = idx.sort(1)[1] < num_negative + + N = max(1, num_foreground) + return { + "bbox_regression": bbox_loss.sum() / N, + "classification": ((cls_loss*foreground_idxs).sum() + (cls_loss*background_idxs).sum()) / N, + } + + def forward( + self, images: List[Tensor], targets: Optional[List[Dict[str, Tensor]]] = None + ) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]: + if self.training and targets is None: + raise ValueError("In training mode, targets should be passed") + + if self.training: + assert targets is not None + for target in targets: + boxes = target["boxes"] + if isinstance(boxes, torch.Tensor): + if len(boxes.shape) != 2 or boxes.shape[-1] != 4: + raise ValueError(f"Expected target boxes to be a tensor of shape [N, 4], got {boxes.shape}.") + else: + raise ValueError(f"Expected target boxes to be of type Tensor, got {type(boxes)}.") + else: + original_image_sizes = targets + + # get the features from the backbone + features = self.backbone(images.tensors) + if isinstance(features, torch.Tensor): + features = OrderedDict([("0", features)]) + + features = list(features.values()) + + # compute the ssd heads outputs using the features + head_outputs = self.head(features) + + # create the set of anchors + anchors = self.anchor_generator(images, features) + + losses = {} + detections: List[Dict[str, Tensor]] = [] + if self.training: + assert targets is not None + matched_idxs = [] + for anchors_per_image, targets_per_image in zip(anchors, targets): + if targets_per_image["boxes"].numel() == 0: + matched_idxs.append( + torch.full((anchors_per_image.size(0),), -1, dtype=torch.int64, device=anchors_per_image.device) + ) + continue + match_quality_matrix = box_ops.box_iou(targets_per_image["boxes"], anchors_per_image) + matched_idxs.append(self.proposal_matcher(match_quality_matrix)) + losses = self.compute_loss(targets, head_outputs, anchors, matched_idxs) + else: + detections = 
self.postprocess_detections(head_outputs, anchors, images.image_sizes) + + detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes) + + if torch.jit.is_scripting(): + if not self._has_warned: + warnings.warn("SSD always returns a (Losses, Detections) tuple in scripting") + self._has_warned = True + return losses, detections + return self.eager_outputs(losses, detections) + + def postprocess_detections( + self, head_outputs: Dict[str, Tensor], image_anchors: List[Tensor], image_shapes: List[Tuple[int, int]] + ) -> List[Dict[str, Tensor]]: + bbox_regression = head_outputs["bbox_regression"] + pred_scores = F.softmax(head_outputs["cls_logits"], dim=-1) + + num_classes = pred_scores.size(-1) + device = pred_scores.device + + detections: List[Dict[str, Tensor]] = [] + + for boxes, scores, anchors, image_shape in zip(bbox_regression, pred_scores, image_anchors, image_shapes): + boxes = self.box_coder.decode_single(boxes, anchors) + boxes = box_ops.clip_boxes_to_image(boxes, image_shape) + + image_boxes = [] + image_scores = [] + image_labels = [] + for label in range(1, num_classes): + score = scores[:, label] + + keep_idxs = score > self.score_thresh + # score = score[keep_idxs] + # box = boxes[keep_idxs] + score = score * keep_idxs + box = boxes * keep_idxs.unsqueeze(1).repeat(1, 4) + + # keep only topk scoring predictions + num_topk = min(self.topk_candidates, score.size(0)) + score, idxs = score.topk(num_topk) + box = box[idxs] + + image_boxes.append(box) + image_scores.append(score) + image_labels.append(torch.full_like(score, fill_value=label, dtype=torch.int64, device=device)) + + image_boxes = torch.cat(image_boxes, dim=0) + image_scores = torch.cat(image_scores, dim=0) + image_labels = torch.cat(image_labels, dim=0) + + # non-maximum suppression + keep = box_ops.batched_nms(image_boxes, image_scores, image_labels, self.nms_thresh) + keep = keep[: self.detections_per_img] + + detections.append( + { + "boxes": image_boxes[keep], + "scores": image_scores[keep], + "labels": image_labels[keep], + } + ) + return detections + + +class SSDFeatureExtractorVGG(nn.Module): + def __init__(self, backbone: nn.Module, highres: bool): + super().__init__() + + _, _, maxpool3_pos, maxpool4_pos, _ = (i for i, layer in enumerate(backbone) if isinstance(layer, nn.MaxPool2d)) + + # Patch ceil_mode for maxpool3 to get the same WxH output sizes as the paper + backbone[maxpool3_pos].ceil_mode = True + + # parameters used for L2 regularization + rescaling + self.scale_weight = nn.Parameter(torch.ones(512) * 20) + + # Multiple Feature maps - page 4, Fig 2 of SSD paper + self.features = nn.Sequential(*backbone[:maxpool4_pos]) # until conv4_3 + + # SSD300 case - page 4, Fig 2 of SSD paper + extra = nn.ModuleList( + [ + nn.Sequential( + nn.Conv2d(1024, 256, kernel_size=1), + nn.ReLU(inplace=True), + nn.Conv2d(256, 512, kernel_size=3, padding=1, stride=2), # conv8_2 + nn.ReLU(inplace=True), + ), + nn.Sequential( + nn.Conv2d(512, 128, kernel_size=1), + nn.ReLU(inplace=True), + nn.Conv2d(128, 256, kernel_size=3, padding=1, stride=2), # conv9_2 + nn.ReLU(inplace=True), + ), + nn.Sequential( + nn.Conv2d(256, 128, kernel_size=1), + nn.ReLU(inplace=True), + nn.Conv2d(128, 256, kernel_size=3), # conv10_2 + nn.ReLU(inplace=True), + ), + nn.Sequential( + nn.Conv2d(256, 128, kernel_size=1), + nn.ReLU(inplace=True), + nn.Conv2d(128, 256, kernel_size=3), # conv11_2 + nn.ReLU(inplace=True), + ), + ] + ) + if highres: + # Additional layers for the SSD512 case. See page 11, footernote 5. 
+            extra.append(
+                nn.Sequential(
+                    nn.Conv2d(256, 128, kernel_size=1),
+                    nn.ReLU(inplace=True),
+                    nn.Conv2d(128, 256, kernel_size=4),  # conv12_2
+                    nn.ReLU(inplace=True),
+                )
+            )
+        _xavier_init(extra)
+
+        fc = nn.Sequential(
+            nn.MaxPool2d(kernel_size=3, stride=1, padding=1, ceil_mode=False),  # add modified maxpool5
+            nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, padding=6, dilation=6),  # FC6 with atrous
+            nn.ReLU(inplace=True),
+            nn.Conv2d(in_channels=1024, out_channels=1024, kernel_size=1),  # FC7
+            nn.ReLU(inplace=True),
+        )
+        _xavier_init(fc)
+        extra.insert(
+            0,
+            nn.Sequential(
+                *backbone[maxpool4_pos:-1],  # until conv5_3, skip maxpool5
+                fc,
+            ),
+        )
+        self.extra = extra
+
+    def forward(self, x: Tensor) -> Dict[str, Tensor]:
+        # L2 regularization + rescaling of the 1st block's feature map
+        x = self.features(x)
+        rescaled = self.scale_weight.view(1, -1, 1, 1) * F.normalize(x)
+        output = [rescaled]
+
+        # Calculate the feature maps of the remaining blocks
+        for block in self.extra:
+            x = block(x)
+            output.append(x)
+
+        return OrderedDict([(str(i), v) for i, v in enumerate(output)])
+
+
+def _vgg_extractor(backbone: vgg.VGG, highres: bool, trainable_layers: int):
+    backbone = backbone.features
+    # Gather the indices of maxpools. These are the locations of output blocks.
+    stage_indices = [0] + [i for i, b in enumerate(backbone) if isinstance(b, nn.MaxPool2d)][:-1]
+    num_stages = len(stage_indices)
+
+    # find the index of the layer from which we won't freeze
+    assert 0 <= trainable_layers <= num_stages
+    freeze_before = len(backbone) if trainable_layers == 0 else stage_indices[num_stages - trainable_layers]
+
+    for b in backbone[:freeze_before]:
+        for parameter in b.parameters():
+            parameter.requires_grad_(False)
+
+    return SSDFeatureExtractorVGG(backbone, highres)
+
+
+def ssd300_vgg16(
+    pretrained: bool = False,
+    progress: bool = True,
+    num_classes: int = 91,
+    pretrained_backbone: bool = True,
+    trainable_backbone_layers: Optional[int] = None,
+    **kwargs: Any,
+):
+    """Constructs an SSD model with input size 300x300 and a VGG16 backbone.
+
+    Reference: `"SSD: Single Shot MultiBox Detector" <https://arxiv.org/abs/1512.02325>`_.
+
+    The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each
+    image, and should be in 0-1 range. Different images can have different sizes, but they will be resized
+    to a fixed size before passing them to the backbone.
+
+    The behavior of the model changes depending on whether it is in training or evaluation mode.
+
+    During training, the model expects both the input tensors and the targets (a list of dictionaries),
+    containing:
+
+    - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with
+      ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
+ - labels (Int64Tensor[N]): the predicted labels for each detection + - scores (Tensor[N]): the scores for each detection + + Example: + + >>> model = torchvision.models.detection.ssd300_vgg16(pretrained=True) + >>> model.eval() + >>> x = [torch.rand(3, 300, 300), torch.rand(3, 500, 400)] + >>> predictions = model(x) + + Args: + pretrained (bool): If True, returns a model pre-trained on COCO train2017 + progress (bool): If True, displays a progress bar of the download to stderr + num_classes (int): number of output classes of the model (including the background) + pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet + trainable_backbone_layers (int): number of trainable (not frozen) resnet layers starting from final block. + Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable. + """ + if "size" in kwargs: + warnings.warn("The size of the model is already fixed; ignoring the argument.") + + trainable_backbone_layers = _validate_trainable_layers( + pretrained or pretrained_backbone, trainable_backbone_layers, 5, 4 + ) + + if pretrained: + # no need to download the backbone if pretrained is set + pretrained_backbone = False + + # Use custom backbones more appropriate for SSD + backbone = vgg.vgg16(pretrained=False, progress=progress) + if pretrained_backbone: + state_dict = load_state_dict_from_url(backbone_urls["vgg16_features"], progress=progress) + backbone.load_state_dict(state_dict) + + backbone = _vgg_extractor(backbone, False, trainable_backbone_layers) + anchor_generator = DefaultBoxGenerator( + [[2], [2, 3], [2, 3], [2, 3], [2], [2]], + scales=[0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05], + steps=[8, 16, 32, 64, 100, 300], + ) + + defaults = { + # Rescale the input in a way compatible to the backbone + "image_mean": [0.48235, 0.45882, 0.40784], + "image_std": [1.0 / 255.0, 1.0 / 255.0, 1.0 / 255.0], # undo the 0-1 scaling of toTensor + } + kwargs = {**defaults, **kwargs} + model = SSD(backbone, anchor_generator, (300, 300), num_classes, **kwargs) + if pretrained: + weights_name = "ssd300_vgg16_coco" + if model_urls.get(weights_name, None) is None: + raise ValueError(f"No checkpoint is available for model {weights_name}") + state_dict = load_state_dict_from_url(model_urls[weights_name], progress=progress) + model.load_state_dict(state_dict) + return model diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd_origin.py new file mode 100644 index 0000000000..be30bb54c4 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd_origin.py @@ -0,0 +1,629 @@ +import warnings +from collections import OrderedDict +from typing import Any, Dict, List, Optional, Tuple + +import torch +import torch.nn.functional as F +from torch import nn, Tensor + +from ..._internally_replaced_utils import load_state_dict_from_url +from ...ops import boxes as box_ops +from ...utils import _log_api_usage_once +from .. import vgg +from . 
import _utils as det_utils +from .anchor_utils import DefaultBoxGenerator +from .backbone_utils import _validate_trainable_layers +from .transform import GeneralizedRCNNTransform + +__all__ = ["SSD", "ssd300_vgg16"] + +model_urls = { + "ssd300_vgg16_coco": "https://download.pytorch.org/models/ssd300_vgg16_coco-b556d3b4.pth", +} + +backbone_urls = { + # We port the features of a VGG16 backbone trained by amdegroot because unlike the one on TorchVision, it uses the + # same input standardization method as the paper. Ref: https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth + # Only the `features` weights have proper values, those on the `classifier` module are filled with nans. + "vgg16_features": "https://download.pytorch.org/models/vgg16_features-amdegroot-88682ab5.pth" +} + + +def _xavier_init(conv: nn.Module): + for layer in conv.modules(): + if isinstance(layer, nn.Conv2d): + torch.nn.init.xavier_uniform_(layer.weight) + if layer.bias is not None: + torch.nn.init.constant_(layer.bias, 0.0) + + +class SSDHead(nn.Module): + def __init__(self, in_channels: List[int], num_anchors: List[int], num_classes: int): + super().__init__() + self.classification_head = SSDClassificationHead(in_channels, num_anchors, num_classes) + self.regression_head = SSDRegressionHead(in_channels, num_anchors) + + def forward(self, x: List[Tensor]) -> Dict[str, Tensor]: + return { + "bbox_regression": self.regression_head(x), + "cls_logits": self.classification_head(x), + } + + +class SSDScoringHead(nn.Module): + def __init__(self, module_list: nn.ModuleList, num_columns: int): + super().__init__() + self.module_list = module_list + self.num_columns = num_columns + + def _get_result_from_module_list(self, x: Tensor, idx: int) -> Tensor: + """ + This is equivalent to self.module_list[idx](x), + but torchscript doesn't support this yet + """ + num_blocks = len(self.module_list) + if idx < 0: + idx += num_blocks + out = x + for i, module in enumerate(self.module_list): + if i == idx: + out = module(x) + return out + + def forward(self, x: List[Tensor]) -> Tensor: + all_results = [] + + for i, features in enumerate(x): + results = self._get_result_from_module_list(features, i) + + # Permute output from (N, A * K, H, W) to (N, HWA, K). + N, _, H, W = results.shape + results = results.view(N, -1, self.num_columns, H, W) + results = results.permute(0, 3, 4, 1, 2) + results = results.reshape(N, -1, self.num_columns) # Size=(N, HWA, K) + + all_results.append(results) + + return torch.cat(all_results, dim=1) + + +class SSDClassificationHead(SSDScoringHead): + def __init__(self, in_channels: List[int], num_anchors: List[int], num_classes: int): + cls_logits = nn.ModuleList() + for channels, anchors in zip(in_channels, num_anchors): + cls_logits.append(nn.Conv2d(channels, num_classes * anchors, kernel_size=3, padding=1)) + _xavier_init(cls_logits) + super().__init__(cls_logits, num_classes) + + +class SSDRegressionHead(SSDScoringHead): + def __init__(self, in_channels: List[int], num_anchors: List[int]): + bbox_reg = nn.ModuleList() + for channels, anchors in zip(in_channels, num_anchors): + bbox_reg.append(nn.Conv2d(channels, 4 * anchors, kernel_size=3, padding=1)) + _xavier_init(bbox_reg) + super().__init__(bbox_reg, 4) + + +class SSD(nn.Module): + """ + Implements SSD architecture from `"SSD: Single Shot MultiBox Detector" `_. + + The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each + image, and should be in 0-1 range. 
Different images can have different sizes, but they will be resized
+    to a fixed size before passing them to the backbone.
+
+    The behavior of the model changes depending on whether it is in training or evaluation mode.
+
+    During training, the model expects both the input tensors and the targets (a list of dictionaries),
+    containing:
+        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with
+          ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
+        - labels (Int64Tensor[N]): the class label for each ground-truth box
+
+    The model returns a Dict[Tensor] during training, containing the classification and regression
+    losses.
+
+    During inference, the model requires only the input tensors, and returns the post-processed
+    predictions as a List[Dict[Tensor]], one for each input image. The fields of the Dict are as
+    follows, where ``N`` is the number of detections:
+
+        - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with
+          ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
+        - labels (Int64Tensor[N]): the predicted labels for each detection
+        - scores (Tensor[N]): the scores for each detection
+
+    Args:
+        backbone (nn.Module): the network used to compute the features for the model.
+            It should contain an ``out_channels`` attribute with the list of the output channels of
+            each feature map. The backbone should return a single Tensor or an OrderedDict[Tensor].
+        anchor_generator (DefaultBoxGenerator): module that generates the default boxes for a
+            set of feature maps.
+        size (Tuple[int, int]): the width and height to which images will be rescaled before feeding them
+            to the backbone.
+        num_classes (int): number of output classes of the model (including the background).
+        image_mean (Tuple[float, float, float]): mean values used for input normalization.
+            They are generally the mean values of the dataset on which the backbone has been trained.
+        image_std (Tuple[float, float, float]): std values used for input normalization.
+            They are generally the std values of the dataset on which the backbone has been trained.
+        head (nn.Module, optional): Module run on top of the backbone features. Defaults to a module containing
+            a classification and regression module.
+        score_thresh (float): Score threshold used for postprocessing the detections.
+        nms_thresh (float): NMS threshold used for postprocessing the detections.
+        detections_per_img (int): Number of best detections to keep after NMS.
+        iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be
+            considered as positive during training.
+        topk_candidates (int): Number of best detections to keep before NMS.
+        positive_fraction (float): a number between 0 and 1 which indicates the proportion of positive
+            proposals used during the training of the classification head. It is used to estimate the negative to
+            positive ratio.
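+
+    Example (an illustrative sketch; the toy backbone and its ``out_channels`` value below are
+    made-up stand-ins, and ``SSD`` / ``DefaultBoxGenerator`` refer to the classes defined in and
+    imported by this module):
+
+        >>> import torch
+        >>> from torch import nn
+        >>> class TinyBackbone(nn.Module):
+        ...     def __init__(self):
+        ...         super().__init__()
+        ...         # a single feature map with 64 channels, downsampled by 16
+        ...         self.body = nn.Conv2d(3, 64, kernel_size=3, stride=16, padding=1)
+        ...         self.out_channels = [64]
+        ...     def forward(self, x):
+        ...         return self.body(x)
+        >>> anchor_generator = DefaultBoxGenerator([[2]])
+        >>> model = SSD(TinyBackbone(), anchor_generator, size=(300, 300), num_classes=91)
+        >>> model.eval()
+        >>> detections = model([torch.rand(3, 300, 300)])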
+ """ + + __annotations__ = { + "box_coder": det_utils.BoxCoder, + "proposal_matcher": det_utils.Matcher, + } + + def __init__( + self, + backbone: nn.Module, + anchor_generator: DefaultBoxGenerator, + size: Tuple[int, int], + num_classes: int, + image_mean: Optional[List[float]] = None, + image_std: Optional[List[float]] = None, + head: Optional[nn.Module] = None, + score_thresh: float = 0.01, + nms_thresh: float = 0.45, + detections_per_img: int = 200, + iou_thresh: float = 0.5, + topk_candidates: int = 400, + positive_fraction: float = 0.25, + ): + super().__init__() + _log_api_usage_once(self) + + self.backbone = backbone + + self.anchor_generator = anchor_generator + + self.box_coder = det_utils.BoxCoder(weights=(10.0, 10.0, 5.0, 5.0)) + + if head is None: + if hasattr(backbone, "out_channels"): + out_channels = backbone.out_channels + else: + out_channels = det_utils.retrieve_out_channels(backbone, size) + + assert len(out_channels) == len(anchor_generator.aspect_ratios) + + num_anchors = self.anchor_generator.num_anchors_per_location() + head = SSDHead(out_channels, num_anchors, num_classes) + self.head = head + + self.proposal_matcher = det_utils.SSDMatcher(iou_thresh) + + if image_mean is None: + image_mean = [0.485, 0.456, 0.406] + if image_std is None: + image_std = [0.229, 0.224, 0.225] + self.transform = GeneralizedRCNNTransform( + min(size), max(size), image_mean, image_std, size_divisible=1, fixed_size=size + ) + + self.score_thresh = score_thresh + self.nms_thresh = nms_thresh + self.detections_per_img = detections_per_img + self.topk_candidates = topk_candidates + self.neg_to_pos_ratio = (1.0 - positive_fraction) / positive_fraction + + # used only on torchscript mode + self._has_warned = False + + @torch.jit.unused + def eager_outputs( + self, losses: Dict[str, Tensor], detections: List[Dict[str, Tensor]] + ) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]: + if self.training: + return losses + + return detections + + def compute_loss( + self, + targets: List[Dict[str, Tensor]], + head_outputs: Dict[str, Tensor], + anchors: List[Tensor], + matched_idxs: List[Tensor], + ) -> Dict[str, Tensor]: + bbox_regression = head_outputs["bbox_regression"] + cls_logits = head_outputs["cls_logits"] + + # Match original targets with default boxes + num_foreground = 0 + bbox_loss = [] + cls_targets = [] + for ( + targets_per_image, + bbox_regression_per_image, + cls_logits_per_image, + anchors_per_image, + matched_idxs_per_image, + ) in zip(targets, bbox_regression, cls_logits, anchors, matched_idxs): + # produce the matching between boxes and targets + foreground_idxs_per_image = torch.where(matched_idxs_per_image >= 0)[0] + foreground_matched_idxs_per_image = matched_idxs_per_image[foreground_idxs_per_image] + num_foreground += foreground_matched_idxs_per_image.numel() + + # Calculate regression loss + matched_gt_boxes_per_image = targets_per_image["boxes"][foreground_matched_idxs_per_image] + bbox_regression_per_image = bbox_regression_per_image[foreground_idxs_per_image, :] + anchors_per_image = anchors_per_image[foreground_idxs_per_image, :] + target_regression = self.box_coder.encode_single(matched_gt_boxes_per_image, anchors_per_image) + bbox_loss.append( + torch.nn.functional.smooth_l1_loss(bbox_regression_per_image, target_regression, reduction="sum") + ) + + # Estimate ground truth for class targets + gt_classes_target = torch.zeros( + (cls_logits_per_image.size(0),), + dtype=targets_per_image["labels"].dtype, + device=targets_per_image["labels"].device, + ) + 
gt_classes_target[foreground_idxs_per_image] = targets_per_image["labels"][ + foreground_matched_idxs_per_image + ] + cls_targets.append(gt_classes_target) + + bbox_loss = torch.stack(bbox_loss) + cls_targets = torch.stack(cls_targets) + + # Calculate classification loss + num_classes = cls_logits.size(-1) + cls_loss = F.cross_entropy(cls_logits.view(-1, num_classes), cls_targets.view(-1), reduction="none").view( + cls_targets.size() + ) + + # Hard Negative Sampling + foreground_idxs = cls_targets > 0 + num_negative = self.neg_to_pos_ratio * foreground_idxs.sum(1, keepdim=True) + # num_negative[num_negative < self.neg_to_pos_ratio] = self.neg_to_pos_ratio + negative_loss = cls_loss.clone() + negative_loss[foreground_idxs] = -float("inf") # use -inf to detect positive values that creeped in the sample + values, idx = negative_loss.sort(1, descending=True) + # background_idxs = torch.logical_and(idx.sort(1)[1] < num_negative, torch.isfinite(values)) + background_idxs = idx.sort(1)[1] < num_negative + + N = max(1, num_foreground) + return { + "bbox_regression": bbox_loss.sum() / N, + "classification": (cls_loss[foreground_idxs].sum() + cls_loss[background_idxs].sum()) / N, + } + + def forward( + self, images: List[Tensor], targets: Optional[List[Dict[str, Tensor]]] = None + ) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]: + if self.training and targets is None: + raise ValueError("In training mode, targets should be passed") + + if self.training: + assert targets is not None + for target in targets: + boxes = target["boxes"] + if isinstance(boxes, torch.Tensor): + if len(boxes.shape) != 2 or boxes.shape[-1] != 4: + raise ValueError(f"Expected target boxes to be a tensor of shape [N, 4], got {boxes.shape}.") + else: + raise ValueError(f"Expected target boxes to be of type Tensor, got {type(boxes)}.") + + # get the original image sizes + original_image_sizes: List[Tuple[int, int]] = [] + for img in images: + val = img.shape[-2:] + assert len(val) == 2 + original_image_sizes.append((val[0], val[1])) + + # transform the input + images, targets = self.transform(images, targets) + + # Check for degenerate boxes + if targets is not None: + for target_idx, target in enumerate(targets): + boxes = target["boxes"] + degenerate_boxes = boxes[:, 2:] <= boxes[:, :2] + if degenerate_boxes.any(): + bb_idx = torch.where(degenerate_boxes.any(dim=1))[0][0] + degen_bb: List[float] = boxes[bb_idx].tolist() + raise ValueError( + "All bounding boxes should have positive height and width." + f" Found invalid box {degen_bb} for target at index {target_idx}." 
+ ) + + # get the features from the backbone + features = self.backbone(images.tensors) + if isinstance(features, torch.Tensor): + features = OrderedDict([("0", features)]) + + features = list(features.values()) + + # compute the ssd heads outputs using the features + head_outputs = self.head(features) + + # create the set of anchors + anchors = self.anchor_generator(images, features) + + losses = {} + detections: List[Dict[str, Tensor]] = [] + if self.training: + assert targets is not None + + matched_idxs = [] + for anchors_per_image, targets_per_image in zip(anchors, targets): + if targets_per_image["boxes"].numel() == 0: + matched_idxs.append( + torch.full((anchors_per_image.size(0),), -1, dtype=torch.int64, device=anchors_per_image.device) + ) + continue + + match_quality_matrix = box_ops.box_iou(targets_per_image["boxes"], anchors_per_image) + matched_idxs.append(self.proposal_matcher(match_quality_matrix)) + + losses = self.compute_loss(targets, head_outputs, anchors, matched_idxs) + else: + detections = self.postprocess_detections(head_outputs, anchors, images.image_sizes) + detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes) + + if torch.jit.is_scripting(): + if not self._has_warned: + warnings.warn("SSD always returns a (Losses, Detections) tuple in scripting") + self._has_warned = True + return losses, detections + return self.eager_outputs(losses, detections) + + def postprocess_detections( + self, head_outputs: Dict[str, Tensor], image_anchors: List[Tensor], image_shapes: List[Tuple[int, int]] + ) -> List[Dict[str, Tensor]]: + bbox_regression = head_outputs["bbox_regression"] + pred_scores = F.softmax(head_outputs["cls_logits"], dim=-1) + + num_classes = pred_scores.size(-1) + device = pred_scores.device + + detections: List[Dict[str, Tensor]] = [] + + for boxes, scores, anchors, image_shape in zip(bbox_regression, pred_scores, image_anchors, image_shapes): + boxes = self.box_coder.decode_single(boxes, anchors) + boxes = box_ops.clip_boxes_to_image(boxes, image_shape) + + image_boxes = [] + image_scores = [] + image_labels = [] + for label in range(1, num_classes): + score = scores[:, label] + + keep_idxs = score > self.score_thresh + score = score[keep_idxs] + box = boxes[keep_idxs] + + # keep only topk scoring predictions + num_topk = min(self.topk_candidates, score.size(0)) + score, idxs = score.topk(num_topk) + box = box[idxs] + + image_boxes.append(box) + image_scores.append(score) + image_labels.append(torch.full_like(score, fill_value=label, dtype=torch.int64, device=device)) + + image_boxes = torch.cat(image_boxes, dim=0) + image_scores = torch.cat(image_scores, dim=0) + image_labels = torch.cat(image_labels, dim=0) + + # non-maximum suppression + keep = box_ops.batched_nms(image_boxes, image_scores, image_labels, self.nms_thresh) + keep = keep[: self.detections_per_img] + + detections.append( + { + "boxes": image_boxes[keep], + "scores": image_scores[keep], + "labels": image_labels[keep], + } + ) + return detections + + +class SSDFeatureExtractorVGG(nn.Module): + def __init__(self, backbone: nn.Module, highres: bool): + super().__init__() + + _, _, maxpool3_pos, maxpool4_pos, _ = (i for i, layer in enumerate(backbone) if isinstance(layer, nn.MaxPool2d)) + + # Patch ceil_mode for maxpool3 to get the same WxH output sizes as the paper + backbone[maxpool3_pos].ceil_mode = True + + # parameters used for L2 regularization + rescaling + self.scale_weight = nn.Parameter(torch.ones(512) * 20) + + # Multiple Feature maps - page 4, Fig 
2 of SSD paper + self.features = nn.Sequential(*backbone[:maxpool4_pos]) # until conv4_3 + + # SSD300 case - page 4, Fig 2 of SSD paper + extra = nn.ModuleList( + [ + nn.Sequential( + nn.Conv2d(1024, 256, kernel_size=1), + nn.ReLU(inplace=True), + nn.Conv2d(256, 512, kernel_size=3, padding=1, stride=2), # conv8_2 + nn.ReLU(inplace=True), + ), + nn.Sequential( + nn.Conv2d(512, 128, kernel_size=1), + nn.ReLU(inplace=True), + nn.Conv2d(128, 256, kernel_size=3, padding=1, stride=2), # conv9_2 + nn.ReLU(inplace=True), + ), + nn.Sequential( + nn.Conv2d(256, 128, kernel_size=1), + nn.ReLU(inplace=True), + nn.Conv2d(128, 256, kernel_size=3), # conv10_2 + nn.ReLU(inplace=True), + ), + nn.Sequential( + nn.Conv2d(256, 128, kernel_size=1), + nn.ReLU(inplace=True), + nn.Conv2d(128, 256, kernel_size=3), # conv11_2 + nn.ReLU(inplace=True), + ), + ] + ) + if highres: + # Additional layers for the SSD512 case. See page 11, footernote 5. + extra.append( + nn.Sequential( + nn.Conv2d(256, 128, kernel_size=1), + nn.ReLU(inplace=True), + nn.Conv2d(128, 256, kernel_size=4), # conv12_2 + nn.ReLU(inplace=True), + ) + ) + _xavier_init(extra) + + fc = nn.Sequential( + nn.MaxPool2d(kernel_size=3, stride=1, padding=1, ceil_mode=False), # add modified maxpool5 + nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, padding=6, dilation=6), # FC6 with atrous + nn.ReLU(inplace=True), + nn.Conv2d(in_channels=1024, out_channels=1024, kernel_size=1), # FC7 + nn.ReLU(inplace=True), + ) + _xavier_init(fc) + extra.insert( + 0, + nn.Sequential( + *backbone[maxpool4_pos:-1], # until conv5_3, skip maxpool5 + fc, + ), + ) + self.extra = extra + + def forward(self, x: Tensor) -> Dict[str, Tensor]: + # L2 regularization + Rescaling of 1st block's feature map + x = self.features(x) + rescaled = self.scale_weight.view(1, -1, 1, 1) * F.normalize(x) + output = [rescaled] + + # Calculating Feature maps for the rest blocks + for block in self.extra: + x = block(x) + output.append(x) + + return OrderedDict([(str(i), v) for i, v in enumerate(output)]) + + +def _vgg_extractor(backbone: vgg.VGG, highres: bool, trainable_layers: int): + backbone = backbone.features + # Gather the indices of maxpools. These are the locations of output blocks. + stage_indices = [0] + [i for i, b in enumerate(backbone) if isinstance(b, nn.MaxPool2d)][:-1] + num_stages = len(stage_indices) + + # find the index of the layer from which we wont freeze + assert 0 <= trainable_layers <= num_stages + freeze_before = len(backbone) if trainable_layers == 0 else stage_indices[num_stages - trainable_layers] + + for b in backbone[:freeze_before]: + for parameter in b.parameters(): + parameter.requires_grad_(False) + + return SSDFeatureExtractorVGG(backbone, highres) + + +def ssd300_vgg16( + pretrained: bool = False, + progress: bool = True, + num_classes: int = 91, + pretrained_backbone: bool = True, + trainable_backbone_layers: Optional[int] = None, + **kwargs: Any, +): + """Constructs an SSD model with input size 300x300 and a VGG16 backbone. + + Reference: `"SSD: Single Shot MultiBox Detector" `_. + + The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each + image, and should be in 0-1 range. Different images can have different sizes but they will be resized + to a fixed size before passing it to the backbone. + + The behavior of the model changes depending if it is in training or evaluation mode. 
+ + During training, the model expects both the input tensors, as well as a targets (list of dictionary), + containing: + + - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with + ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``. + - labels (Int64Tensor[N]): the class label for each ground-truth box + + The model returns a Dict[Tensor] during training, containing the classification and regression + losses. + + During inference, the model requires only the input tensors, and returns the post-processed + predictions as a List[Dict[Tensor]], one for each input image. The fields of the Dict are as + follows, where ``N`` is the number of detections: + + - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with + ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``. + - labels (Int64Tensor[N]): the predicted labels for each detection + - scores (Tensor[N]): the scores for each detection + + Example: + + >>> model = torchvision.models.detection.ssd300_vgg16(pretrained=True) + >>> model.eval() + >>> x = [torch.rand(3, 300, 300), torch.rand(3, 500, 400)] + >>> predictions = model(x) + + Args: + pretrained (bool): If True, returns a model pre-trained on COCO train2017 + progress (bool): If True, displays a progress bar of the download to stderr + num_classes (int): number of output classes of the model (including the background) + pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet + trainable_backbone_layers (int): number of trainable (not frozen) resnet layers starting from final block. + Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable. + """ + if "size" in kwargs: + warnings.warn("The size of the model is already fixed; ignoring the argument.") + + trainable_backbone_layers = _validate_trainable_layers( + pretrained or pretrained_backbone, trainable_backbone_layers, 5, 4 + ) + + if pretrained: + # no need to download the backbone if pretrained is set + pretrained_backbone = False + + # Use custom backbones more appropriate for SSD + backbone = vgg.vgg16(pretrained=False, progress=progress) + if pretrained_backbone: + state_dict = load_state_dict_from_url(backbone_urls["vgg16_features"], progress=progress) + backbone.load_state_dict(state_dict) + + backbone = _vgg_extractor(backbone, False, trainable_backbone_layers) + anchor_generator = DefaultBoxGenerator( + [[2], [2, 3], [2, 3], [2, 3], [2], [2]], + scales=[0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05], + steps=[8, 16, 32, 64, 100, 300], + ) + + defaults = { + # Rescale the input in a way compatible to the backbone + "image_mean": [0.48235, 0.45882, 0.40784], + "image_std": [1.0 / 255.0, 1.0 / 255.0, 1.0 / 255.0], # undo the 0-1 scaling of toTensor + } + kwargs = {**defaults, **kwargs} + model = SSD(backbone, anchor_generator, (300, 300), num_classes, **kwargs) + if pretrained: + weights_name = "ssd300_vgg16_coco" + if model_urls.get(weights_name, None) is None: + raise ValueError(f"No checkpoint is available for model {weights_name}") + state_dict = load_state_dict_from_url(model_urls[weights_name], progress=progress) + model.load_state_dict(state_dict) + return model diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssdlite.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssdlite.py new file mode 100644 index 0000000000..652d3afe4d --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssdlite.py @@ -0,0 
+1,274 @@ +import warnings +from collections import OrderedDict +from functools import partial +from typing import Any, Callable, Dict, List, Optional, Union + +import torch +from torch import nn, Tensor + +from ..._internally_replaced_utils import load_state_dict_from_url +from ...ops.misc import ConvNormActivation +from ...utils import _log_api_usage_once +from .. import mobilenet +from . import _utils as det_utils +from .anchor_utils import DefaultBoxGenerator +from .backbone_utils import _validate_trainable_layers +from .ssd import SSD, SSDScoringHead + + +__all__ = ["ssdlite320_mobilenet_v3_large"] + +model_urls = { + "ssdlite320_mobilenet_v3_large_coco": "https://download.pytorch.org/models/ssdlite320_mobilenet_v3_large_coco-a79551df.pth" +} + + +# Building blocks of SSDlite as described in section 6.2 of MobileNetV2 paper +def _prediction_block( + in_channels: int, out_channels: int, kernel_size: int, norm_layer: Callable[..., nn.Module] +) -> nn.Sequential: + return nn.Sequential( + # 3x3 depthwise with stride 1 and padding 1 + ConvNormActivation( + in_channels, + in_channels, + kernel_size=kernel_size, + groups=in_channels, + norm_layer=norm_layer, + activation_layer=nn.ReLU6, + ), + # 1x1 projetion to output channels + nn.Conv2d(in_channels, out_channels, 1), + ) + + +def _extra_block(in_channels: int, out_channels: int, norm_layer: Callable[..., nn.Module]) -> nn.Sequential: + activation = nn.ReLU6 + intermediate_channels = out_channels // 2 + return nn.Sequential( + # 1x1 projection to half output channels + ConvNormActivation( + in_channels, intermediate_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=activation + ), + # 3x3 depthwise with stride 2 and padding 1 + ConvNormActivation( + intermediate_channels, + intermediate_channels, + kernel_size=3, + stride=2, + groups=intermediate_channels, + norm_layer=norm_layer, + activation_layer=activation, + ), + # 1x1 projetion to output channels + ConvNormActivation( + intermediate_channels, out_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=activation + ), + ) + + +def _normal_init(conv: nn.Module): + for layer in conv.modules(): + if isinstance(layer, nn.Conv2d): + torch.nn.init.normal_(layer.weight, mean=0.0, std=0.03) + if layer.bias is not None: + torch.nn.init.constant_(layer.bias, 0.0) + + +class SSDLiteHead(nn.Module): + def __init__( + self, in_channels: List[int], num_anchors: List[int], num_classes: int, norm_layer: Callable[..., nn.Module] + ): + super().__init__() + self.classification_head = SSDLiteClassificationHead(in_channels, num_anchors, num_classes, norm_layer) + self.regression_head = SSDLiteRegressionHead(in_channels, num_anchors, norm_layer) + + def forward(self, x: List[Tensor]) -> Dict[str, Tensor]: + return { + "bbox_regression": self.regression_head(x), + "cls_logits": self.classification_head(x), + } + + +class SSDLiteClassificationHead(SSDScoringHead): + def __init__( + self, in_channels: List[int], num_anchors: List[int], num_classes: int, norm_layer: Callable[..., nn.Module] + ): + cls_logits = nn.ModuleList() + for channels, anchors in zip(in_channels, num_anchors): + cls_logits.append(_prediction_block(channels, num_classes * anchors, 3, norm_layer)) + _normal_init(cls_logits) + super().__init__(cls_logits, num_classes) + + +class SSDLiteRegressionHead(SSDScoringHead): + def __init__(self, in_channels: List[int], num_anchors: List[int], norm_layer: Callable[..., nn.Module]): + bbox_reg = nn.ModuleList() + for channels, anchors in zip(in_channels, num_anchors): + 
bbox_reg.append(_prediction_block(channels, 4 * anchors, 3, norm_layer)) + _normal_init(bbox_reg) + super().__init__(bbox_reg, 4) + + +class SSDLiteFeatureExtractorMobileNet(nn.Module): + def __init__( + self, + backbone: nn.Module, + c4_pos: int, + norm_layer: Callable[..., nn.Module], + width_mult: float = 1.0, + min_depth: int = 16, + ): + super().__init__() + _log_api_usage_once(self) + + assert not backbone[c4_pos].use_res_connect + self.features = nn.Sequential( + # As described in section 6.3 of MobileNetV3 paper + nn.Sequential(*backbone[:c4_pos], backbone[c4_pos].block[0]), # from start until C4 expansion layer + nn.Sequential(backbone[c4_pos].block[1:], *backbone[c4_pos + 1 :]), # from C4 depthwise until end + ) + + get_depth = lambda d: max(min_depth, int(d * width_mult)) # noqa: E731 + extra = nn.ModuleList( + [ + _extra_block(backbone[-1].out_channels, get_depth(512), norm_layer), + _extra_block(get_depth(512), get_depth(256), norm_layer), + _extra_block(get_depth(256), get_depth(256), norm_layer), + _extra_block(get_depth(256), get_depth(128), norm_layer), + ] + ) + _normal_init(extra) + + self.extra = extra + + def forward(self, x: Tensor) -> Dict[str, Tensor]: + # Get feature maps from backbone and extra. Can't be refactored due to JIT limitations. + output = [] + for block in self.features: + x = block(x) + output.append(x) + + for block in self.extra: + x = block(x) + output.append(x) + + return OrderedDict([(str(i), v) for i, v in enumerate(output)]) + + +def _mobilenet_extractor( + backbone: Union[mobilenet.MobileNetV2, mobilenet.MobileNetV3], + trainable_layers: int, + norm_layer: Callable[..., nn.Module], +): + backbone = backbone.features + # Gather the indices of blocks which are strided. These are the locations of C1, ..., Cn-1 blocks. + # The first and last blocks are always included because they are the C0 (conv1) and Cn. + stage_indices = [0] + [i for i, b in enumerate(backbone) if getattr(b, "_is_cn", False)] + [len(backbone) - 1] + num_stages = len(stage_indices) + + # find the index of the layer from which we wont freeze + assert 0 <= trainable_layers <= num_stages + freeze_before = len(backbone) if trainable_layers == 0 else stage_indices[num_stages - trainable_layers] + + for b in backbone[:freeze_before]: + for parameter in b.parameters(): + parameter.requires_grad_(False) + + return SSDLiteFeatureExtractorMobileNet(backbone, stage_indices[-2], norm_layer) + + +def ssdlite320_mobilenet_v3_large( + pretrained: bool = False, + progress: bool = True, + num_classes: int = 91, + pretrained_backbone: bool = False, + trainable_backbone_layers: Optional[int] = None, + norm_layer: Optional[Callable[..., nn.Module]] = None, + **kwargs: Any, +): + """Constructs an SSDlite model with input size 320x320 and a MobileNetV3 Large backbone, as described at + `"Searching for MobileNetV3" + `_ and + `"MobileNetV2: Inverted Residuals and Linear Bottlenecks" + `_. + + See :func:`~torchvision.models.detection.ssd300_vgg16` for more details. 
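+
+    A minimal sketch of overriding the documented keyword arguments (the values are illustrative
+    assumptions: ``norm_layer`` simply restates the default built further down in this function,
+    and a head sized for 21 classes cannot load the 91-class COCO checkpoint):
+
+        >>> from functools import partial
+        >>> from torch import nn
+        >>> model = ssdlite320_mobilenet_v3_large(
+        ...     num_classes=21,
+        ...     norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.03),
+        ... )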
+ + Example: + + >>> model = torchvision.models.detection.ssdlite320_mobilenet_v3_large(pretrained=True) + >>> model.eval() + >>> x = [torch.rand(3, 320, 320), torch.rand(3, 500, 400)] + >>> predictions = model(x) + + Args: + pretrained (bool): If True, returns a model pre-trained on COCO train2017 + progress (bool): If True, displays a progress bar of the download to stderr + num_classes (int): number of output classes of the model (including the background) + pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet + trainable_backbone_layers (int): number of trainable (not frozen) resnet layers starting from final block. + Valid values are between 0 and 6, with 6 meaning all backbone layers are trainable. + norm_layer (callable, optional): Module specifying the normalization layer to use. + """ + if "size" in kwargs: + warnings.warn("The size of the model is already fixed; ignoring the argument.") + + trainable_backbone_layers = _validate_trainable_layers( + pretrained or pretrained_backbone, trainable_backbone_layers, 6, 6 + ) + + if pretrained: + pretrained_backbone = False + + # Enable reduced tail if no pretrained backbone is selected. See Table 6 of MobileNetV3 paper. + reduce_tail = not pretrained_backbone + + if norm_layer is None: + norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.03) + + backbone = mobilenet.mobilenet_v3_large( + pretrained=pretrained_backbone, progress=progress, norm_layer=norm_layer, reduced_tail=reduce_tail, **kwargs + ) + if not pretrained_backbone: + # Change the default initialization scheme if not pretrained + _normal_init(backbone) + backbone = _mobilenet_extractor( + backbone, + trainable_backbone_layers, + norm_layer, + ) + + size = (320, 320) + anchor_generator = DefaultBoxGenerator([[2, 3] for _ in range(6)], min_ratio=0.2, max_ratio=0.95) + out_channels = det_utils.retrieve_out_channels(backbone, size) + num_anchors = anchor_generator.num_anchors_per_location() + assert len(out_channels) == len(anchor_generator.aspect_ratios) + + defaults = { + "score_thresh": 0.001, + "nms_thresh": 0.55, + "detections_per_img": 300, + "topk_candidates": 300, + # Rescale the input in a way compatible to the backbone: + # The following mean/std rescale the data from [0, 1] to [-1, -1] + "image_mean": [0.5, 0.5, 0.5], + "image_std": [0.5, 0.5, 0.5], + } + kwargs = {**defaults, **kwargs} + model = SSD( + backbone, + anchor_generator, + size, + num_classes, + head=SSDLiteHead(out_channels, num_anchors, num_classes, norm_layer), + **kwargs, + ) + + if pretrained: + weights_name = "ssdlite320_mobilenet_v3_large_coco" + if model_urls.get(weights_name, None) is None: + raise ValueError(f"No checkpoint is available for model {weights_name}") + state_dict = load_state_dict_from_url(model_urls[weights_name], progress=progress) + model.load_state_dict(state_dict) + return model diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform.py new file mode 100644 index 0000000000..af9d13414d --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform.py @@ -0,0 +1,302 @@ +import math +from typing import List, Tuple, Dict, Optional + +import torch +import torchvision +import pdb +from torch import nn, Tensor + +from .image_list import ImageList +from .roi_heads import paste_masks_in_image + + +@torch.jit.unused +def _get_shape_onnx(image: Tensor) -> Tensor: + from 
torch.onnx import operators + + return operators.shape_as_tensor(image)[-2:] + + +@torch.jit.unused +def _fake_cast_onnx(v: Tensor) -> float: + # ONNX requires a tensor but here we fake its type for JIT. + return v + + +def _resize_image_and_masks( + image: Tensor, + self_min_size: float, + self_max_size: float, + target: Optional[Dict[str, Tensor]] = None, + fixed_size: Optional[Tuple[int, int]] = None, +) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + if torchvision._is_tracing(): + im_shape = _get_shape_onnx(image) + else: + im_shape = torch.tensor(image.shape[-2:]) + + size: Optional[List[int]] = None + scale_factor: Optional[float] = None + recompute_scale_factor: Optional[bool] = None + if fixed_size is not None: + size = [fixed_size[1], fixed_size[0]] + else: + min_size = torch.min(im_shape).to(dtype=torch.float32) + max_size = torch.max(im_shape).to(dtype=torch.float32) + scale = torch.min(self_min_size / min_size, self_max_size / max_size) + + if torchvision._is_tracing(): + scale_factor = _fake_cast_onnx(scale) + else: + scale_factor = scale.item() + recompute_scale_factor = True + + image = torch.nn.functional.interpolate( + image[None], + size=size, + scale_factor=scale_factor, + mode="bilinear", + recompute_scale_factor=recompute_scale_factor, + align_corners=False, + )[0] + + if target is None: + return image, target + + if "masks" in target: + mask = target["masks"] + mask = torch.nn.functional.interpolate( + mask[:, None].float(), size=size, scale_factor=scale_factor, recompute_scale_factor=recompute_scale_factor + )[:, 0].byte() + target["masks"] = mask + return image, target + + +class GeneralizedRCNNTransform(nn.Module): + """ + Performs input / target transformation before feeding the data to a GeneralizedRCNN + model. + + The transformations it perform are: + - input normalization (mean subtraction and std division) + - input / target resizing to match min_size / max_size + + It returns a ImageList for the inputs, and a List[Dict[Tensor]] for the targets + """ + + def __init__( + self, + min_size: int, + max_size: int, + image_mean: List[float], + image_std: List[float], + size_divisible: int = 32, + fixed_size: Optional[Tuple[int, int]] = None, + ): + super().__init__() + if not isinstance(min_size, (list, tuple)): + min_size = (min_size,) + self.min_size = min_size + self.max_size = max_size + self.image_mean = image_mean + self.image_std = image_std + self.size_divisible = size_divisible + self.fixed_size = fixed_size + + def forward( + self, images: List[Tensor], targets: Optional[List[Dict[str, Tensor]]] = None + ) -> Tuple[ImageList, Optional[List[Dict[str, Tensor]]]]: + images = [img for img in images] + if targets is not None: + # make a copy of targets to avoid modifying it in-place + # once torchscript supports dict comprehension + # this can be simplified as follows + # targets = [{k: v for k,v in t.items()} for t in targets] + targets_copy: List[Dict[str, Tensor]] = [] + for t in targets: + data: Dict[str, Tensor] = {} + for k, v in t.items(): + data[k] = v + targets_copy.append(data) + targets = targets_copy + for i in range(len(images)): + image = images[i] + target_index = targets[i] if targets is not None else None + + if image.dim() != 3: + raise ValueError(f"images is expected to be a list of 3d tensors of shape [C, H, W], got {image.shape}") + image = self.normalize(image) + image, target_index = self.resize(image, target_index) + images[i] = image + if targets is not None and target_index is not None: + targets[i] = target_index + image_sizes = 
[img.shape[-2:] for img in images] + images = self.batch_images(images, size_divisible=self.size_divisible) + image_sizes_list: List[Tuple[int, int]] = [] + for image_size in image_sizes: + assert len(image_size) == 2 + image_sizes_list.append((image_size[0], image_size[1])) + + image_list = ImageList(images, image_sizes_list) + return image_list, targets + + def normalize(self, image: Tensor) -> Tensor: + if not image.is_floating_point(): + raise TypeError( + f"Expected input images to be of floating type (in range [0, 1]), " + f"but found type {image.dtype} instead" + ) + dtype, device = image.dtype, image.device + mean = torch.as_tensor(self.image_mean, dtype=dtype, device=device) + std = torch.as_tensor(self.image_std, dtype=dtype, device=device) + return (image - mean[:, None, None]) / std[:, None, None] + + def torch_choice(self, k: List[int]) -> int: + """ + Implements `random.choice` via torch ops so it can be compiled with + TorchScript. Remove if https://github.com/pytorch/pytorch/issues/25803 + is fixed. + """ + index = int(torch.empty(1).uniform_(0.0, float(len(k))).item()) + return k[index] + + def resize( + self, + image: Tensor, + target: Optional[Dict[str, Tensor]] = None, + ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + h, w = image.shape[-2:] + if self.training: + size = float(self.torch_choice(self.min_size)) + else: + # FIXME assume for now that testing uses the largest scale + size = float(self.min_size[-1]) + image, target = _resize_image_and_masks(image, size, float(self.max_size), target, self.fixed_size) + + if target is None: + return image, target + + bbox = target["boxes"] + bbox = resize_boxes(bbox, (h, w), image.shape[-2:]) + target["boxes"] = bbox + + if "keypoints" in target: + keypoints = target["keypoints"] + keypoints = resize_keypoints(keypoints, (h, w), image.shape[-2:]) + target["keypoints"] = keypoints + return image, target + + # _onnx_batch_images() is an implementation of + # batch_images() that is supported by ONNX tracing. 
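+    # Note on batch_images(): with size_divisible=32 (the class default), two images of, e.g.,
+    # shapes [3, 480, 640] and [3, 512, 600] are zero-padded into a single batch of shape
+    # [2, 3, 512, 640], because the per-axis maxima (512, 640) are already multiples of 32.
+    # The SSD transform in ssd.py is built with size_divisible=1 and a fixed_size, so every
+    # image is resized to the same shape and no extra padding occurs.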
+ @torch.jit.unused + def _onnx_batch_images(self, images: List[Tensor], size_divisible: int = 32) -> Tensor: + max_size = [] + for i in range(images[0].dim()): + max_size_i = torch.max(torch.stack([img.shape[i] for img in images]).to(torch.float32)).to(torch.int64) + max_size.append(max_size_i) + stride = size_divisible + max_size[1] = (torch.ceil((max_size[1].to(torch.float32)) / stride) * stride).to(torch.int64) + max_size[2] = (torch.ceil((max_size[2].to(torch.float32)) / stride) * stride).to(torch.int64) + max_size = tuple(max_size) + + # work around for + # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) + # which is not yet supported in onnx + padded_imgs = [] + for img in images: + padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))] + padded_img = torch.nn.functional.pad(img, (0, padding[2], 0, padding[1], 0, padding[0])) + padded_imgs.append(padded_img) + + return torch.stack(padded_imgs) + + def max_by_axis(self, the_list: List[List[int]]) -> List[int]: + maxes = the_list[0] + for sublist in the_list[1:]: + for index, item in enumerate(sublist): + maxes[index] = max(maxes[index], item) + return maxes + + def batch_images(self, images: List[Tensor], size_divisible: int = 32) -> Tensor: + if torchvision._is_tracing(): + # batch_images() does not export well to ONNX + # call _onnx_batch_images() instead + return self._onnx_batch_images(images, size_divisible) + + max_size = self.max_by_axis([list(img.shape) for img in images]) + stride = float(size_divisible) + max_size = list(max_size) + max_size[1] = int(math.ceil(float(max_size[1]) / stride) * stride) + max_size[2] = int(math.ceil(float(max_size[2]) / stride) * stride) + + batch_shape = [len(images)] + max_size + batched_imgs = images[0].new_full(batch_shape, 0) + for i in range(batched_imgs.shape[0]): + img = images[i] + batched_imgs[i, : img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) + + return batched_imgs + + def postprocess( + self, + result: List[Dict[str, Tensor]], + image_shapes: List[Tuple[int, int]], + original_image_sizes: List[Tuple[int, int]], + ) -> List[Dict[str, Tensor]]: + if self.training: + return result + for i, (pred, im_s, o_im_s) in enumerate(zip(result, image_shapes, original_image_sizes)): + boxes = pred["boxes"] + boxes = resize_boxes(boxes, im_s, o_im_s) + result[i]["boxes"] = boxes + if "masks" in pred: + masks = pred["masks"] + masks = paste_masks_in_image(masks, boxes, o_im_s) + result[i]["masks"] = masks + if "keypoints" in pred: + keypoints = pred["keypoints"] + keypoints = resize_keypoints(keypoints, im_s, o_im_s) + result[i]["keypoints"] = keypoints + return result + + def __repr__(self) -> str: + format_string = self.__class__.__name__ + "(" + _indent = "\n " + format_string += f"{_indent}Normalize(mean={self.image_mean}, std={self.image_std})" + format_string += f"{_indent}Resize(min_size={self.min_size}, max_size={self.max_size}, mode='bilinear')" + format_string += "\n)" + return format_string + + +def resize_keypoints(keypoints: Tensor, original_size: List[int], new_size: List[int]) -> Tensor: + ratios = [ + torch.tensor(s, dtype=torch.float32, device=keypoints.device) + / torch.tensor(s_orig, dtype=torch.float32, device=keypoints.device) + for s, s_orig in zip(new_size, original_size) + ] + ratio_h, ratio_w = ratios + resized_data = keypoints.clone() + if torch._C._get_tracing_state(): + resized_data_0 = resized_data[:, :, 0] * ratio_w + resized_data_1 = resized_data[:, :, 1] * ratio_h + resized_data = torch.stack((resized_data_0, resized_data_1, 
resized_data[:, :, 2]), dim=2) + else: + resized_data[..., 0] *= ratio_w + resized_data[..., 1] *= ratio_h + return resized_data + + +def resize_boxes(boxes: Tensor, original_size: List[int], new_size: List[int]) -> Tensor: + ratios = [ + torch.tensor(s, dtype=torch.float32, device=boxes.device) + / torch.tensor(s_orig, dtype=torch.float32, device=boxes.device) + for s, s_orig in zip(new_size, original_size) + ] + ratio_height, ratio_width = ratios + xmin, ymin, xmax, ymax = boxes.unbind(1) + + xmin = xmin * ratio_width + xmax = xmax * ratio_width + ymin = ymin * ratio_height + ymax = ymax * ratio_height + return torch.stack((xmin, ymin, xmax, ymax), dim=1) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform_origin.py new file mode 100644 index 0000000000..ffbe2279b7 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform_origin.py @@ -0,0 +1,226 @@ +import random +import math +import torch +from torch import nn, Tensor +import torchvision +from torch.jit.annotations import List, Tuple, Dict, Optional + +from torchvision.ops import misc as misc_nn_ops +from .image_list import ImageList +from .roi_heads import paste_masks_in_image + + +class GeneralizedRCNNTransform(nn.Module): + """ + Performs input / target transformation before feeding the data to a GeneralizedRCNN + model. + + The transformations it perform are: + - input normalization (mean subtraction and std division) + - input / target resizing to match min_size / max_size + + It returns a ImageList for the inputs, and a List[Dict[Tensor]] for the targets + """ + + def __init__(self, min_size, max_size, image_mean, image_std): + super(GeneralizedRCNNTransform, self).__init__() + if not isinstance(min_size, (list, tuple)): + min_size = (min_size,) + self.min_size = min_size + self.max_size = max_size + self.image_mean = image_mean + self.image_std = image_std + + def forward(self, images, targets=None): + # type: (List[Tensor], Optional[List[Dict[str, Tensor]]]) + images = [img for img in images] + for i in range(len(images)): + image = images[i] + target_index = targets[i] if targets is not None else None + + if image.dim() != 3: + raise ValueError("images is expected to be a list of 3d tensors " + "of shape [C, H, W], got {}".format(image.shape)) + image = self.normalize(image) + image, target_index = self.resize(image, target_index) + images[i] = image + if targets is not None and target_index is not None: + targets[i] = target_index + + image_sizes = [img.shape[-2:] for img in images] + images = self.batch_images(images) + image_sizes_list = torch.jit.annotate(List[Tuple[int, int]], []) + for image_size in image_sizes: + assert len(image_size) == 2 + image_sizes_list.append((image_size[0], image_size[1])) + + image_list = ImageList(images, image_sizes_list) + return image_list, targets + + def normalize(self, image): + dtype, device = image.dtype, image.device + mean = torch.as_tensor(self.image_mean, dtype=dtype, device=device) + std = torch.as_tensor(self.image_std, dtype=dtype, device=device) + return (image - mean[:, None, None]) / std[:, None, None] + + def torch_choice(self, l): + # type: (List[int]) + """ + Implements `random.choice` via torch ops so it can be compiled with + TorchScript. Remove if https://github.com/pytorch/pytorch/issues/25803 + is fixed. 
+ """ + index = int(torch.empty(1).uniform_(0., float(len(l))).item()) + return l[index] + + def resize(self, image, target): + # type: (Tensor, Optional[Dict[str, Tensor]]) + h, w = image.shape[-2:] + im_shape = torch.tensor(image.shape[-2:]) + min_size = float(torch.min(im_shape)) + max_size = float(torch.max(im_shape)) + if self.training: + size = float(self.torch_choice(self.min_size)) + else: + # FIXME assume for now that testing uses the largest scale + size = float(self.min_size[-1]) + scale_factor = size / min_size + if max_size * scale_factor > self.max_size: + scale_factor = self.max_size / max_size + image = torch.nn.functional.interpolate( + image[None], scale_factor=scale_factor, mode='bilinear', + align_corners=False)[0] + + if target is None: + return image, target + + bbox = target["boxes"] + bbox = resize_boxes(bbox, (h, w), image.shape[-2:]) + target["boxes"] = bbox + + if "masks" in target: + mask = target["masks"] + mask = misc_nn_ops.interpolate(mask[None].float(), scale_factor=scale_factor)[0].byte() + target["masks"] = mask + + if "keypoints" in target: + keypoints = target["keypoints"] + keypoints = resize_keypoints(keypoints, (h, w), image.shape[-2:]) + target["keypoints"] = keypoints + return image, target + + # _onnx_batch_images() is an implementation of + # batch_images() that is supported by ONNX tracing. + @torch.jit.unused + def _onnx_batch_images(self, images, size_divisible=32): + # type: (List[Tensor], int) -> Tensor + max_size = [] + for i in range(images[0].dim()): + max_size_i = torch.max(torch.stack([img.shape[i] for img in images]).to(torch.float32)).to(torch.int64) + max_size.append(max_size_i) + stride = size_divisible + max_size[1] = (torch.ceil((max_size[1].to(torch.float32)) / stride) * stride).to(torch.int64) + max_size[2] = (torch.ceil((max_size[2].to(torch.float32)) / stride) * stride).to(torch.int64) + max_size = tuple(max_size) + + # work around for + # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) + # which is not yet supported in onnx + padded_imgs = [] + for img in images: + padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))] + padded_img = torch.nn.functional.pad(img, (0, padding[2], 0, padding[1], 0, padding[0])) + padded_imgs.append(padded_img) + + return torch.stack(padded_imgs) + + def max_by_axis(self, the_list): + # type: (List[List[int]]) -> List[int] + maxes = the_list[0] + for sublist in the_list[1:]: + for index, item in enumerate(sublist): + maxes[index] = max(maxes[index], item) + return maxes + + def batch_images(self, images, size_divisible=32): + # type: (List[Tensor], int) + if torchvision._is_tracing(): + # batch_images() does not export well to ONNX + # call _onnx_batch_images() instead + return self._onnx_batch_images(images, size_divisible) + + max_size = self.max_by_axis([list(img.shape) for img in images]) + stride = float(size_divisible) + max_size = list(max_size) + max_size[1] = int(math.ceil(float(max_size[1]) / stride) * stride) + max_size[2] = int(math.ceil(float(max_size[2]) / stride) * stride) + + batch_shape = [len(images)] + max_size + batched_imgs = images[0].new_full(batch_shape, 0) + for img, pad_img in zip(images, batched_imgs): + pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) + + return batched_imgs + + def postprocess(self, result, image_shapes, original_image_sizes): + # type: (List[Dict[str, Tensor]], List[Tuple[int, int]], List[Tuple[int, int]]) + if self.training: + return result + for i, (pred, im_s, o_im_s) in enumerate(zip(result, 
image_shapes, original_image_sizes)): + boxes = pred["boxes"] + boxes = resize_boxes(boxes, im_s, o_im_s) + result[i]["boxes"] = boxes + if "masks" in pred: + masks = pred["masks"] + masks = paste_masks_in_image(masks, boxes, o_im_s) + result[i]["masks"] = masks + if "keypoints" in pred: + keypoints = pred["keypoints"] + keypoints = resize_keypoints(keypoints, im_s, o_im_s) + result[i]["keypoints"] = keypoints + return result + + def __repr__(self): + format_string = self.__class__.__name__ + '(' + _indent = '\n ' + format_string += "{0}Normalize(mean={1}, std={2})".format(_indent, self.image_mean, self.image_std) + format_string += "{0}Resize(min_size={1}, max_size={2}, mode='bilinear')".format(_indent, self.min_size, + self.max_size) + format_string += '\n)' + return format_string + + +def resize_keypoints(keypoints, original_size, new_size): + # type: (Tensor, List[int], List[int]) + ratios = [ + torch.tensor(s, dtype=torch.float32, device=keypoints.device) / + torch.tensor(s_orig, dtype=torch.float32, device=keypoints.device) + for s, s_orig in zip(new_size, original_size) + ] + ratio_h, ratio_w = ratios + resized_data = keypoints.clone() + if torch._C._get_tracing_state(): + resized_data_0 = resized_data[:, :, 0] * ratio_w + resized_data_1 = resized_data[:, :, 1] * ratio_h + resized_data = torch.stack((resized_data_0, resized_data_1, resized_data[:, :, 2]), dim=2) + else: + resized_data[..., 0] *= ratio_w + resized_data[..., 1] *= ratio_h + return resized_data + + +def resize_boxes(boxes, original_size, new_size): + # type: (Tensor, List[int], List[int]) + ratios = [ + torch.tensor(s, dtype=torch.float32, device=boxes.device) / + torch.tensor(s_orig, dtype=torch.float32, device=boxes.device) + for s, s_orig in zip(new_size, original_size) + ] + ratio_height, ratio_width = ratios + xmin, ymin, xmax, ymax = boxes.unbind(1) + + xmin = xmin * ratio_width + xmax = xmax * ratio_width + ymin = ymin * ratio_height + ymax = ymax * ratio_height + return torch.stack((xmin, ymin, xmax, ymax), dim=1) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/googlenet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/googlenet.py new file mode 100644 index 0000000000..4b1cb28003 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/googlenet.py @@ -0,0 +1,290 @@ +import warnings +from collections import namedtuple +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.jit.annotations import Optional, Tuple +from torch import Tensor +from .utils import load_state_dict_from_url + +__all__ = ['GoogLeNet', 'googlenet', "GoogLeNetOutputs", "_GoogLeNetOutputs"] + +model_urls = { + # GoogLeNet ported from TensorFlow + 'googlenet': 'https://download.pytorch.org/models/googlenet-1378be20.pth', +} + +GoogLeNetOutputs = namedtuple('GoogLeNetOutputs', ['logits', 'aux_logits2', 'aux_logits1']) +GoogLeNetOutputs.__annotations__ = {'logits': Tensor, 'aux_logits2': Optional[Tensor], + 'aux_logits1': Optional[Tensor]} + +# Script annotations failed with _GoogleNetOutputs = namedtuple ... +# _GoogLeNetOutputs set here for backwards compat +_GoogLeNetOutputs = GoogLeNetOutputs + + +def googlenet(pretrained=False, progress=True, **kwargs): + r"""GoogLeNet (Inception v1) model architecture from + `"Going Deeper with Convolutions" `_. 
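+
+    Example (editor's illustration, not part of the upstream docstring; shapes
+    assume the standard 224x224 ImageNet input)::
+
+        >>> model = googlenet(pretrained=False).eval()
+        >>> model(torch.rand(1, 3, 224, 224)).shape
+        torch.Size([1, 1000])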
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + aux_logits (bool): If True, adds two auxiliary branches that can improve training. + Default: *False* when pretrained is True otherwise *True* + transform_input (bool): If True, preprocesses the input according to the method with which it + was trained on ImageNet. Default: *False* + """ + if pretrained: + if 'transform_input' not in kwargs: + kwargs['transform_input'] = True + if 'aux_logits' not in kwargs: + kwargs['aux_logits'] = False + if kwargs['aux_logits']: + warnings.warn('auxiliary heads in the pretrained googlenet model are NOT pretrained, ' + 'so make sure to train them') + original_aux_logits = kwargs['aux_logits'] + kwargs['aux_logits'] = True + kwargs['init_weights'] = False + model = GoogLeNet(**kwargs) + state_dict = load_state_dict_from_url(model_urls['googlenet'], + progress=progress) + model.load_state_dict(state_dict) + if not original_aux_logits: + model.aux_logits = False + model.aux1 = None + model.aux2 = None + return model + + return GoogLeNet(**kwargs) + + +class GoogLeNet(nn.Module): + __constants__ = ['aux_logits', 'transform_input'] + + def __init__(self, num_classes=1000, aux_logits=True, transform_input=False, init_weights=True, + blocks=None): + super(GoogLeNet, self).__init__() + if blocks is None: + blocks = [BasicConv2d, Inception, InceptionAux] + assert len(blocks) == 3 + conv_block = blocks[0] + inception_block = blocks[1] + inception_aux_block = blocks[2] + + self.aux_logits = aux_logits + self.transform_input = transform_input + + self.conv1 = conv_block(3, 64, kernel_size=7, stride=2, padding=3) + self.maxpool1 = nn.MaxPool2d(3, stride=2, ceil_mode=True) + self.conv2 = conv_block(64, 64, kernel_size=1) + self.conv3 = conv_block(64, 192, kernel_size=3, padding=1) + self.maxpool2 = nn.MaxPool2d(3, stride=2, ceil_mode=True) + + self.inception3a = inception_block(192, 64, 96, 128, 16, 32, 32) + self.inception3b = inception_block(256, 128, 128, 192, 32, 96, 64) + self.maxpool3 = nn.MaxPool2d(3, stride=2, ceil_mode=True) + + self.inception4a = inception_block(480, 192, 96, 208, 16, 48, 64) + self.inception4b = inception_block(512, 160, 112, 224, 24, 64, 64) + self.inception4c = inception_block(512, 128, 128, 256, 24, 64, 64) + self.inception4d = inception_block(512, 112, 144, 288, 32, 64, 64) + self.inception4e = inception_block(528, 256, 160, 320, 32, 128, 128) + self.maxpool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True) + + self.inception5a = inception_block(832, 256, 160, 320, 32, 128, 128) + self.inception5b = inception_block(832, 384, 192, 384, 48, 128, 128) + + if aux_logits: + self.aux1 = inception_aux_block(512, num_classes) + self.aux2 = inception_aux_block(528, num_classes) + else: + self.aux1 = None + self.aux2 = None + + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.dropout = nn.Dropout(0.2) + self.fc = nn.Linear(1024, num_classes) + + if init_weights: + self._initialize_weights() + + def _initialize_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear): + import scipy.stats as stats + X = stats.truncnorm(-2, 2, scale=0.01) + values = torch.as_tensor(X.rvs(m.weight.numel()), dtype=m.weight.dtype) + values = values.view(m.weight.size()) + with torch.no_grad(): + m.weight.copy_(values) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + def _transform_input(self, x): + # type: (Tensor) 
-> Tensor + if self.transform_input: + x_ch0 = torch.unsqueeze(x[:, 0], 1) * (0.229 / 0.5) + (0.485 - 0.5) / 0.5 + x_ch1 = torch.unsqueeze(x[:, 1], 1) * (0.224 / 0.5) + (0.456 - 0.5) / 0.5 + x_ch2 = torch.unsqueeze(x[:, 2], 1) * (0.225 / 0.5) + (0.406 - 0.5) / 0.5 + x = torch.cat((x_ch0, x_ch1, x_ch2), 1) + return x + + def _forward(self, x): + # type: (Tensor) -> Tuple[Tensor, Optional[Tensor], Optional[Tensor]] + # N x 3 x 224 x 224 + x = self.conv1(x) + # N x 64 x 112 x 112 + x = self.maxpool1(x) + # N x 64 x 56 x 56 + x = self.conv2(x) + # N x 64 x 56 x 56 + x = self.conv3(x) + # N x 192 x 56 x 56 + x = self.maxpool2(x) + + # N x 192 x 28 x 28 + x = self.inception3a(x) + # N x 256 x 28 x 28 + x = self.inception3b(x) + # N x 480 x 28 x 28 + x = self.maxpool3(x) + # N x 480 x 14 x 14 + x = self.inception4a(x) + # N x 512 x 14 x 14 + aux1 = torch.jit.annotate(Optional[Tensor], None) + if self.aux1 is not None: + if self.training: + aux1 = self.aux1(x) + + x = self.inception4b(x) + # N x 512 x 14 x 14 + x = self.inception4c(x) + # N x 512 x 14 x 14 + x = self.inception4d(x) + # N x 528 x 14 x 14 + aux2 = torch.jit.annotate(Optional[Tensor], None) + if self.aux2 is not None: + if self.training: + aux2 = self.aux2(x) + + x = self.inception4e(x) + # N x 832 x 14 x 14 + x = self.maxpool4(x) + # N x 832 x 7 x 7 + x = self.inception5a(x) + # N x 832 x 7 x 7 + x = self.inception5b(x) + # N x 1024 x 7 x 7 + + x = self.avgpool(x) + # N x 1024 x 1 x 1 + x = torch.flatten(x, 1) + # N x 1024 + x = self.dropout(x) + x = self.fc(x) + # N x 1000 (num_classes) + return x, aux2, aux1 + + @torch.jit.unused + def eager_outputs(self, x, aux2, aux1): + # type: (Tensor, Optional[Tensor], Optional[Tensor]) -> GoogLeNetOutputs + if self.training and self.aux_logits: + return _GoogLeNetOutputs(x, aux2, aux1) + else: + return x + + def forward(self, x): + # type: (Tensor) -> GoogLeNetOutputs + x = self._transform_input(x) + x, aux1, aux2 = self._forward(x) + aux_defined = self.training and self.aux_logits + if torch.jit.is_scripting(): + if not aux_defined: + warnings.warn("Scripted GoogleNet always returns GoogleNetOutputs Tuple") + return GoogLeNetOutputs(x, aux2, aux1) + else: + return self.eager_outputs(x, aux2, aux1) + + +class Inception(nn.Module): + + def __init__(self, in_channels, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj, + conv_block=None): + super(Inception, self).__init__() + if conv_block is None: + conv_block = BasicConv2d + self.branch1 = conv_block(in_channels, ch1x1, kernel_size=1) + + self.branch2 = nn.Sequential( + conv_block(in_channels, ch3x3red, kernel_size=1), + conv_block(ch3x3red, ch3x3, kernel_size=3, padding=1) + ) + + self.branch3 = nn.Sequential( + conv_block(in_channels, ch5x5red, kernel_size=1), + # Here, kernel_size=3 instead of kernel_size=5 is a known bug. + # Please see https://github.com/pytorch/vision/issues/906 for details. 
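+            # Editor's note (not in upstream torchvision): the released pretrained
+            # weights were trained with this 3x3 kernel, so it is presumably kept
+            # as-is for checkpoint compatibility.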
+ conv_block(ch5x5red, ch5x5, kernel_size=3, padding=1) + ) + + self.branch4 = nn.Sequential( + nn.MaxPool2d(kernel_size=3, stride=1, padding=1, ceil_mode=True), + conv_block(in_channels, pool_proj, kernel_size=1) + ) + + def _forward(self, x): + branch1 = self.branch1(x) + branch2 = self.branch2(x) + branch3 = self.branch3(x) + branch4 = self.branch4(x) + + outputs = [branch1, branch2, branch3, branch4] + return outputs + + def forward(self, x): + outputs = self._forward(x) + return torch.cat(outputs, 1) + + +class InceptionAux(nn.Module): + + def __init__(self, in_channels, num_classes, conv_block=None): + super(InceptionAux, self).__init__() + if conv_block is None: + conv_block = BasicConv2d + self.conv = conv_block(in_channels, 128, kernel_size=1) + + self.fc1 = nn.Linear(2048, 1024) + self.fc2 = nn.Linear(1024, num_classes) + + def forward(self, x): + # aux1: N x 512 x 14 x 14, aux2: N x 528 x 14 x 14 + x = F.adaptive_avg_pool2d(x, (4, 4)) + # aux1: N x 512 x 4 x 4, aux2: N x 528 x 4 x 4 + x = self.conv(x) + # N x 128 x 4 x 4 + x = torch.flatten(x, 1) + # N x 2048 + x = F.relu(self.fc1(x), inplace=True) + # N x 1024 + x = F.dropout(x, 0.7, training=self.training) + # N x 1024 + x = self.fc2(x) + # N x 1000 (num_classes) + + return x + + +class BasicConv2d(nn.Module): + + def __init__(self, in_channels, out_channels, **kwargs): + super(BasicConv2d, self).__init__() + self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs) + self.bn = nn.BatchNorm2d(out_channels, eps=0.001) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + return F.relu(x, inplace=True) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/inception.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/inception.py new file mode 100644 index 0000000000..e4c5430c31 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/inception.py @@ -0,0 +1,432 @@ +from collections import namedtuple +import warnings +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.jit.annotations import Optional +from torch import Tensor +from .utils import load_state_dict_from_url + + +__all__ = ['Inception3', 'inception_v3', 'InceptionOutputs', '_InceptionOutputs'] + + +model_urls = { + # Inception v3 ported from TensorFlow + 'inception_v3_google': 'https://download.pytorch.org/models/inception_v3_google-1a9a5a14.pth', +} + +InceptionOutputs = namedtuple('InceptionOutputs', ['logits', 'aux_logits']) +InceptionOutputs.__annotations__ = {'logits': torch.Tensor, 'aux_logits': Optional[torch.Tensor]} + +# Script annotations failed with _GoogleNetOutputs = namedtuple ... +# _InceptionOutputs set here for backwards compat +_InceptionOutputs = InceptionOutputs + + +def inception_v3(pretrained=False, progress=True, **kwargs): + r"""Inception v3 model architecture from + `"Rethinking the Inception Architecture for Computer Vision" `_. + + .. note:: + **Important**: In contrast to the other models the inception_v3 expects tensors with a size of + N x 3 x 299 x 299, so ensure your images are sized accordingly. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + aux_logits (bool): If True, add an auxiliary branch that can improve training. + Default: *True* + transform_input (bool): If True, preprocesses the input according to the method with which it + was trained on ImageNet. 
Default: *False* + """ + if pretrained: + if 'transform_input' not in kwargs: + kwargs['transform_input'] = True + if 'aux_logits' in kwargs: + original_aux_logits = kwargs['aux_logits'] + kwargs['aux_logits'] = True + else: + original_aux_logits = True + model = Inception3(**kwargs) + state_dict = load_state_dict_from_url(model_urls['inception_v3_google'], + progress=progress) + model.load_state_dict(state_dict) + if not original_aux_logits: + model.aux_logits = False + del model.AuxLogits + return model + + return Inception3(**kwargs) + + +class Inception3(nn.Module): + + def __init__(self, num_classes=1000, aux_logits=True, transform_input=False, + inception_blocks=None, init_weights=True): + super(Inception3, self).__init__() + if inception_blocks is None: + inception_blocks = [ + BasicConv2d, InceptionA, InceptionB, InceptionC, + InceptionD, InceptionE, InceptionAux + ] + assert len(inception_blocks) == 7 + conv_block = inception_blocks[0] + inception_a = inception_blocks[1] + inception_b = inception_blocks[2] + inception_c = inception_blocks[3] + inception_d = inception_blocks[4] + inception_e = inception_blocks[5] + inception_aux = inception_blocks[6] + + self.aux_logits = aux_logits + self.transform_input = transform_input + self.Conv2d_1a_3x3 = conv_block(3, 32, kernel_size=3, stride=2) + self.Conv2d_2a_3x3 = conv_block(32, 32, kernel_size=3) + self.Conv2d_2b_3x3 = conv_block(32, 64, kernel_size=3, padding=1) + self.Conv2d_3b_1x1 = conv_block(64, 80, kernel_size=1) + self.Conv2d_4a_3x3 = conv_block(80, 192, kernel_size=3) + self.Mixed_5b = inception_a(192, pool_features=32) + self.Mixed_5c = inception_a(256, pool_features=64) + self.Mixed_5d = inception_a(288, pool_features=64) + self.Mixed_6a = inception_b(288) + self.Mixed_6b = inception_c(768, channels_7x7=128) + self.Mixed_6c = inception_c(768, channels_7x7=160) + self.Mixed_6d = inception_c(768, channels_7x7=160) + self.Mixed_6e = inception_c(768, channels_7x7=192) + if aux_logits: + self.AuxLogits = inception_aux(768, num_classes) + self.Mixed_7a = inception_d(768) + self.Mixed_7b = inception_e(1280) + self.Mixed_7c = inception_e(2048) + self.fc = nn.Linear(2048, num_classes) + if init_weights: + for m in self.modules(): + if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear): + import scipy.stats as stats + stddev = m.stddev if hasattr(m, 'stddev') else 0.1 + X = stats.truncnorm(-2, 2, scale=stddev) + values = torch.as_tensor(X.rvs(m.weight.numel()), dtype=m.weight.dtype) + values = values.view(m.weight.size()) + with torch.no_grad(): + m.weight.copy_(values) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + def _transform_input(self, x): + if self.transform_input: + x_ch0 = torch.unsqueeze(x[:, 0], 1) * (0.229 / 0.5) + (0.485 - 0.5) / 0.5 + x_ch1 = torch.unsqueeze(x[:, 1], 1) * (0.224 / 0.5) + (0.456 - 0.5) / 0.5 + x_ch2 = torch.unsqueeze(x[:, 2], 1) * (0.225 / 0.5) + (0.406 - 0.5) / 0.5 + x = torch.cat((x_ch0, x_ch1, x_ch2), 1) + return x + + def _forward(self, x): + # N x 3 x 299 x 299 + x = self.Conv2d_1a_3x3(x) + # N x 32 x 149 x 149 + x = self.Conv2d_2a_3x3(x) + # N x 32 x 147 x 147 + x = self.Conv2d_2b_3x3(x) + # N x 64 x 147 x 147 + x = F.max_pool2d(x, kernel_size=3, stride=2) + # N x 64 x 73 x 73 + x = self.Conv2d_3b_1x1(x) + # N x 80 x 73 x 73 + x = self.Conv2d_4a_3x3(x) + # N x 192 x 71 x 71 + x = F.max_pool2d(x, kernel_size=3, stride=2) + # N x 192 x 35 x 35 + x = self.Mixed_5b(x) + # N x 256 x 35 x 35 + x = self.Mixed_5c(x) + # N x 288 x 35 x 35 + x = 
self.Mixed_5d(x) + # N x 288 x 35 x 35 + x = self.Mixed_6a(x) + # N x 768 x 17 x 17 + x = self.Mixed_6b(x) + # N x 768 x 17 x 17 + x = self.Mixed_6c(x) + # N x 768 x 17 x 17 + x = self.Mixed_6d(x) + # N x 768 x 17 x 17 + x = self.Mixed_6e(x) + # N x 768 x 17 x 17 + aux_defined = self.training and self.aux_logits + if aux_defined: + aux = self.AuxLogits(x) + else: + aux = None + # N x 768 x 17 x 17 + x = self.Mixed_7a(x) + # N x 1280 x 8 x 8 + x = self.Mixed_7b(x) + # N x 2048 x 8 x 8 + x = self.Mixed_7c(x) + # N x 2048 x 8 x 8 + # Adaptive average pooling + x = F.adaptive_avg_pool2d(x, (1, 1)) + # N x 2048 x 1 x 1 + x = F.dropout(x, training=self.training) + # N x 2048 x 1 x 1 + x = torch.flatten(x, 1) + # N x 2048 + x = self.fc(x) + # N x 1000 (num_classes) + return x, aux + + @torch.jit.unused + def eager_outputs(self, x, aux): + # type: (Tensor, Optional[Tensor]) -> InceptionOutputs + if self.training and self.aux_logits: + return InceptionOutputs(x, aux) + else: + return x + + def forward(self, x): + x = self._transform_input(x) + x, aux = self._forward(x) + aux_defined = self.training and self.aux_logits + if torch.jit.is_scripting(): + if not aux_defined: + warnings.warn("Scripted Inception3 always returns Inception3 Tuple") + return InceptionOutputs(x, aux) + else: + return self.eager_outputs(x, aux) + + +class InceptionA(nn.Module): + + def __init__(self, in_channels, pool_features, conv_block=None): + super(InceptionA, self).__init__() + if conv_block is None: + conv_block = BasicConv2d + self.branch1x1 = conv_block(in_channels, 64, kernel_size=1) + + self.branch5x5_1 = conv_block(in_channels, 48, kernel_size=1) + self.branch5x5_2 = conv_block(48, 64, kernel_size=5, padding=2) + + self.branch3x3dbl_1 = conv_block(in_channels, 64, kernel_size=1) + self.branch3x3dbl_2 = conv_block(64, 96, kernel_size=3, padding=1) + self.branch3x3dbl_3 = conv_block(96, 96, kernel_size=3, padding=1) + + self.branch_pool = conv_block(in_channels, pool_features, kernel_size=1) + + def _forward(self, x): + branch1x1 = self.branch1x1(x) + + branch5x5 = self.branch5x5_1(x) + branch5x5 = self.branch5x5_2(branch5x5) + + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) + + branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1) + branch_pool = self.branch_pool(branch_pool) + + outputs = [branch1x1, branch5x5, branch3x3dbl, branch_pool] + return outputs + + def forward(self, x): + outputs = self._forward(x) + return torch.cat(outputs, 1) + + +class InceptionB(nn.Module): + + def __init__(self, in_channels, conv_block=None): + super(InceptionB, self).__init__() + if conv_block is None: + conv_block = BasicConv2d + self.branch3x3 = conv_block(in_channels, 384, kernel_size=3, stride=2) + + self.branch3x3dbl_1 = conv_block(in_channels, 64, kernel_size=1) + self.branch3x3dbl_2 = conv_block(64, 96, kernel_size=3, padding=1) + self.branch3x3dbl_3 = conv_block(96, 96, kernel_size=3, stride=2) + + def _forward(self, x): + branch3x3 = self.branch3x3(x) + + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) + + branch_pool = F.max_pool2d(x, kernel_size=3, stride=2) + + outputs = [branch3x3, branch3x3dbl, branch_pool] + return outputs + + def forward(self, x): + outputs = self._forward(x) + return torch.cat(outputs, 1) + + +class InceptionC(nn.Module): + + def __init__(self, in_channels, channels_7x7, conv_block=None): + 
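+        # Editor's note (not in upstream torchvision): the 7x7 convolutions in this
+        # block are factorized into (1, 7) and (7, 1) pairs, which gives the same
+        # receptive field as a full 7x7 kernel at a lower cost; channels_7x7 sets
+        # the width of those intermediate factorized layers.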
super(InceptionC, self).__init__() + if conv_block is None: + conv_block = BasicConv2d + self.branch1x1 = conv_block(in_channels, 192, kernel_size=1) + + c7 = channels_7x7 + self.branch7x7_1 = conv_block(in_channels, c7, kernel_size=1) + self.branch7x7_2 = conv_block(c7, c7, kernel_size=(1, 7), padding=(0, 3)) + self.branch7x7_3 = conv_block(c7, 192, kernel_size=(7, 1), padding=(3, 0)) + + self.branch7x7dbl_1 = conv_block(in_channels, c7, kernel_size=1) + self.branch7x7dbl_2 = conv_block(c7, c7, kernel_size=(7, 1), padding=(3, 0)) + self.branch7x7dbl_3 = conv_block(c7, c7, kernel_size=(1, 7), padding=(0, 3)) + self.branch7x7dbl_4 = conv_block(c7, c7, kernel_size=(7, 1), padding=(3, 0)) + self.branch7x7dbl_5 = conv_block(c7, 192, kernel_size=(1, 7), padding=(0, 3)) + + self.branch_pool = conv_block(in_channels, 192, kernel_size=1) + + def _forward(self, x): + branch1x1 = self.branch1x1(x) + + branch7x7 = self.branch7x7_1(x) + branch7x7 = self.branch7x7_2(branch7x7) + branch7x7 = self.branch7x7_3(branch7x7) + + branch7x7dbl = self.branch7x7dbl_1(x) + branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl) + + branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1) + branch_pool = self.branch_pool(branch_pool) + + outputs = [branch1x1, branch7x7, branch7x7dbl, branch_pool] + return outputs + + def forward(self, x): + outputs = self._forward(x) + return torch.cat(outputs, 1) + + +class InceptionD(nn.Module): + + def __init__(self, in_channels, conv_block=None): + super(InceptionD, self).__init__() + if conv_block is None: + conv_block = BasicConv2d + self.branch3x3_1 = conv_block(in_channels, 192, kernel_size=1) + self.branch3x3_2 = conv_block(192, 320, kernel_size=3, stride=2) + + self.branch7x7x3_1 = conv_block(in_channels, 192, kernel_size=1) + self.branch7x7x3_2 = conv_block(192, 192, kernel_size=(1, 7), padding=(0, 3)) + self.branch7x7x3_3 = conv_block(192, 192, kernel_size=(7, 1), padding=(3, 0)) + self.branch7x7x3_4 = conv_block(192, 192, kernel_size=3, stride=2) + + def _forward(self, x): + branch3x3 = self.branch3x3_1(x) + branch3x3 = self.branch3x3_2(branch3x3) + + branch7x7x3 = self.branch7x7x3_1(x) + branch7x7x3 = self.branch7x7x3_2(branch7x7x3) + branch7x7x3 = self.branch7x7x3_3(branch7x7x3) + branch7x7x3 = self.branch7x7x3_4(branch7x7x3) + + branch_pool = F.max_pool2d(x, kernel_size=3, stride=2) + outputs = [branch3x3, branch7x7x3, branch_pool] + return outputs + + def forward(self, x): + outputs = self._forward(x) + return torch.cat(outputs, 1) + + +class InceptionE(nn.Module): + + def __init__(self, in_channels, conv_block=None): + super(InceptionE, self).__init__() + if conv_block is None: + conv_block = BasicConv2d + self.branch1x1 = conv_block(in_channels, 320, kernel_size=1) + + self.branch3x3_1 = conv_block(in_channels, 384, kernel_size=1) + self.branch3x3_2a = conv_block(384, 384, kernel_size=(1, 3), padding=(0, 1)) + self.branch3x3_2b = conv_block(384, 384, kernel_size=(3, 1), padding=(1, 0)) + + self.branch3x3dbl_1 = conv_block(in_channels, 448, kernel_size=1) + self.branch3x3dbl_2 = conv_block(448, 384, kernel_size=3, padding=1) + self.branch3x3dbl_3a = conv_block(384, 384, kernel_size=(1, 3), padding=(0, 1)) + self.branch3x3dbl_3b = conv_block(384, 384, kernel_size=(3, 1), padding=(1, 0)) + + self.branch_pool = conv_block(in_channels, 192, kernel_size=1) + + def _forward(self, x): + branch1x1 = self.branch1x1(x) + 
+ branch3x3 = self.branch3x3_1(x) + branch3x3 = [ + self.branch3x3_2a(branch3x3), + self.branch3x3_2b(branch3x3), + ] + branch3x3 = torch.cat(branch3x3, 1) + + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = [ + self.branch3x3dbl_3a(branch3x3dbl), + self.branch3x3dbl_3b(branch3x3dbl), + ] + branch3x3dbl = torch.cat(branch3x3dbl, 1) + + branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1) + branch_pool = self.branch_pool(branch_pool) + + outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool] + return outputs + + def forward(self, x): + outputs = self._forward(x) + return torch.cat(outputs, 1) + + +class InceptionAux(nn.Module): + + def __init__(self, in_channels, num_classes, conv_block=None): + super(InceptionAux, self).__init__() + if conv_block is None: + conv_block = BasicConv2d + self.conv0 = conv_block(in_channels, 128, kernel_size=1) + self.conv1 = conv_block(128, 768, kernel_size=5) + self.conv1.stddev = 0.01 + self.fc = nn.Linear(768, num_classes) + self.fc.stddev = 0.001 + + def forward(self, x): + # N x 768 x 17 x 17 + x = F.avg_pool2d(x, kernel_size=5, stride=3) + # N x 768 x 5 x 5 + x = self.conv0(x) + # N x 128 x 5 x 5 + x = self.conv1(x) + # N x 768 x 1 x 1 + # Adaptive average pooling + x = F.adaptive_avg_pool2d(x, (1, 1)) + # N x 768 x 1 x 1 + x = torch.flatten(x, 1) + # N x 768 + x = self.fc(x) + # N x 1000 + return x + + +class BasicConv2d(nn.Module): + + def __init__(self, in_channels, out_channels, **kwargs): + super(BasicConv2d, self).__init__() + self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs) + self.bn = nn.BatchNorm2d(out_channels, eps=0.001) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + return F.relu(x, inplace=True) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mnasnet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mnasnet.py new file mode 100644 index 0000000000..59677427f1 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mnasnet.py @@ -0,0 +1,258 @@ +import math +import warnings + +import torch +import torch.nn as nn +from .utils import load_state_dict_from_url + +__all__ = ['MNASNet', 'mnasnet0_5', 'mnasnet0_75', 'mnasnet1_0', 'mnasnet1_3'] + +_MODEL_URLS = { + "mnasnet0_5": + "https://download.pytorch.org/models/mnasnet0.5_top1_67.823-3ffadce67e.pth", + "mnasnet0_75": None, + "mnasnet1_0": + "https://download.pytorch.org/models/mnasnet1.0_top1_73.512-f206786ef8.pth", + "mnasnet1_3": None +} + +# Paper suggests 0.9997 momentum, for TensorFlow. Equivalent PyTorch momentum is +# 1.0 - tensorflow. +_BN_MOMENTUM = 1 - 0.9997 + + +class _InvertedResidual(nn.Module): + + def __init__(self, in_ch, out_ch, kernel_size, stride, expansion_factor, + bn_momentum=0.1): + super(_InvertedResidual, self).__init__() + assert stride in [1, 2] + assert kernel_size in [3, 5] + mid_ch = in_ch * expansion_factor + self.apply_residual = (in_ch == out_ch and stride == 1) + self.layers = nn.Sequential( + # Pointwise + nn.Conv2d(in_ch, mid_ch, 1, bias=False), + nn.BatchNorm2d(mid_ch, momentum=bn_momentum), + nn.ReLU(inplace=True), + # Depthwise + nn.Conv2d(mid_ch, mid_ch, kernel_size, padding=kernel_size // 2, + stride=stride, groups=mid_ch, bias=False), + nn.BatchNorm2d(mid_ch, momentum=bn_momentum), + nn.ReLU(inplace=True), + # Linear pointwise. Note that there's no activation. 
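+            # Editor's note (not in upstream torchvision): leaving this projection
+            # without an activation is the "linear bottleneck" idea from
+            # MobileNetV2; a ReLU here would discard information in the narrow
+            # output space.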
+ nn.Conv2d(mid_ch, out_ch, 1, bias=False), + nn.BatchNorm2d(out_ch, momentum=bn_momentum)) + + def forward(self, input): + if self.apply_residual: + return self.layers(input) + input + else: + return self.layers(input) + + +def _stack(in_ch, out_ch, kernel_size, stride, exp_factor, repeats, + bn_momentum): + """ Creates a stack of inverted residuals. """ + assert repeats >= 1 + # First one has no skip, because feature map size changes. + first = _InvertedResidual(in_ch, out_ch, kernel_size, stride, exp_factor, + bn_momentum=bn_momentum) + remaining = [] + for _ in range(1, repeats): + remaining.append( + _InvertedResidual(out_ch, out_ch, kernel_size, 1, exp_factor, + bn_momentum=bn_momentum)) + return nn.Sequential(first, *remaining) + + +def _round_to_multiple_of(val, divisor, round_up_bias=0.9): + """ Asymmetric rounding to make `val` divisible by `divisor`. With default + bias, will round up, unless the number is no more than 10% greater than the + smaller divisible value, i.e. (83, 8) -> 80, but (84, 8) -> 88. """ + assert 0.0 < round_up_bias < 1.0 + new_val = max(divisor, int(val + divisor / 2) // divisor * divisor) + return new_val if new_val >= round_up_bias * val else new_val + divisor + + +def _get_depths(alpha): + """ Scales tensor depths as in reference MobileNet code, prefers rouding up + rather than down. """ + depths = [32, 16, 24, 40, 80, 96, 192, 320] + return [_round_to_multiple_of(depth * alpha, 8) for depth in depths] + + +class MNASNet(torch.nn.Module): + """ MNASNet, as described in https://arxiv.org/pdf/1807.11626.pdf. This + implements the B1 variant of the model. + >>> model = MNASNet(1000, 1.0) + >>> x = torch.rand(1, 3, 224, 224) + >>> y = model(x) + >>> y.dim() + 1 + >>> y.nelement() + 1000 + """ + # Version 2 adds depth scaling in the initial stages of the network. + _version = 2 + + def __init__(self, alpha, num_classes=1000, dropout=0.2): + super(MNASNet, self).__init__() + assert alpha > 0.0 + self.alpha = alpha + self.num_classes = num_classes + depths = _get_depths(alpha) + layers = [ + # First layer: regular conv. + nn.Conv2d(3, depths[0], 3, padding=1, stride=2, bias=False), + nn.BatchNorm2d(depths[0], momentum=_BN_MOMENTUM), + nn.ReLU(inplace=True), + # Depthwise separable, no skip. + nn.Conv2d(depths[0], depths[0], 3, padding=1, stride=1, + groups=depths[0], bias=False), + nn.BatchNorm2d(depths[0], momentum=_BN_MOMENTUM), + nn.ReLU(inplace=True), + nn.Conv2d(depths[0], depths[1], 1, padding=0, stride=1, bias=False), + nn.BatchNorm2d(depths[1], momentum=_BN_MOMENTUM), + # MNASNet blocks: stacks of inverted residuals. + _stack(depths[1], depths[2], 3, 2, 3, 3, _BN_MOMENTUM), + _stack(depths[2], depths[3], 5, 2, 3, 3, _BN_MOMENTUM), + _stack(depths[3], depths[4], 5, 2, 6, 3, _BN_MOMENTUM), + _stack(depths[4], depths[5], 3, 1, 6, 2, _BN_MOMENTUM), + _stack(depths[5], depths[6], 5, 2, 6, 4, _BN_MOMENTUM), + _stack(depths[6], depths[7], 3, 1, 6, 1, _BN_MOMENTUM), + # Final mapping to classifier input. + nn.Conv2d(depths[7], 1280, 1, padding=0, stride=1, bias=False), + nn.BatchNorm2d(1280, momentum=_BN_MOMENTUM), + nn.ReLU(inplace=True), + ] + self.layers = nn.Sequential(*layers) + self.classifier = nn.Sequential(nn.Dropout(p=dropout, inplace=True), + nn.Linear(1280, num_classes)) + self._initialize_weights() + + def forward(self, x): + x = self.layers(x) + # Equivalent to global avgpool and removing H and W dimensions. 
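+        # Editor's note (not in upstream torchvision): for an NCHW tensor this is
+        # the same as nn.functional.adaptive_avg_pool2d(x, 1).reshape(x.shape[0], -1),
+        # e.g. a (N, 1280, 7, 7) feature map becomes (N, 1280) before the classifier.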
+ x = x.mean([2, 3]) + return self.classifier(x) + + def _initialize_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode="fan_out", + nonlinearity="relu") + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + nn.init.kaiming_uniform_(m.weight, mode="fan_out", + nonlinearity="sigmoid") + nn.init.zeros_(m.bias) + + def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, + missing_keys, unexpected_keys, error_msgs): + version = local_metadata.get("version", None) + assert version in [1, 2] + + if version == 1 and not self.alpha == 1.0: + # In the initial version of the model (v1), stem was fixed-size. + # All other layer configurations were the same. This will patch + # the model so that it's identical to v1. Model with alpha 1.0 is + # unaffected. + depths = _get_depths(self.alpha) + v1_stem = [ + nn.Conv2d(3, 32, 3, padding=1, stride=2, bias=False), + nn.BatchNorm2d(32, momentum=_BN_MOMENTUM), + nn.ReLU(inplace=True), + nn.Conv2d(32, 32, 3, padding=1, stride=1, groups=32, + bias=False), + nn.BatchNorm2d(32, momentum=_BN_MOMENTUM), + nn.ReLU(inplace=True), + nn.Conv2d(32, 16, 1, padding=0, stride=1, bias=False), + nn.BatchNorm2d(16, momentum=_BN_MOMENTUM), + _stack(16, depths[2], 3, 2, 3, 3, _BN_MOMENTUM), + ] + for idx, layer in enumerate(v1_stem): + self.layers[idx] = layer + + # The model is now identical to v1, and must be saved as such. + self._version = 1 + warnings.warn( + "A new version of MNASNet model has been implemented. " + "Your checkpoint was saved using the previous version. " + "This checkpoint will load and work as before, but " + "you may want to upgrade by training a newer model or " + "transfer learning from an updated ImageNet checkpoint.", + UserWarning) + + super(MNASNet, self)._load_from_state_dict( + state_dict, prefix, local_metadata, strict, missing_keys, + unexpected_keys, error_msgs) + + +def _load_pretrained(model_name, model, progress): + if model_name not in _MODEL_URLS or _MODEL_URLS[model_name] is None: + raise ValueError( + "No checkpoint is available for model type {}".format(model_name)) + checkpoint_url = _MODEL_URLS[model_name] + model.load_state_dict( + load_state_dict_from_url(checkpoint_url, progress=progress)) + + +def mnasnet0_5(pretrained=False, progress=True, **kwargs): + """MNASNet with depth multiplier of 0.5 from + `"MnasNet: Platform-Aware Neural Architecture Search for Mobile" + `_. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + model = MNASNet(0.5, **kwargs) + if pretrained: + _load_pretrained("mnasnet0_5", model, progress) + return model + + +def mnasnet0_75(pretrained=False, progress=True, **kwargs): + """MNASNet with depth multiplier of 0.75 from + `"MnasNet: Platform-Aware Neural Architecture Search for Mobile" + `_. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + model = MNASNet(0.75, **kwargs) + if pretrained: + _load_pretrained("mnasnet0_75", model, progress) + return model + + +def mnasnet1_0(pretrained=False, progress=True, **kwargs): + """MNASNet with depth multiplier of 1.0 from + `"MnasNet: Platform-Aware Neural Architecture Search for Mobile" + `_. 
+ Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + model = MNASNet(1.0, **kwargs) + if pretrained: + _load_pretrained("mnasnet1_0", model, progress) + return model + + +def mnasnet1_3(pretrained=False, progress=True, **kwargs): + """MNASNet with depth multiplier of 1.3 from + `"MnasNet: Platform-Aware Neural Architecture Search for Mobile" + `_. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + model = MNASNet(1.3, **kwargs) + if pretrained: + _load_pretrained("mnasnet1_3", model, progress) + return model diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet.py new file mode 100644 index 0000000000..4108305d3f --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet.py @@ -0,0 +1,4 @@ +from .mobilenetv2 import MobileNetV2, mobilenet_v2, __all__ as mv2_all +from .mobilenetv3 import MobileNetV3, mobilenet_v3_large, mobilenet_v3_small, __all__ as mv3_all + +__all__ = mv2_all + mv3_all diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet_origin.py new file mode 100644 index 0000000000..e4c3069a60 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet_origin.py @@ -0,0 +1,177 @@ +from torch import nn +from .utils import load_state_dict_from_url + + +__all__ = ['MobileNetV2', 'mobilenet_v2'] + + +model_urls = { + 'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth', +} + + +def _make_divisible(v, divisor, min_value=None): + """ + This function is taken from the original tf repo. + It ensures that all layers have a channel number that is divisible by 8 + It can be seen here: + https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py + :param v: + :param divisor: + :param min_value: + :return: + """ + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. 
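+    # Editor's illustration (not in upstream torchvision): with divisor=8,
+    # _make_divisible(24, 8) -> 24 (already a multiple of 8),
+    # _make_divisible(4, 8) -> 8 (floored at min_value), and
+    # _make_divisible(35.9, 8) -> 40, because rounding down to 32 would shrink
+    # the channel count by more than 10%.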
+ if new_v < 0.9 * v: + new_v += divisor + return new_v + + +class ConvBNReLU(nn.Sequential): + def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1): + padding = (kernel_size - 1) // 2 + super(ConvBNReLU, self).__init__( + nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False), + nn.BatchNorm2d(out_planes), + nn.ReLU6(inplace=True) + ) + + +class InvertedResidual(nn.Module): + def __init__(self, inp, oup, stride, expand_ratio): + super(InvertedResidual, self).__init__() + self.stride = stride + assert stride in [1, 2] + + hidden_dim = int(round(inp * expand_ratio)) + self.use_res_connect = self.stride == 1 and inp == oup + + layers = [] + if expand_ratio != 1: + # pw + layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1)) + layers.extend([ + # dw + ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim), + # pw-linear + nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + ]) + self.conv = nn.Sequential(*layers) + + def forward(self, x): + if self.use_res_connect: + return x + self.conv(x) + else: + return self.conv(x) + + +class MobileNetV2(nn.Module): + def __init__(self, + num_classes=1000, + width_mult=1.0, + inverted_residual_setting=None, + round_nearest=8, + block=None): + """ + MobileNet V2 main class + + Args: + num_classes (int): Number of classes + width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount + inverted_residual_setting: Network structure + round_nearest (int): Round the number of channels in each layer to be a multiple of this number + Set to 1 to turn off rounding + block: Module specifying inverted residual building block for mobilenet + + """ + super(MobileNetV2, self).__init__() + + if block is None: + block = InvertedResidual + input_channel = 32 + last_channel = 1280 + + if inverted_residual_setting is None: + inverted_residual_setting = [ + # t, c, n, s + [1, 16, 1, 1], + [6, 24, 2, 2], + [6, 32, 3, 2], + [6, 64, 4, 2], + [6, 96, 3, 1], + [6, 160, 3, 2], + [6, 320, 1, 1], + ] + + # only check the first element, assuming user knows t,c,n,s are required + if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: + raise ValueError("inverted_residual_setting should be non-empty " + "or a 4-element list, got {}".format(inverted_residual_setting)) + + # building first layer + input_channel = _make_divisible(input_channel * width_mult, round_nearest) + self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest) + features = [ConvBNReLU(3, input_channel, stride=2)] + # building inverted residual blocks + for t, c, n, s in inverted_residual_setting: + output_channel = _make_divisible(c * width_mult, round_nearest) + for i in range(n): + stride = s if i == 0 else 1 + features.append(block(input_channel, output_channel, stride, expand_ratio=t)) + input_channel = output_channel + # building last several layers + features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1)) + # make it nn.Sequential + self.features = nn.Sequential(*features) + + # building classifier + self.classifier = nn.Sequential( + nn.Dropout(0.2), + nn.Linear(self.last_channel, num_classes), + ) + + # weight initialization + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out') + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + 
nn.init.normal_(m.weight, 0, 0.01) + nn.init.zeros_(m.bias) + + def _forward_impl(self, x): + # This exists since TorchScript doesn't support inheritance, so the superclass method + # (this one) needs to have a name other than `forward` that can be accessed in a subclass + x = self.features(x) + # Cannot use "squeeze" as batch-size can be 1 => must use reshape with x.shape[0] + x = nn.functional.adaptive_avg_pool2d(x, 1).reshape(x.shape[0], -1) + x = self.classifier(x) + return x + + def forward(self, x): + return self._forward_impl(x) + + +def mobilenet_v2(pretrained=False, progress=True, **kwargs): + """ + Constructs a MobileNetV2 architecture from + `"MobileNetV2: Inverted Residuals and Linear Bottlenecks" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + model = MobileNetV2(**kwargs) + if pretrained: + state_dict = load_state_dict_from_url(model_urls['mobilenet_v2'], + progress=progress) + model.load_state_dict(state_dict) + return model diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv2.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv2.py new file mode 100644 index 0000000000..1a470953df --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv2.py @@ -0,0 +1,211 @@ +import warnings +from typing import Callable, Any, Optional, List + +import torch +from torch import Tensor +from torch import nn + +from .._internally_replaced_utils import load_state_dict_from_url +from ..ops.misc import ConvNormActivation +from ..utils import _log_api_usage_once +from ._utils import _make_divisible + + +__all__ = ["MobileNetV2", "mobilenet_v2"] + + +model_urls = { + "mobilenet_v2": "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth", +} + + +# necessary for backwards compatibility +class _DeprecatedConvBNAct(ConvNormActivation): + def __init__(self, *args, **kwargs): + warnings.warn( + "The ConvBNReLU/ConvBNActivation classes are deprecated and will be removed in future versions. 
" + "Use torchvision.ops.misc.ConvNormActivation instead.", + FutureWarning, + ) + if kwargs.get("norm_layer", None) is None: + kwargs["norm_layer"] = nn.BatchNorm2d + if kwargs.get("activation_layer", None) is None: + kwargs["activation_layer"] = nn.ReLU6 + super().__init__(*args, **kwargs) + + +ConvBNReLU = _DeprecatedConvBNAct +ConvBNActivation = _DeprecatedConvBNAct + + +class InvertedResidual(nn.Module): + def __init__( + self, inp: int, oup: int, stride: int, expand_ratio: int, norm_layer: Optional[Callable[..., nn.Module]] = None + ) -> None: + super().__init__() + self.stride = stride + assert stride in [1, 2] + + if norm_layer is None: + norm_layer = nn.BatchNorm2d + + hidden_dim = int(round(inp * expand_ratio)) + self.use_res_connect = self.stride == 1 and inp == oup + + layers: List[nn.Module] = [] + if expand_ratio != 1: + # pw + layers.append( + ConvNormActivation(inp, hidden_dim, kernel_size=1, norm_layer=norm_layer, activation_layer=nn.ReLU6) + ) + layers.extend( + [ + # dw + ConvNormActivation( + hidden_dim, + hidden_dim, + stride=stride, + groups=hidden_dim, + norm_layer=norm_layer, + activation_layer=nn.ReLU6, + ), + # pw-linear + nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), + norm_layer(oup), + ] + ) + self.conv = nn.Sequential(*layers) + self.out_channels = oup + self._is_cn = stride > 1 + + def forward(self, x: Tensor) -> Tensor: + if self.use_res_connect: + return x + self.conv(x) + else: + return self.conv(x) + + +class MobileNetV2(nn.Module): + def __init__( + self, + num_classes: int = 1000, + width_mult: float = 1.0, + inverted_residual_setting: Optional[List[List[int]]] = None, + round_nearest: int = 8, + block: Optional[Callable[..., nn.Module]] = None, + norm_layer: Optional[Callable[..., nn.Module]] = None, + dropout: float = 0.2, + ) -> None: + """ + MobileNet V2 main class + + Args: + num_classes (int): Number of classes + width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount + inverted_residual_setting: Network structure + round_nearest (int): Round the number of channels in each layer to be a multiple of this number + Set to 1 to turn off rounding + block: Module specifying inverted residual building block for mobilenet + norm_layer: Module specifying the normalization layer to use + dropout (float): The droupout probability + + """ + super().__init__() + _log_api_usage_once(self) + + if block is None: + block = InvertedResidual + + if norm_layer is None: + norm_layer = nn.BatchNorm2d + + input_channel = 32 + last_channel = 1280 + + if inverted_residual_setting is None: + inverted_residual_setting = [ + # t, c, n, s + [1, 16, 1, 1], + [6, 24, 2, 2], + [6, 32, 3, 2], + [6, 64, 4, 2], + [6, 96, 3, 1], + [6, 160, 3, 2], + [6, 320, 1, 1], + ] + + # only check the first element, assuming user knows t,c,n,s are required + if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: + raise ValueError( + f"inverted_residual_setting should be non-empty or a 4-element list, got {inverted_residual_setting}" + ) + + # building first layer + input_channel = _make_divisible(input_channel * width_mult, round_nearest) + self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest) + features: List[nn.Module] = [ + ConvNormActivation(3, input_channel, stride=2, norm_layer=norm_layer, activation_layer=nn.ReLU6) + ] + # building inverted residual blocks + for t, c, n, s in inverted_residual_setting: + output_channel = _make_divisible(c * width_mult, round_nearest) + for i in 
range(n): + stride = s if i == 0 else 1 + features.append(block(input_channel, output_channel, stride, expand_ratio=t, norm_layer=norm_layer)) + input_channel = output_channel + # building last several layers + features.append( + ConvNormActivation( + input_channel, self.last_channel, kernel_size=1, norm_layer=norm_layer, activation_layer=nn.ReLU6 + ) + ) + # make it nn.Sequential + self.features = nn.Sequential(*features) + + # building classifier + self.classifier = nn.Sequential( + nn.Dropout(p=dropout), + nn.Linear(self.last_channel, num_classes), + ) + + # weight initialization + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode="fan_out") + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + nn.init.zeros_(m.bias) + + def _forward_impl(self, x: Tensor) -> Tensor: + # This exists since TorchScript doesn't support inheritance, so the superclass method + # (this one) needs to have a name other than `forward` that can be accessed in a subclass + x = self.features(x) + # Cannot use "squeeze" as batch-size can be 1 + x = nn.functional.adaptive_avg_pool2d(x, (1, 1)) + x = torch.flatten(x, 1) + x = self.classifier(x) + return x + + def forward(self, x: Tensor) -> Tensor: + return self._forward_impl(x) + + +def mobilenet_v2(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> MobileNetV2: + """ + Constructs a MobileNetV2 architecture from + `"MobileNetV2: Inverted Residuals and Linear Bottlenecks" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + model = MobileNetV2(**kwargs) + if pretrained: + state_dict = load_state_dict_from_url(model_urls["mobilenet_v2"], progress=progress) + model.load_state_dict(state_dict) + return model diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv3.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv3.py new file mode 100644 index 0000000000..e6a2bbbfbe --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv3.py @@ -0,0 +1,333 @@ +import warnings +from functools import partial +from typing import Any, Callable, List, Optional, Sequence + +import torch +from torch import nn, Tensor + +from .._internally_replaced_utils import load_state_dict_from_url +from ..ops.misc import ConvNormActivation, SqueezeExcitation as SElayer +from ..utils import _log_api_usage_once +from ._utils import _make_divisible + + +__all__ = ["MobileNetV3", "mobilenet_v3_large", "mobilenet_v3_small"] + + +model_urls = { + "mobilenet_v3_large": "https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth", + "mobilenet_v3_small": "https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth", +} + + +class SqueezeExcitation(SElayer): + """DEPRECATED""" + + def __init__(self, input_channels: int, squeeze_factor: int = 4): + squeeze_channels = _make_divisible(input_channels // squeeze_factor, 8) + # super().__init__(input_channels, squeeze_channels, scale_activation=nn.Hardsigmoid) + super().__init__(input_channels, squeeze_channels, scale_activation=nn.Sigmoid) + + self.relu = self.activation + delattr(self, "activation") + warnings.warn( + "This SqueezeExcitation class is deprecated and will be removed in future versions. 
" + "Use torchvision.ops.misc.SqueezeExcitation instead.", + FutureWarning, + ) + + +class InvertedResidualConfig: + # Stores information listed at Tables 1 and 2 of the MobileNetV3 paper + def __init__( + self, + input_channels: int, + kernel: int, + expanded_channels: int, + out_channels: int, + use_se: bool, + activation: str, + stride: int, + dilation: int, + width_mult: float, + ): + self.input_channels = self.adjust_channels(input_channels, width_mult) + self.kernel = kernel + self.expanded_channels = self.adjust_channels(expanded_channels, width_mult) + self.out_channels = self.adjust_channels(out_channels, width_mult) + self.use_se = use_se + self.use_hs = activation == "HS" + self.stride = stride + self.dilation = dilation + + @staticmethod + def adjust_channels(channels: int, width_mult: float): + return _make_divisible(channels * width_mult, 8) + + +class InvertedResidual(nn.Module): + # Implemented as described at section 5 of MobileNetV3 paper + def __init__( + self, + cnf: InvertedResidualConfig, + norm_layer: Callable[..., nn.Module], + se_layer: Callable[..., nn.Module] = partial(SElayer, scale_activation=nn.Sigmoid), + ): + super().__init__() + if not (1 <= cnf.stride <= 2): + raise ValueError("illegal stride value") + + self.use_res_connect = cnf.stride == 1 and cnf.input_channels == cnf.out_channels + + layers: List[nn.Module] = [] + # activation_layer = nn.Hardswish if cnf.use_hs else nn.ReLU + activation_layer = nn.ReLU6 + + + # expand + if cnf.expanded_channels != cnf.input_channels: + layers.append( + ConvNormActivation( + cnf.input_channels, + cnf.expanded_channels, + kernel_size=1, + norm_layer=norm_layer, + activation_layer=activation_layer, + ) + ) + + # depthwise + stride = 1 if cnf.dilation > 1 else cnf.stride + layers.append( + ConvNormActivation( + cnf.expanded_channels, + cnf.expanded_channels, + kernel_size=cnf.kernel, + stride=stride, + dilation=cnf.dilation, + groups=cnf.expanded_channels, + norm_layer=norm_layer, + activation_layer=activation_layer, + ) + ) + if cnf.use_se: + squeeze_channels = _make_divisible(cnf.expanded_channels // 4, 8) + layers.append(se_layer(cnf.expanded_channels, squeeze_channels)) + + # project + layers.append( + ConvNormActivation( + cnf.expanded_channels, cnf.out_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=None + ) + ) + + self.block = nn.Sequential(*layers) + self.out_channels = cnf.out_channels + self._is_cn = cnf.stride > 1 + + def forward(self, input: Tensor) -> Tensor: + result = self.block(input) + if self.use_res_connect: + result += input + return result + + +class MobileNetV3(nn.Module): + def __init__( + self, + inverted_residual_setting: List[InvertedResidualConfig], + last_channel: int, + num_classes: int = 1000, + block: Optional[Callable[..., nn.Module]] = None, + norm_layer: Optional[Callable[..., nn.Module]] = None, + dropout: float = 0.2, + **kwargs: Any, + ) -> None: + """ + MobileNet V3 main class + + Args: + inverted_residual_setting (List[InvertedResidualConfig]): Network structure + last_channel (int): The number of channels on the penultimate layer + num_classes (int): Number of classes + block (Optional[Callable[..., nn.Module]]): Module specifying inverted residual building block for mobilenet + norm_layer (Optional[Callable[..., nn.Module]]): Module specifying the normalization layer to use + dropout (float): The droupout probability + """ + super().__init__() + _log_api_usage_once(self) + + if not inverted_residual_setting: + raise ValueError("The inverted_residual_setting should 
not be empty") + elif not ( + isinstance(inverted_residual_setting, Sequence) + and all([isinstance(s, InvertedResidualConfig) for s in inverted_residual_setting]) + ): + raise TypeError("The inverted_residual_setting should be List[InvertedResidualConfig]") + + if block is None: + block = InvertedResidual + + if norm_layer is None: + norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.01) + + layers: List[nn.Module] = [] + + # building first layer + firstconv_output_channels = inverted_residual_setting[0].input_channels + layers.append( + ConvNormActivation( + 3, + firstconv_output_channels, + kernel_size=3, + stride=2, + norm_layer=norm_layer, + activation_layer=nn.ReLU6, #nn.Hardswish, + ) + ) + + # building inverted residual blocks + for cnf in inverted_residual_setting: + layers.append(block(cnf, norm_layer)) + + # building last several layers + lastconv_input_channels = inverted_residual_setting[-1].out_channels + lastconv_output_channels = 6 * lastconv_input_channels + layers.append( + ConvNormActivation( + lastconv_input_channels, + lastconv_output_channels, + kernel_size=1, + norm_layer=norm_layer, + activation_layer=nn.ReLU6, #nn.Hardswish, + ) + ) + + self.features = nn.Sequential(*layers) + self.avgpool = nn.AdaptiveAvgPool2d(1) + self.classifier = nn.Sequential( + nn.Linear(lastconv_output_channels, last_channel), + # nn.Hardswish(inplace=True), + nn.ReLU6(inplace=True), + nn.Dropout(p=dropout, inplace=True), + nn.Linear(last_channel, num_classes), + ) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode="fan_out") + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + nn.init.zeros_(m.bias) + + def _forward_impl(self, x: Tensor) -> Tensor: + x = self.features(x) + + x = self.avgpool(x) + x = torch.flatten(x, 1) + + x = self.classifier(x) + + return x + + def forward(self, x: Tensor) -> Tensor: + return self._forward_impl(x) + + +def _mobilenet_v3_conf( + arch: str, width_mult: float = 1.0, reduced_tail: bool = False, dilated: bool = False, **kwargs: Any +): + reduce_divider = 2 if reduced_tail else 1 + dilation = 2 if dilated else 1 + + bneck_conf = partial(InvertedResidualConfig, width_mult=width_mult) + adjust_channels = partial(InvertedResidualConfig.adjust_channels, width_mult=width_mult) + + if arch == "mobilenet_v3_large": + inverted_residual_setting = [ + bneck_conf(16, 3, 16, 16, False, "RE", 1, 1), + bneck_conf(16, 3, 64, 24, False, "RE", 2, 1), # C1 + bneck_conf(24, 3, 72, 24, False, "RE", 1, 1), + bneck_conf(24, 5, 72, 40, True, "RE", 2, 1), # C2 + bneck_conf(40, 5, 120, 40, True, "RE", 1, 1), + bneck_conf(40, 5, 120, 40, True, "RE", 1, 1), + bneck_conf(40, 3, 240, 80, False, "HS", 2, 1), # C3 + bneck_conf(80, 3, 200, 80, False, "HS", 1, 1), + bneck_conf(80, 3, 184, 80, False, "HS", 1, 1), + bneck_conf(80, 3, 184, 80, False, "HS", 1, 1), + bneck_conf(80, 3, 480, 112, True, "HS", 1, 1), + bneck_conf(112, 3, 672, 112, True, "HS", 1, 1), + bneck_conf(112, 5, 672, 160 // reduce_divider, True, "HS", 2, dilation), # C4 + bneck_conf(160 // reduce_divider, 5, 960 // reduce_divider, 160 // reduce_divider, True, "HS", 1, dilation), + bneck_conf(160 // reduce_divider, 5, 960 // reduce_divider, 160 // reduce_divider, True, "HS", 1, dilation), + ] + last_channel = adjust_channels(1280 // reduce_divider) # C5 + elif arch == "mobilenet_v3_small": + 
inverted_residual_setting = [ + bneck_conf(16, 3, 16, 16, True, "RE", 2, 1), # C1 + bneck_conf(16, 3, 72, 24, False, "RE", 2, 1), # C2 + bneck_conf(24, 3, 88, 24, False, "RE", 1, 1), + bneck_conf(24, 5, 96, 40, True, "HS", 2, 1), # C3 + bneck_conf(40, 5, 240, 40, True, "HS", 1, 1), + bneck_conf(40, 5, 240, 40, True, "HS", 1, 1), + bneck_conf(40, 5, 120, 48, True, "HS", 1, 1), + bneck_conf(48, 5, 144, 48, True, "HS", 1, 1), + bneck_conf(48, 5, 288, 96 // reduce_divider, True, "HS", 2, dilation), # C4 + bneck_conf(96 // reduce_divider, 5, 576 // reduce_divider, 96 // reduce_divider, True, "HS", 1, dilation), + bneck_conf(96 // reduce_divider, 5, 576 // reduce_divider, 96 // reduce_divider, True, "HS", 1, dilation), + ] + last_channel = adjust_channels(1024 // reduce_divider) # C5 + else: + raise ValueError(f"Unsupported model type {arch}") + + return inverted_residual_setting, last_channel + + +def _mobilenet_v3( + arch: str, + inverted_residual_setting: List[InvertedResidualConfig], + last_channel: int, + pretrained: bool, + progress: bool, + **kwargs: Any, +): + model = MobileNetV3(inverted_residual_setting, last_channel, **kwargs) + if pretrained: + if model_urls.get(arch, None) is None: + raise ValueError(f"No checkpoint is available for model type {arch}") + state_dict = load_state_dict_from_url(model_urls[arch], progress=progress) + model.load_state_dict(state_dict) + return model + + +def mobilenet_v3_large(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> MobileNetV3: + """ + Constructs a large MobileNetV3 architecture from + `"Searching for MobileNetV3" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + arch = "mobilenet_v3_large" + inverted_residual_setting, last_channel = _mobilenet_v3_conf(arch, **kwargs) + return _mobilenet_v3(arch, inverted_residual_setting, last_channel, pretrained, progress, **kwargs) + + +def mobilenet_v3_small(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> MobileNetV3: + """ + Constructs a small MobileNetV3 architecture from + `"Searching for MobileNetV3" `_. 
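+
+    Example (editor's illustration, not part of the upstream docstring)::
+
+        >>> model = mobilenet_v3_small().eval()
+        >>> model(torch.rand(1, 3, 224, 224)).shape
+        torch.Size([1, 1000])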
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + arch = "mobilenet_v3_small" + inverted_residual_setting, last_channel = _mobilenet_v3_conf(arch, **kwargs) + return _mobilenet_v3(arch, inverted_residual_setting, last_channel, pretrained, progress, **kwargs) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/__init__.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/__init__.py new file mode 100644 index 0000000000..deae997a21 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/__init__.py @@ -0,0 +1,5 @@ +from .mobilenet import * +from .resnet import * +from .googlenet import * +from .inception import * +from .shufflenetv2 import * diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/googlenet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/googlenet.py new file mode 100644 index 0000000000..d01534bc70 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/googlenet.py @@ -0,0 +1,166 @@ +import warnings +import torch +import torch.nn as nn +from torch.nn import functional as F +from torch.jit.annotations import Optional + +from torchvision.models.utils import load_state_dict_from_url +from torchvision.models.googlenet import ( + GoogLeNetOutputs, BasicConv2d, Inception, InceptionAux, GoogLeNet, model_urls) + +from .utils import _replace_relu, quantize_model + + +__all__ = ['QuantizableGoogLeNet', 'googlenet'] + +quant_model_urls = { + # fp32 GoogLeNet ported from TensorFlow, with weights quantized in PyTorch + 'googlenet_fbgemm': 'https://download.pytorch.org/models/quantized/googlenet_fbgemm-c00238cf.pth', +} + + +def googlenet(pretrained=False, progress=True, quantize=False, **kwargs): + r"""GoogLeNet (Inception v1) model architecture from + `"Going Deeper with Convolutions" `_. + + Note that quantize = True returns a quantized model with 8 bit + weights. Quantized models only support inference and run on CPUs. + GPU inference is not yet supported + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + aux_logits (bool): If True, adds two auxiliary branches that can improve training. + Default: *False* when pretrained is True otherwise *True* + transform_input (bool): If True, preprocesses the input according to the method with which it + was trained on ImageNet. 
Default: *False* + """ + if pretrained: + if 'transform_input' not in kwargs: + kwargs['transform_input'] = True + if 'aux_logits' not in kwargs: + kwargs['aux_logits'] = False + if kwargs['aux_logits']: + warnings.warn('auxiliary heads in the pretrained googlenet model are NOT pretrained, ' + 'so make sure to train them') + original_aux_logits = kwargs['aux_logits'] + kwargs['aux_logits'] = True + kwargs['init_weights'] = False + + model = QuantizableGoogLeNet(**kwargs) + _replace_relu(model) + + if quantize: + # TODO use pretrained as a string to specify the backend + backend = 'fbgemm' + quantize_model(model, backend) + else: + assert pretrained in [True, False] + + if pretrained: + if quantize: + model_url = quant_model_urls['googlenet' + '_' + backend] + else: + model_url = model_urls['googlenet'] + + state_dict = load_state_dict_from_url(model_url, + progress=progress) + + model.load_state_dict(state_dict) + + if not original_aux_logits: + model.aux_logits = False + model.aux1 = None + model.aux2 = None + return model + + +class QuantizableBasicConv2d(BasicConv2d): + + def __init__(self, *args, **kwargs): + super(QuantizableBasicConv2d, self).__init__(*args, **kwargs) + self.relu = nn.ReLU() + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.relu(x) + return x + + def fuse_model(self): + torch.quantization.fuse_modules(self, ["conv", "bn", "relu"], inplace=True) + + +class QuantizableInception(Inception): + + def __init__(self, *args, **kwargs): + super(QuantizableInception, self).__init__( + conv_block=QuantizableBasicConv2d, *args, **kwargs) + self.cat = nn.quantized.FloatFunctional() + + def forward(self, x): + outputs = self._forward(x) + return self.cat.cat(outputs, 1) + + +class QuantizableInceptionAux(InceptionAux): + + def __init__(self, *args, **kwargs): + super(QuantizableInceptionAux, self).__init__( + conv_block=QuantizableBasicConv2d, *args, **kwargs) + self.relu = nn.ReLU() + self.dropout = nn.Dropout(0.7) + + def forward(self, x): + # aux1: N x 512 x 14 x 14, aux2: N x 528 x 14 x 14 + x = F.adaptive_avg_pool2d(x, (4, 4)) + # aux1: N x 512 x 4 x 4, aux2: N x 528 x 4 x 4 + x = self.conv(x) + # N x 128 x 4 x 4 + x = torch.flatten(x, 1) + # N x 2048 + x = self.relu(self.fc1(x)) + # N x 1024 + x = self.dropout(x) + # N x 1024 + x = self.fc2(x) + # N x 1000 (num_classes) + + return x + + +class QuantizableGoogLeNet(GoogLeNet): + + def __init__(self, *args, **kwargs): + super(QuantizableGoogLeNet, self).__init__( + blocks=[QuantizableBasicConv2d, QuantizableInception, QuantizableInceptionAux], + *args, + **kwargs + ) + self.quant = torch.quantization.QuantStub() + self.dequant = torch.quantization.DeQuantStub() + + def forward(self, x): + x = self._transform_input(x) + x = self.quant(x) + x, aux1, aux2 = self._forward(x) + x = self.dequant(x) + aux_defined = self.training and self.aux_logits + if torch.jit.is_scripting(): + if not aux_defined: + warnings.warn("Scripted QuantizableGoogleNet always returns GoogleNetOutputs Tuple") + return GoogLeNetOutputs(x, aux2, aux1) + else: + return self.eager_outputs(x, aux2, aux1) + + def fuse_model(self): + r"""Fuse conv/bn/relu modules in googlenet model + + Fuse conv+bn+relu/ conv+relu/conv+bn modules to prepare for quantization. + Model is modified in place. 
Note that this operation does not change numerics + and the model after modification is in floating point + """ + + for m in self.modules(): + if type(m) == QuantizableBasicConv2d: + m.fuse_model() diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/inception.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/inception.py new file mode 100644 index 0000000000..f452de0281 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/inception.py @@ -0,0 +1,222 @@ +import warnings +from collections import namedtuple + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torchvision.models import inception as inception_module +from torchvision.models.inception import InceptionOutputs +from torch.jit.annotations import Optional +from torchvision.models.utils import load_state_dict_from_url +from .utils import _replace_relu, quantize_model + + +__all__ = [ + "QuantizableInception3", + "inception_v3", +] + + +quant_model_urls = { + # fp32 weights ported from TensorFlow, quantized in PyTorch + "inception_v3_google_fbgemm": + "https://download.pytorch.org/models/quantized/inception_v3_google_fbgemm-71447a44.pth" +} + + +def inception_v3(pretrained=False, progress=True, quantize=False, **kwargs): + r"""Inception v3 model architecture from + `"Rethinking the Inception Architecture for Computer Vision" `_. + + .. note:: + **Important**: In contrast to the other models the inception_v3 expects tensors with a size of + N x 3 x 299 x 299, so ensure your images are sized accordingly. + + Note that quantize = True returns a quantized model with 8 bit + weights. Quantized models only support inference and run on CPUs. + GPU inference is not yet supported + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + aux_logits (bool): If True, add an auxiliary branch that can improve training. + Default: *True* + transform_input (bool): If True, preprocesses the input according to the method with which it + was trained on ImageNet. 
Default: *False* + """ + if pretrained: + if "transform_input" not in kwargs: + kwargs["transform_input"] = True + if "aux_logits" in kwargs: + original_aux_logits = kwargs["aux_logits"] + kwargs["aux_logits"] = True + else: + original_aux_logits = False + + model = QuantizableInception3(**kwargs) + _replace_relu(model) + + if quantize: + # TODO use pretrained as a string to specify the backend + backend = 'fbgemm' + quantize_model(model, backend) + else: + assert pretrained in [True, False] + + if pretrained: + if quantize: + if not original_aux_logits: + model.aux_logits = False + del model.AuxLogits + model_url = quant_model_urls['inception_v3_google' + '_' + backend] + else: + model_url = inception_module.model_urls['inception_v3_google'] + + state_dict = load_state_dict_from_url(model_url, + progress=progress) + + model.load_state_dict(state_dict) + + if not quantize: + if not original_aux_logits: + model.aux_logits = False + del model.AuxLogits + return model + + +class QuantizableBasicConv2d(inception_module.BasicConv2d): + def __init__(self, *args, **kwargs): + super(QuantizableBasicConv2d, self).__init__(*args, **kwargs) + self.relu = nn.ReLU() + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.relu(x) + return x + + def fuse_model(self): + torch.quantization.fuse_modules(self, ["conv", "bn", "relu"], inplace=True) + + +class QuantizableInceptionA(inception_module.InceptionA): + def __init__(self, *args, **kwargs): + super(QuantizableInceptionA, self).__init__(conv_block=QuantizableBasicConv2d, *args, **kwargs) + self.myop = nn.quantized.FloatFunctional() + + def forward(self, x): + outputs = self._forward(x) + return self.myop.cat(outputs, 1) + + +class QuantizableInceptionB(inception_module.InceptionB): + def __init__(self, *args, **kwargs): + super(QuantizableInceptionB, self).__init__(conv_block=QuantizableBasicConv2d, *args, **kwargs) + self.myop = nn.quantized.FloatFunctional() + + def forward(self, x): + outputs = self._forward(x) + return self.myop.cat(outputs, 1) + + +class QuantizableInceptionC(inception_module.InceptionC): + def __init__(self, *args, **kwargs): + super(QuantizableInceptionC, self).__init__(conv_block=QuantizableBasicConv2d, *args, **kwargs) + self.myop = nn.quantized.FloatFunctional() + + def forward(self, x): + outputs = self._forward(x) + return self.myop.cat(outputs, 1) + + +class QuantizableInceptionD(inception_module.InceptionD): + def __init__(self, *args, **kwargs): + super(QuantizableInceptionD, self).__init__(conv_block=QuantizableBasicConv2d, *args, **kwargs) + self.myop = nn.quantized.FloatFunctional() + + def forward(self, x): + outputs = self._forward(x) + return self.myop.cat(outputs, 1) + + +class QuantizableInceptionE(inception_module.InceptionE): + def __init__(self, *args, **kwargs): + super(QuantizableInceptionE, self).__init__(conv_block=QuantizableBasicConv2d, *args, **kwargs) + self.myop1 = nn.quantized.FloatFunctional() + self.myop2 = nn.quantized.FloatFunctional() + self.myop3 = nn.quantized.FloatFunctional() + + def _forward(self, x): + branch1x1 = self.branch1x1(x) + + branch3x3 = self.branch3x3_1(x) + branch3x3 = [self.branch3x3_2a(branch3x3), self.branch3x3_2b(branch3x3)] + branch3x3 = self.myop1.cat(branch3x3, 1) + + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = [ + self.branch3x3dbl_3a(branch3x3dbl), + self.branch3x3dbl_3b(branch3x3dbl), + ] + branch3x3dbl = self.myop2.cat(branch3x3dbl, 1) + + branch_pool = F.avg_pool2d(x, kernel_size=3, 
stride=1, padding=1) + branch_pool = self.branch_pool(branch_pool) + + outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool] + return outputs + + def forward(self, x): + outputs = self._forward(x) + return self.myop3.cat(outputs, 1) + + +class QuantizableInceptionAux(inception_module.InceptionAux): + def __init__(self, *args, **kwargs): + super(QuantizableInceptionAux, self).__init__(conv_block=QuantizableBasicConv2d, *args, **kwargs) + + +class QuantizableInception3(inception_module.Inception3): + def __init__(self, num_classes=1000, aux_logits=True, transform_input=False): + super(QuantizableInception3, self).__init__( + num_classes=num_classes, + aux_logits=aux_logits, + transform_input=transform_input, + inception_blocks=[ + QuantizableBasicConv2d, + QuantizableInceptionA, + QuantizableInceptionB, + QuantizableInceptionC, + QuantizableInceptionD, + QuantizableInceptionE, + QuantizableInceptionAux + ] + ) + self.quant = torch.quantization.QuantStub() + self.dequant = torch.quantization.DeQuantStub() + + def forward(self, x): + x = self._transform_input(x) + x = self.quant(x) + x, aux = self._forward(x) + x = self.dequant(x) + aux_defined = self.training and self.aux_logits + if torch.jit.is_scripting(): + if not aux_defined: + warnings.warn("Scripted QuantizableInception3 always returns QuantizableInception3 Tuple") + return InceptionOutputs(x, aux) + else: + return self.eager_outputs(x, aux) + + def fuse_model(self): + r"""Fuse conv/bn/relu modules in inception model + + Fuse conv+bn+relu/ conv+relu/conv+bn modules to prepare for quantization. + Model is modified in place. Note that this operation does not change numerics + and the model after modification is in floating point + """ + + for m in self.modules(): + if type(m) == QuantizableBasicConv2d: + m.fuse_model() diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenet.py new file mode 100644 index 0000000000..8f2c42db64 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenet.py @@ -0,0 +1,4 @@ +from .mobilenetv2 import QuantizableMobileNetV2, mobilenet_v2, __all__ as mv2_all +from .mobilenetv3 import QuantizableMobileNetV3, mobilenet_v3_large, __all__ as mv3_all + +__all__ = mv2_all + mv3_all diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv2.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv2.py new file mode 100644 index 0000000000..faa63e73be --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv2.py @@ -0,0 +1,102 @@ +from typing import Any + +from torch import Tensor +from torch import nn +from torch.quantization import QuantStub, DeQuantStub, fuse_modules +from torchvision.models.mobilenetv2 import InvertedResidual, MobileNetV2, model_urls + +from ..._internally_replaced_utils import load_state_dict_from_url +from ...ops.misc import ConvNormActivation +from .utils import _replace_relu, quantize_model + + +__all__ = ["QuantizableMobileNetV2", "mobilenet_v2"] + +quant_model_urls = { + "mobilenet_v2_qnnpack": "https://download.pytorch.org/models/quantized/mobilenet_v2_qnnpack_37f702c5.pth" +} + + +class QuantizableInvertedResidual(InvertedResidual): + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.skip_add = nn.quantized.FloatFunctional() + + def 
forward(self, x: Tensor) -> Tensor: + if self.use_res_connect: + return self.skip_add.add(x, self.conv(x)) + else: + return self.conv(x) + + def fuse_model(self) -> None: + for idx in range(len(self.conv)): + if type(self.conv[idx]) is nn.Conv2d: + fuse_modules(self.conv, [str(idx), str(idx + 1)], inplace=True) + + +class QuantizableMobileNetV2(MobileNetV2): + def __init__(self, *args: Any, **kwargs: Any) -> None: + """ + MobileNet V2 main class + + Args: + Inherits args from floating point MobileNetV2 + """ + super().__init__(*args, **kwargs) + self.quant = QuantStub() + self.dequant = DeQuantStub() + + def forward(self, x: Tensor) -> Tensor: + x = self.quant(x) + x = self._forward_impl(x) + x = self.dequant(x) + return x + + def fuse_model(self) -> None: + for m in self.modules(): + if type(m) is ConvNormActivation: + fuse_modules(m, ["0", "1", "2"], inplace=True) + if type(m) is QuantizableInvertedResidual: + m.fuse_model() + + +def mobilenet_v2( + pretrained: bool = False, + progress: bool = True, + quantize: bool = False, + **kwargs: Any, +) -> QuantizableMobileNetV2: + """ + Constructs a MobileNetV2 architecture from + `"MobileNetV2: Inverted Residuals and Linear Bottlenecks" + `_. + + Note that quantize = True returns a quantized model with 8 bit + weights. Quantized models only support inference and run on CPUs. + GPU inference is not yet supported + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet. + progress (bool): If True, displays a progress bar of the download to stderr + quantize(bool): If True, returns a quantized model, else returns a float model + """ + model = QuantizableMobileNetV2(block=QuantizableInvertedResidual, **kwargs) + _replace_relu(model) + + if quantize: + # TODO use pretrained as a string to specify the backend + backend = "qnnpack" + quantize_model(model, backend) + else: + assert pretrained in [True, False] + + if pretrained: + if quantize: + model_url = quant_model_urls["mobilenet_v2_" + backend] + else: + model_url = model_urls["mobilenet_v2"] + + state_dict = load_state_dict_from_url(model_url, progress=progress) + + model.load_state_dict(state_dict) + return model diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv3.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv3.py new file mode 100644 index 0000000000..948b72ead7 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv3.py @@ -0,0 +1,171 @@ +from typing import Any, List, Optional + +import torch +from torch import nn, Tensor +from torch.quantization import QuantStub, DeQuantStub, fuse_modules + +from ..._internally_replaced_utils import load_state_dict_from_url +from ...ops.misc import ConvNormActivation, SqueezeExcitation +from ..mobilenetv3 import InvertedResidual, InvertedResidualConfig, MobileNetV3, model_urls, _mobilenet_v3_conf +from .utils import _replace_relu + + +__all__ = ["QuantizableMobileNetV3", "mobilenet_v3_large"] + +quant_model_urls = { + "mobilenet_v3_large_qnnpack": "https://download.pytorch.org/models/quantized/mobilenet_v3_large_qnnpack-5bcacf28.pth", +} + + +class QuantizableSqueezeExcitation(SqueezeExcitation): + _version = 2 + + def __init__(self, *args: Any, **kwargs: Any) -> None: + kwargs["scale_activation"] = nn.Hardsigmoid + super().__init__(*args, **kwargs) + self.skip_mul = nn.quantized.FloatFunctional() + + def forward(self, input: Tensor) -> Tensor: + return self.skip_mul.mul(self._scale(input), 
input) + + def fuse_model(self) -> None: + fuse_modules(self, ["fc1", "activation"], inplace=True) + + def _load_from_state_dict( + self, + state_dict, + prefix, + local_metadata, + strict, + missing_keys, + unexpected_keys, + error_msgs, + ): + version = local_metadata.get("version", None) + + if version is None or version < 2: + default_state_dict = { + "scale_activation.activation_post_process.scale": torch.tensor([1.0]), + "scale_activation.activation_post_process.zero_point": torch.tensor([0], dtype=torch.int32), + "scale_activation.activation_post_process.fake_quant_enabled": torch.tensor([1]), + "scale_activation.activation_post_process.observer_enabled": torch.tensor([1]), + } + for k, v in default_state_dict.items(): + full_key = prefix + k + if full_key not in state_dict: + state_dict[full_key] = v + + super()._load_from_state_dict( + state_dict, + prefix, + local_metadata, + strict, + missing_keys, + unexpected_keys, + error_msgs, + ) + + +class QuantizableInvertedResidual(InvertedResidual): + # TODO https://github.com/pytorch/vision/pull/4232#pullrequestreview-730461659 + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(se_layer=QuantizableSqueezeExcitation, *args, **kwargs) # type: ignore[misc] + self.skip_add = nn.quantized.FloatFunctional() + + def forward(self, x: Tensor) -> Tensor: + if self.use_res_connect: + return self.skip_add.add(x, self.block(x)) + else: + return self.block(x) + + +class QuantizableMobileNetV3(MobileNetV3): + def __init__(self, *args: Any, **kwargs: Any) -> None: + """ + MobileNet V3 main class + + Args: + Inherits args from floating point MobileNetV3 + """ + super().__init__(*args, **kwargs) + self.quant = QuantStub() + self.dequant = DeQuantStub() + + def forward(self, x: Tensor) -> Tensor: + x = self.quant(x) + x = self._forward_impl(x) + x = self.dequant(x) + return x + + def fuse_model(self) -> None: + for m in self.modules(): + if type(m) is ConvNormActivation: + modules_to_fuse = ["0", "1"] + if len(m) == 3 and type(m[2]) is nn.ReLU: + modules_to_fuse.append("2") + fuse_modules(m, modules_to_fuse, inplace=True) + elif type(m) is QuantizableSqueezeExcitation: + m.fuse_model() + + +def _load_weights(arch: str, model: QuantizableMobileNetV3, model_url: Optional[str], progress: bool) -> None: + if model_url is None: + raise ValueError(f"No checkpoint is available for {arch}") + state_dict = load_state_dict_from_url(model_url, progress=progress) + model.load_state_dict(state_dict) + + +def _mobilenet_v3_model( + arch: str, + inverted_residual_setting: List[InvertedResidualConfig], + last_channel: int, + pretrained: bool, + progress: bool, + quantize: bool, + **kwargs: Any, +) -> QuantizableMobileNetV3: + + model = QuantizableMobileNetV3(inverted_residual_setting, last_channel, block=QuantizableInvertedResidual, **kwargs) + _replace_relu(model) + + if quantize: + backend = "qnnpack" + + model.fuse_model() + model.qconfig = torch.quantization.get_default_qat_qconfig(backend) + torch.quantization.prepare_qat(model, inplace=True) + + if pretrained: + _load_weights(arch, model, quant_model_urls.get(arch + "_" + backend, None), progress) + + torch.quantization.convert(model, inplace=True) + model.eval() + else: + if pretrained: + _load_weights(arch, model, model_urls.get(arch, None), progress) + + return model + + +def mobilenet_v3_large( + pretrained: bool = False, + progress: bool = True, + quantize: bool = False, + **kwargs: Any, +) -> QuantizableMobileNetV3: + """ + Constructs a MobileNetV3 Large architecture from + 
`"Searching for MobileNetV3" `_. + + Note that quantize = True returns a quantized model with 8 bit + weights. Quantized models only support inference and run on CPUs. + GPU inference is not yet supported + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet. + progress (bool): If True, displays a progress bar of the download to stderr + quantize (bool): If True, returns a quantized model, else returns a float model + """ + arch = "mobilenet_v3_large" + inverted_residual_setting, last_channel = _mobilenet_v3_conf(arch, **kwargs) + return _mobilenet_v3_model(arch, inverted_residual_setting, last_channel, pretrained, progress, quantize, **kwargs) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/resnet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/resnet.py new file mode 100644 index 0000000000..5fd3c03929 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/resnet.py @@ -0,0 +1,174 @@ +import torch +from torchvision.models.resnet import Bottleneck, BasicBlock, ResNet, model_urls +import torch.nn as nn +from torchvision.models.utils import load_state_dict_from_url +from torch.quantization import QuantStub, DeQuantStub, fuse_modules +from torch._jit_internal import Optional +from .utils import _replace_relu, quantize_model + +__all__ = ['QuantizableResNet', 'resnet18', 'resnet50', + 'resnext101_32x8d'] + + +quant_model_urls = { + 'resnet18_fbgemm': + 'https://download.pytorch.org/models/quantized/resnet18_fbgemm_16fa66dd.pth', + 'resnet50_fbgemm': + 'https://download.pytorch.org/models/quantized/resnet50_fbgemm_bf931d71.pth', + 'resnext101_32x8d_fbgemm': + 'https://download.pytorch.org/models/quantized/resnext101_32x8_fbgemm_09835ccf.pth', +} + + +class QuantizableBasicBlock(BasicBlock): + def __init__(self, *args, **kwargs): + super(QuantizableBasicBlock, self).__init__(*args, **kwargs) + self.add_relu = torch.nn.quantized.FloatFunctional() + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out = self.add_relu.add_relu(out, identity) + + return out + + def fuse_model(self): + torch.quantization.fuse_modules(self, [['conv1', 'bn1', 'relu'], + ['conv2', 'bn2']], inplace=True) + if self.downsample: + torch.quantization.fuse_modules(self.downsample, ['0', '1'], inplace=True) + + +class QuantizableBottleneck(Bottleneck): + def __init__(self, *args, **kwargs): + super(QuantizableBottleneck, self).__init__(*args, **kwargs) + self.skip_add_relu = nn.quantized.FloatFunctional() + self.relu1 = nn.ReLU(inplace=False) + self.relu2 = nn.ReLU(inplace=False) + + def forward(self, x): + identity = x + out = self.conv1(x) + out = self.bn1(out) + out = self.relu1(out) + out = self.conv2(out) + out = self.bn2(out) + out = self.relu2(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + identity = self.downsample(x) + out = self.skip_add_relu.add_relu(out, identity) + + return out + + def fuse_model(self): + fuse_modules(self, [['conv1', 'bn1', 'relu1'], + ['conv2', 'bn2', 'relu2'], + ['conv3', 'bn3']], inplace=True) + if self.downsample: + torch.quantization.fuse_modules(self.downsample, ['0', '1'], inplace=True) + + +class QuantizableResNet(ResNet): + + def __init__(self, *args, **kwargs): + super(QuantizableResNet, self).__init__(*args, **kwargs) + + 
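+        # QuantStub/DeQuantStub mark where tensors enter and leave the quantized region;
+        # convert() later swaps them for real quantize/dequantize ops, so forward() runs the
+        # whole backbone quantized between the two stubs.
+        # Typical eager-mode flow (sketch, roughly the flow used by quantize_model() in
+        # quantization/utils.py):
+        #   model.fuse_model()
+        #   model.qconfig = torch.quantization.get_default_qconfig('fbgemm')
+        #   torch.quantization.prepare(model, inplace=True)   # observe activations on calibration data
+        #   torch.quantization.convert(model, inplace=True)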
self.quant = torch.quantization.QuantStub() + self.dequant = torch.quantization.DeQuantStub() + + def forward(self, x): + x = self.quant(x) + # Ensure scriptability + # super(QuantizableResNet,self).forward(x) + # is not scriptable + x = self._forward_impl(x) + x = self.dequant(x) + return x + + def fuse_model(self): + r"""Fuse conv/bn/relu modules in resnet models + + Fuse conv+bn+relu/ Conv+relu/conv+Bn modules to prepare for quantization. + Model is modified in place. Note that this operation does not change numerics + and the model after modification is in floating point + """ + + fuse_modules(self, ['conv1', 'bn1', 'relu'], inplace=True) + for m in self.modules(): + if type(m) == QuantizableBottleneck or type(m) == QuantizableBasicBlock: + m.fuse_model() + + +def _resnet(arch, block, layers, pretrained, progress, quantize, **kwargs): + model = QuantizableResNet(block, layers, **kwargs) + _replace_relu(model) + if quantize: + # TODO use pretrained as a string to specify the backend + backend = 'fbgemm' + quantize_model(model, backend) + else: + assert pretrained in [True, False] + + if pretrained: + if quantize: + model_url = quant_model_urls[arch + '_' + backend] + else: + model_url = model_urls[arch] + + state_dict = load_state_dict_from_url(model_url, + progress=progress) + + model.load_state_dict(state_dict) + return model + + +def resnet18(pretrained=False, progress=True, quantize=False, **kwargs): + r"""ResNet-18 model from + `"Deep Residual Learning for Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _resnet('resnet18', QuantizableBasicBlock, [2, 2, 2, 2], pretrained, progress, + quantize, **kwargs) + + +def resnet50(pretrained=False, progress=True, quantize=False, **kwargs): + r"""ResNet-50 model from + `"Deep Residual Learning for Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _resnet('resnet50', QuantizableBottleneck, [3, 4, 6, 3], pretrained, progress, + quantize, **kwargs) + + +def resnext101_32x8d(pretrained=False, progress=True, quantize=False, **kwargs): + r"""ResNeXt-101 32x8d model from + `"Aggregated Residual Transformation for Deep Neural Networks" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + kwargs['groups'] = 32 + kwargs['width_per_group'] = 8 + return _resnet('resnext101_32x8d', QuantizableBottleneck, [3, 4, 23, 3], + pretrained, progress, quantize, **kwargs) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/shufflenetv2.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/shufflenetv2.py new file mode 100644 index 0000000000..a2030ca5ec --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/shufflenetv2.py @@ -0,0 +1,154 @@ +import torch +import torch.nn as nn +from torchvision.models.utils import load_state_dict_from_url +import torchvision.models.shufflenetv2 +import sys +from .utils import _replace_relu, quantize_model + +shufflenetv2 = sys.modules['torchvision.models.shufflenetv2'] + +__all__ = [ + 'QuantizableShuffleNetV2', 'shufflenet_v2_x0_5', 'shufflenet_v2_x1_0', + 'shufflenet_v2_x1_5', 'shufflenet_v2_x2_0' +] + +quant_model_urls = { 
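+    # fbgemm-quantized checkpoints; variants mapped to None have no released quantized
+    # weights, so pretrained=True together with quantize=True is not supported for them.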
+ 'shufflenetv2_x0.5_fbgemm': None, + 'shufflenetv2_x1.0_fbgemm': + 'https://download.pytorch.org/models/quantized/shufflenetv2_x1_fbgemm-db332c57.pth', + 'shufflenetv2_x1.5_fbgemm': None, + 'shufflenetv2_x2.0_fbgemm': None, +} + + +class QuantizableInvertedResidual(shufflenetv2.InvertedResidual): + def __init__(self, *args, **kwargs): + super(QuantizableInvertedResidual, self).__init__(*args, **kwargs) + self.cat = nn.quantized.FloatFunctional() + + def forward(self, x): + if self.stride == 1: + x1, x2 = x.chunk(2, dim=1) + out = self.cat.cat((x1, self.branch2(x2)), dim=1) + else: + out = self.cat.cat((self.branch1(x), self.branch2(x)), dim=1) + + out = shufflenetv2.channel_shuffle(out, 2) + + return out + + +class QuantizableShuffleNetV2(shufflenetv2.ShuffleNetV2): + def __init__(self, *args, **kwargs): + super(QuantizableShuffleNetV2, self).__init__(*args, inverted_residual=QuantizableInvertedResidual, **kwargs) + self.quant = torch.quantization.QuantStub() + self.dequant = torch.quantization.DeQuantStub() + + def forward(self, x): + x = self.quant(x) + x = self._forward_impl(x) + x = self.dequant(x) + return x + + def fuse_model(self): + r"""Fuse conv/bn/relu modules in shufflenetv2 model + + Fuse conv+bn+relu/ conv+relu/conv+bn modules to prepare for quantization. + Model is modified in place. Note that this operation does not change numerics + and the model after modification is in floating point + """ + + for name, m in self._modules.items(): + if name in ["conv1", "conv5"]: + torch.quantization.fuse_modules(m, [["0", "1", "2"]], inplace=True) + for m in self.modules(): + if type(m) == QuantizableInvertedResidual: + if len(m.branch1._modules.items()) > 0: + torch.quantization.fuse_modules( + m.branch1, [["0", "1"], ["2", "3", "4"]], inplace=True + ) + torch.quantization.fuse_modules( + m.branch2, + [["0", "1", "2"], ["3", "4"], ["5", "6", "7"]], + inplace=True, + ) + + +def _shufflenetv2(arch, pretrained, progress, quantize, *args, **kwargs): + model = QuantizableShuffleNetV2(*args, **kwargs) + _replace_relu(model) + + if quantize: + # TODO use pretrained as a string to specify the backend + backend = 'fbgemm' + quantize_model(model, backend) + else: + assert pretrained in [True, False] + + if pretrained: + if quantize: + model_url = quant_model_urls[arch + '_' + backend] + else: + model_url = shufflenetv2.model_urls[arch] + + state_dict = load_state_dict_from_url(model_url, + progress=progress) + + model.load_state_dict(state_dict) + return model + + +def shufflenet_v2_x0_5(pretrained=False, progress=True, quantize=False, **kwargs): + """ + Constructs a ShuffleNetV2 with 0.5x output channels, as described in + `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" + `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _shufflenetv2('shufflenetv2_x0.5', pretrained, progress, quantize, + [4, 8, 4], [24, 48, 96, 192, 1024], **kwargs) + + +def shufflenet_v2_x1_0(pretrained=False, progress=True, quantize=False, **kwargs): + """ + Constructs a ShuffleNetV2 with 1.0x output channels, as described in + `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" + `_. 
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _shufflenetv2('shufflenetv2_x1.0', pretrained, progress, quantize, + [4, 8, 4], [24, 116, 232, 464, 1024], **kwargs) + + +def shufflenet_v2_x1_5(pretrained=False, progress=True, quantize=False, **kwargs): + """ + Constructs a ShuffleNetV2 with 1.5x output channels, as described in + `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" + `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _shufflenetv2('shufflenetv2_x1.5', pretrained, progress, quantize, + [4, 8, 4], [24, 176, 352, 704, 1024], **kwargs) + + +def shufflenet_v2_x2_0(pretrained=False, progress=True, quantize=False, **kwargs): + """ + Constructs a ShuffleNetV2 with 2.0x output channels, as described in + `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" + `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _shufflenetv2('shufflenetv2_x2.0', pretrained, progress, quantize, + [4, 8, 4], [24, 244, 488, 976, 2048], **kwargs) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/utils.py new file mode 100644 index 0000000000..bf23c9a933 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/utils.py @@ -0,0 +1,40 @@ +import torch +from torch import nn + + +def _replace_relu(module): + reassign = {} + for name, mod in module.named_children(): + _replace_relu(mod) + # Checking for explicit type instead of instance + # as we only want to replace modules of the exact type + # not inherited classes + if type(mod) == nn.ReLU or type(mod) == nn.ReLU6: + reassign[name] = nn.ReLU(inplace=False) + + for key, value in reassign.items(): + module._modules[key] = value + + +def quantize_model(model, backend): + _dummy_input_data = torch.rand(1, 3, 299, 299) + if backend not in torch.backends.quantized.supported_engines: + raise RuntimeError("Quantized backend not supported ") + torch.backends.quantized.engine = backend + model.eval() + # Make sure that weight qconfig matches that of the serialized models + if backend == 'fbgemm': + model.qconfig = torch.quantization.QConfig( + activation=torch.quantization.default_observer, + weight=torch.quantization.default_per_channel_weight_observer) + elif backend == 'qnnpack': + model.qconfig = torch.quantization.QConfig( + activation=torch.quantization.default_observer, + weight=torch.quantization.default_weight_observer) + + model.fuse_model() + torch.quantization.prepare(model, inplace=True) + model(_dummy_input_data) + torch.quantization.convert(model, inplace=True) + + return diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/resnet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/resnet.py new file mode 100644 index 0000000000..797f459f5c --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/resnet.py @@ -0,0 +1,353 @@ +import torch +import torch.nn as nn +from .utils import load_state_dict_from_url + + +__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', + 'resnet152', 
'resnext50_32x4d', 'resnext101_32x8d', + 'wide_resnet50_2', 'wide_resnet101_2'] + + +model_urls = { + 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', + 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', + 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', + 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', + 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', + 'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth', + 'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth', + 'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth', + 'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth', +} + + +def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=dilation, groups=groups, bias=False, dilation=dilation) + + +def conv1x1(in_planes, out_planes, stride=1): + """1x1 convolution""" + return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, + base_width=64, dilation=1, norm_layer=None): + super(BasicBlock, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + if groups != 1 or base_width != 64: + raise ValueError('BasicBlock only supports groups=1 and base_width=64') + if dilation > 1: + raise NotImplementedError("Dilation > 1 not supported in BasicBlock") + # Both self.conv1 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = norm_layer(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = norm_layer(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) + # while original implementation places the stride at the first 1x1 convolution(self.conv1) + # according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385. + # This variant is also known as ResNet V1.5 and improves accuracy according to + # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. 
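+    # Concretely: conv1 and conv3 below are 1x1 with stride 1, while conv2 (the 3x3)
+    # carries the stride; the downsample branch, when present, uses the same stride.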
+ + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, + base_width=64, dilation=1, norm_layer=None): + super(Bottleneck, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + width = int(planes * (base_width / 64.)) * groups + # Both self.conv2 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv1x1(inplanes, width) + self.bn1 = norm_layer(width) + self.conv2 = conv3x3(width, width, stride, groups, dilation) + self.bn2 = norm_layer(width) + self.conv3 = conv1x1(width, planes * self.expansion) + self.bn3 = norm_layer(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class ResNet(nn.Module): + + def __init__(self, block, layers, num_classes=1000, zero_init_residual=False, + groups=1, width_per_group=64, replace_stride_with_dilation=None, + norm_layer=None): + super(ResNet, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + self._norm_layer = norm_layer + + self.inplanes = 64 + self.dilation = 1 + if replace_stride_with_dilation is None: + # each element in the tuple indicates if we should replace + # the 2x2 stride with a dilated convolution instead + replace_stride_with_dilation = [False, False, False] + if len(replace_stride_with_dilation) != 3: + raise ValueError("replace_stride_with_dilation should be None " + "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) + self.groups = groups + self.base_width = width_per_group + self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = norm_layer(self.inplanes) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2, + dilate=replace_stride_with_dilation[0]) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2, + dilate=replace_stride_with_dilation[1]) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2, + dilate=replace_stride_with_dilation[2]) + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.fc = nn.Linear(512 * block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + # Zero-initialize the last BN in each residual branch, + # so that the residual branch starts with zeros, and each residual block behaves like an identity. 
+ # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 + if zero_init_residual: + for m in self.modules(): + if isinstance(m, Bottleneck): + nn.init.constant_(m.bn3.weight, 0) + elif isinstance(m, BasicBlock): + nn.init.constant_(m.bn2.weight, 0) + + def _make_layer(self, block, planes, blocks, stride=1, dilate=False): + norm_layer = self._norm_layer + downsample = None + previous_dilation = self.dilation + if dilate: + self.dilation *= stride + stride = 1 + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + conv1x1(self.inplanes, planes * block.expansion, stride), + norm_layer(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample, self.groups, + self.base_width, previous_dilation, norm_layer)) + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append(block(self.inplanes, planes, groups=self.groups, + base_width=self.base_width, dilation=self.dilation, + norm_layer=norm_layer)) + + return nn.Sequential(*layers) + + def _forward_impl(self, x): + # See note [TorchScript super()] + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = torch.flatten(x, 1) + x = self.fc(x) + + return x + + def forward(self, x): + return self._forward_impl(x) + + +def _resnet(arch, block, layers, pretrained, progress, **kwargs): + model = ResNet(block, layers, **kwargs) + if pretrained: + state_dict = load_state_dict_from_url(model_urls[arch], + progress=progress) + model.load_state_dict(state_dict) + return model + + +def resnet18(pretrained=False, progress=True, **kwargs): + r"""ResNet-18 model from + `"Deep Residual Learning for Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress, + **kwargs) + + +def resnet34(pretrained=False, progress=True, **kwargs): + r"""ResNet-34 model from + `"Deep Residual Learning for Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress, + **kwargs) + + +def resnet50(pretrained=False, progress=True, **kwargs): + r"""ResNet-50 model from + `"Deep Residual Learning for Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress, + **kwargs) + + +def resnet101(pretrained=False, progress=True, **kwargs): + r"""ResNet-101 model from + `"Deep Residual Learning for Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress, + **kwargs) + + +def resnet152(pretrained=False, progress=True, **kwargs): + r"""ResNet-152 model from + `"Deep Residual Learning for Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, 
displays a progress bar of the download to stderr + """ + return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress, + **kwargs) + + +def resnext50_32x4d(pretrained=False, progress=True, **kwargs): + r"""ResNeXt-50 32x4d model from + `"Aggregated Residual Transformation for Deep Neural Networks" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + kwargs['groups'] = 32 + kwargs['width_per_group'] = 4 + return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3], + pretrained, progress, **kwargs) + + +def resnext101_32x8d(pretrained=False, progress=True, **kwargs): + r"""ResNeXt-101 32x8d model from + `"Aggregated Residual Transformation for Deep Neural Networks" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + kwargs['groups'] = 32 + kwargs['width_per_group'] = 8 + return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3], + pretrained, progress, **kwargs) + + +def wide_resnet50_2(pretrained=False, progress=True, **kwargs): + r"""Wide ResNet-50-2 model from + `"Wide Residual Networks" `_ + + The model is the same as ResNet except for the bottleneck number of channels + which is twice larger in every block. The number of channels in outer 1x1 + convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048 + channels, and in Wide ResNet-50-2 has 2048-1024-2048. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + kwargs['width_per_group'] = 64 * 2 + return _resnet('wide_resnet50_2', Bottleneck, [3, 4, 6, 3], + pretrained, progress, **kwargs) + + +def wide_resnet101_2(pretrained=False, progress=True, **kwargs): + r"""Wide ResNet-101-2 model from + `"Wide Residual Networks" `_ + + The model is the same as ResNet except for the bottleneck number of channels + which is twice larger in every block. The number of channels in outer 1x1 + convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048 + channels, and in Wide ResNet-50-2 has 2048-1024-2048. 
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + kwargs['width_per_group'] = 64 * 2 + return _resnet('wide_resnet101_2', Bottleneck, [3, 4, 23, 3], + pretrained, progress, **kwargs) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/__init__.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/__init__.py new file mode 100644 index 0000000000..43c80c355a --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/__init__.py @@ -0,0 +1,3 @@ +from .segmentation import * +from .fcn import * +from .deeplabv3 import * diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/_utils.py new file mode 100644 index 0000000000..c5a7ae99e4 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/_utils.py @@ -0,0 +1,34 @@ +from collections import OrderedDict + +import torch +from torch import nn +from torch.nn import functional as F + + +class _SimpleSegmentationModel(nn.Module): + __constants__ = ['aux_classifier'] + + def __init__(self, backbone, classifier, aux_classifier=None): + super(_SimpleSegmentationModel, self).__init__() + self.backbone = backbone + self.classifier = classifier + self.aux_classifier = aux_classifier + + def forward(self, x): + input_shape = x.shape[-2:] + # contract: features is a dict of tensors + features = self.backbone(x) + + result = OrderedDict() + x = features["out"] + x = self.classifier(x) + x = F.interpolate(x, size=input_shape, mode='bilinear', align_corners=False) + result["out"] = x + + if self.aux_classifier is not None: + x = features["aux"] + x = self.aux_classifier(x) + x = F.interpolate(x, size=input_shape, mode='bilinear', align_corners=False) + result["aux"] = x + + return result diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/deeplabv3.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/deeplabv3.py new file mode 100644 index 0000000000..ae652cd7d2 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/deeplabv3.py @@ -0,0 +1,94 @@ +import torch +from torch import nn +from torch.nn import functional as F + +from ._utils import _SimpleSegmentationModel + + +__all__ = ["DeepLabV3"] + + +class DeepLabV3(_SimpleSegmentationModel): + """ + Implements DeepLabV3 model from + `"Rethinking Atrous Convolution for Semantic Image Segmentation" + `_. + + Arguments: + backbone (nn.Module): the network used to compute the features for the model. + The backbone should return an OrderedDict[Tensor], with the key being + "out" for the last feature map used, and "aux" if an auxiliary classifier + is used. + classifier (nn.Module): module that takes the "out" element returned from + the backbone and returns a dense prediction. 
+ aux_classifier (nn.Module, optional): auxiliary classifier used during training + """ + pass + + +class DeepLabHead(nn.Sequential): + def __init__(self, in_channels, num_classes): + super(DeepLabHead, self).__init__( + ASPP(in_channels, [12, 24, 36]), + nn.Conv2d(256, 256, 3, padding=1, bias=False), + nn.BatchNorm2d(256), + nn.ReLU(), + nn.Conv2d(256, num_classes, 1) + ) + + +class ASPPConv(nn.Sequential): + def __init__(self, in_channels, out_channels, dilation): + modules = [ + nn.Conv2d(in_channels, out_channels, 3, padding=dilation, dilation=dilation, bias=False), + nn.BatchNorm2d(out_channels), + nn.ReLU() + ] + super(ASPPConv, self).__init__(*modules) + + +class ASPPPooling(nn.Sequential): + def __init__(self, in_channels, out_channels): + super(ASPPPooling, self).__init__( + nn.AdaptiveAvgPool2d(1), + nn.Conv2d(in_channels, out_channels, 1, bias=False), + nn.BatchNorm2d(out_channels), + nn.ReLU()) + + def forward(self, x): + size = x.shape[-2:] + for mod in self: + x = mod(x) + return F.interpolate(x, size=size, mode='bilinear', align_corners=False) + + +class ASPP(nn.Module): + def __init__(self, in_channels, atrous_rates): + super(ASPP, self).__init__() + out_channels = 256 + modules = [] + modules.append(nn.Sequential( + nn.Conv2d(in_channels, out_channels, 1, bias=False), + nn.BatchNorm2d(out_channels), + nn.ReLU())) + + rate1, rate2, rate3 = tuple(atrous_rates) + modules.append(ASPPConv(in_channels, out_channels, rate1)) + modules.append(ASPPConv(in_channels, out_channels, rate2)) + modules.append(ASPPConv(in_channels, out_channels, rate3)) + modules.append(ASPPPooling(in_channels, out_channels)) + + self.convs = nn.ModuleList(modules) + + self.project = nn.Sequential( + nn.Conv2d(5 * out_channels, out_channels, 1, bias=False), + nn.BatchNorm2d(out_channels), + nn.ReLU(), + nn.Dropout(0.5)) + + def forward(self, x): + res = [] + for conv in self.convs: + res.append(conv(x)) + res = torch.cat(res, dim=1) + return self.project(res) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/fcn.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/fcn.py new file mode 100644 index 0000000000..4d7701cc4e --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/fcn.py @@ -0,0 +1,36 @@ +from torch import nn + +from ._utils import _SimpleSegmentationModel + + +__all__ = ["FCN"] + + +class FCN(_SimpleSegmentationModel): + """ + Implements a Fully-Convolutional Network for semantic segmentation. + + Arguments: + backbone (nn.Module): the network used to compute the features for the model. + The backbone should return an OrderedDict[Tensor], with the key being + "out" for the last feature map used, and "aux" if an auxiliary classifier + is used. + classifier (nn.Module): module that takes the "out" element returned from + the backbone and returns a dense prediction. 
+ aux_classifier (nn.Module, optional): auxiliary classifier used during training + """ + pass + + +class FCNHead(nn.Sequential): + def __init__(self, in_channels, channels): + inter_channels = in_channels // 4 + layers = [ + nn.Conv2d(in_channels, inter_channels, 3, padding=1, bias=False), + nn.BatchNorm2d(inter_channels), + nn.ReLU(), + nn.Dropout(0.1), + nn.Conv2d(inter_channels, channels, 1) + ] + + super(FCNHead, self).__init__(*layers) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/segmentation.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/segmentation.py new file mode 100644 index 0000000000..15df4d8ae3 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/segmentation.py @@ -0,0 +1,106 @@ +from .._utils import IntermediateLayerGetter +from ..utils import load_state_dict_from_url +from .. import resnet +from .deeplabv3 import DeepLabHead, DeepLabV3 +from .fcn import FCN, FCNHead + + +__all__ = ['fcn_resnet50', 'fcn_resnet101', 'deeplabv3_resnet50', 'deeplabv3_resnet101'] + + +model_urls = { + 'fcn_resnet50_coco': None, + 'fcn_resnet101_coco': 'https://download.pytorch.org/models/fcn_resnet101_coco-7ecb50ca.pth', + 'deeplabv3_resnet50_coco': None, + 'deeplabv3_resnet101_coco': 'https://download.pytorch.org/models/deeplabv3_resnet101_coco-586e9e4e.pth', +} + + +def _segm_resnet(name, backbone_name, num_classes, aux, pretrained_backbone=True): + backbone = resnet.__dict__[backbone_name]( + pretrained=pretrained_backbone, + replace_stride_with_dilation=[False, True, True]) + + return_layers = {'layer4': 'out'} + if aux: + return_layers['layer3'] = 'aux' + backbone = IntermediateLayerGetter(backbone, return_layers=return_layers) + + aux_classifier = None + if aux: + inplanes = 1024 + aux_classifier = FCNHead(inplanes, num_classes) + + model_map = { + 'deeplabv3': (DeepLabHead, DeepLabV3), + 'fcn': (FCNHead, FCN), + } + inplanes = 2048 + classifier = model_map[name][0](inplanes, num_classes) + base_model = model_map[name][1] + + model = base_model(backbone, classifier, aux_classifier) + return model + + +def _load_model(arch_type, backbone, pretrained, progress, num_classes, aux_loss, **kwargs): + if pretrained: + aux_loss = True + model = _segm_resnet(arch_type, backbone, num_classes, aux_loss, **kwargs) + if pretrained: + arch = arch_type + '_' + backbone + '_coco' + model_url = model_urls[arch] + if model_url is None: + raise NotImplementedError('pretrained {} is not supported as of now'.format(arch)) + else: + state_dict = load_state_dict_from_url(model_url, progress=progress) + model.load_state_dict(state_dict) + return model + + +def fcn_resnet50(pretrained=False, progress=True, + num_classes=21, aux_loss=None, **kwargs): + """Constructs a Fully-Convolutional Network model with a ResNet-50 backbone. + + Args: + pretrained (bool): If True, returns a model pre-trained on COCO train2017 which + contains the same classes as Pascal VOC + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _load_model('fcn', 'resnet50', pretrained, progress, num_classes, aux_loss, **kwargs) + + +def fcn_resnet101(pretrained=False, progress=True, + num_classes=21, aux_loss=None, **kwargs): + """Constructs a Fully-Convolutional Network model with a ResNet-101 backbone. 
+ + Args: + pretrained (bool): If True, returns a model pre-trained on COCO train2017 which + contains the same classes as Pascal VOC + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _load_model('fcn', 'resnet101', pretrained, progress, num_classes, aux_loss, **kwargs) + + +def deeplabv3_resnet50(pretrained=False, progress=True, + num_classes=21, aux_loss=None, **kwargs): + """Constructs a DeepLabV3 model with a ResNet-50 backbone. + + Args: + pretrained (bool): If True, returns a model pre-trained on COCO train2017 which + contains the same classes as Pascal VOC + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _load_model('deeplabv3', 'resnet50', pretrained, progress, num_classes, aux_loss, **kwargs) + + +def deeplabv3_resnet101(pretrained=False, progress=True, + num_classes=21, aux_loss=None, **kwargs): + """Constructs a DeepLabV3 model with a ResNet-101 backbone. + + Args: + pretrained (bool): If True, returns a model pre-trained on COCO train2017 which + contains the same classes as Pascal VOC + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _load_model('deeplabv3', 'resnet101', pretrained, progress, num_classes, aux_loss, **kwargs) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/shufflenetv2.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/shufflenetv2.py new file mode 100644 index 0000000000..14f9521886 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/shufflenetv2.py @@ -0,0 +1,208 @@ +import torch +import torch.nn as nn +from .utils import load_state_dict_from_url + + +__all__ = [ + 'ShuffleNetV2', 'shufflenet_v2_x0_5', 'shufflenet_v2_x1_0', + 'shufflenet_v2_x1_5', 'shufflenet_v2_x2_0' +] + +model_urls = { + 'shufflenetv2_x0.5': 'https://download.pytorch.org/models/shufflenetv2_x0.5-f707e7126e.pth', + 'shufflenetv2_x1.0': 'https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth', + 'shufflenetv2_x1.5': None, + 'shufflenetv2_x2.0': None, +} + + +def channel_shuffle(x, groups): + # type: (torch.Tensor, int) -> torch.Tensor + batchsize, num_channels, height, width = x.data.size() + channels_per_group = num_channels // groups + + # reshape + x = x.view(batchsize, groups, + channels_per_group, height, width) + + x = torch.transpose(x, 1, 2).contiguous() + + # flatten + x = x.view(batchsize, -1, height, width) + + return x + + +class InvertedResidual(nn.Module): + def __init__(self, inp, oup, stride): + super(InvertedResidual, self).__init__() + + if not (1 <= stride <= 3): + raise ValueError('illegal stride value') + self.stride = stride + + branch_features = oup // 2 + assert (self.stride != 1) or (inp == branch_features << 1) + + if self.stride > 1: + self.branch1 = nn.Sequential( + self.depthwise_conv(inp, inp, kernel_size=3, stride=self.stride, padding=1), + nn.BatchNorm2d(inp), + nn.Conv2d(inp, branch_features, kernel_size=1, stride=1, padding=0, bias=False), + nn.BatchNorm2d(branch_features), + nn.ReLU(inplace=True), + ) + else: + self.branch1 = nn.Sequential() + + self.branch2 = nn.Sequential( + nn.Conv2d(inp if (self.stride > 1) else branch_features, + branch_features, kernel_size=1, stride=1, padding=0, bias=False), + nn.BatchNorm2d(branch_features), + nn.ReLU(inplace=True), + self.depthwise_conv(branch_features, branch_features, kernel_size=3, stride=self.stride, padding=1), + nn.BatchNorm2d(branch_features), + nn.Conv2d(branch_features, branch_features, 
kernel_size=1, stride=1, padding=0, bias=False), + nn.BatchNorm2d(branch_features), + nn.ReLU(inplace=True), + ) + + @staticmethod + def depthwise_conv(i, o, kernel_size, stride=1, padding=0, bias=False): + return nn.Conv2d(i, o, kernel_size, stride, padding, bias=bias, groups=i) + + def forward(self, x): + if self.stride == 1: + x1, x2 = x.chunk(2, dim=1) + out = torch.cat((x1, self.branch2(x2)), dim=1) + else: + out = torch.cat((self.branch1(x), self.branch2(x)), dim=1) + + out = channel_shuffle(out, 2) + + return out + + +class ShuffleNetV2(nn.Module): + def __init__(self, stages_repeats, stages_out_channels, num_classes=1000, inverted_residual=InvertedResidual): + super(ShuffleNetV2, self).__init__() + + if len(stages_repeats) != 3: + raise ValueError('expected stages_repeats as list of 3 positive ints') + if len(stages_out_channels) != 5: + raise ValueError('expected stages_out_channels as list of 5 positive ints') + self._stage_out_channels = stages_out_channels + + input_channels = 3 + output_channels = self._stage_out_channels[0] + self.conv1 = nn.Sequential( + nn.Conv2d(input_channels, output_channels, 3, 2, 1, bias=False), + nn.BatchNorm2d(output_channels), + nn.ReLU(inplace=True), + ) + input_channels = output_channels + + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + + stage_names = ['stage{}'.format(i) for i in [2, 3, 4]] + for name, repeats, output_channels in zip( + stage_names, stages_repeats, self._stage_out_channels[1:]): + seq = [inverted_residual(input_channels, output_channels, 2)] + for i in range(repeats - 1): + seq.append(inverted_residual(output_channels, output_channels, 1)) + setattr(self, name, nn.Sequential(*seq)) + input_channels = output_channels + + output_channels = self._stage_out_channels[-1] + self.conv5 = nn.Sequential( + nn.Conv2d(input_channels, output_channels, 1, 1, 0, bias=False), + nn.BatchNorm2d(output_channels), + nn.ReLU(inplace=True), + ) + + self.fc = nn.Linear(output_channels, num_classes) + + def _forward_impl(self, x): + # See note [TorchScript super()] + x = self.conv1(x) + x = self.maxpool(x) + x = self.stage2(x) + x = self.stage3(x) + x = self.stage4(x) + x = self.conv5(x) + x = x.mean([2, 3]) # globalpool + x = self.fc(x) + return x + + def forward(self, x): + return self._forward_impl(x) + + +def _shufflenetv2(arch, pretrained, progress, *args, **kwargs): + model = ShuffleNetV2(*args, **kwargs) + + if pretrained: + model_url = model_urls[arch] + if model_url is None: + raise NotImplementedError('pretrained {} is not supported as of now'.format(arch)) + else: + state_dict = load_state_dict_from_url(model_url, progress=progress) + model.load_state_dict(state_dict) + + return model + + +def shufflenet_v2_x0_5(pretrained=False, progress=True, **kwargs): + """ + Constructs a ShuffleNetV2 with 0.5x output channels, as described in + `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" + `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _shufflenetv2('shufflenetv2_x0.5', pretrained, progress, + [4, 8, 4], [24, 48, 96, 192, 1024], **kwargs) + + +def shufflenet_v2_x1_0(pretrained=False, progress=True, **kwargs): + """ + Constructs a ShuffleNetV2 with 1.0x output channels, as described in + `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" + `_. 
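To make the `channel_shuffle` helper above concrete, here is a small sketch (not part of the patch) with eight labelled channels and two groups; the op only permutes channels, interleaving the two branch halves after concatenation.

import torch
from torchvision.models.shufflenetv2 import channel_shuffle

x = torch.arange(8, dtype=torch.float32).reshape(1, 8, 1, 1)   # channels labelled 0..7
y = channel_shuffle(x, groups=2)
print(y.flatten().tolist())                                    # [0.0, 4.0, 1.0, 5.0, 2.0, 6.0, 3.0, 7.0]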
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _shufflenetv2('shufflenetv2_x1.0', pretrained, progress, + [4, 8, 4], [24, 116, 232, 464, 1024], **kwargs) + + +def shufflenet_v2_x1_5(pretrained=False, progress=True, **kwargs): + """ + Constructs a ShuffleNetV2 with 1.5x output channels, as described in + `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" + `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _shufflenetv2('shufflenetv2_x1.5', pretrained, progress, + [4, 8, 4], [24, 176, 352, 704, 1024], **kwargs) + + +def shufflenet_v2_x2_0(pretrained=False, progress=True, **kwargs): + """ + Constructs a ShuffleNetV2 with 2.0x output channels, as described in + `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" + `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _shufflenetv2('shufflenetv2_x2.0', pretrained, progress, + [4, 8, 4], [24, 244, 488, 976, 2048], **kwargs) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/squeezenet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/squeezenet.py new file mode 100644 index 0000000000..964f3ec66d --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/squeezenet.py @@ -0,0 +1,137 @@ +import torch +import torch.nn as nn +import torch.nn.init as init +from .utils import load_state_dict_from_url + +__all__ = ['SqueezeNet', 'squeezenet1_0', 'squeezenet1_1'] + +model_urls = { + 'squeezenet1_0': 'https://download.pytorch.org/models/squeezenet1_0-a815701f.pth', + 'squeezenet1_1': 'https://download.pytorch.org/models/squeezenet1_1-f364aa15.pth', +} + + +class Fire(nn.Module): + + def __init__(self, inplanes, squeeze_planes, + expand1x1_planes, expand3x3_planes): + super(Fire, self).__init__() + self.inplanes = inplanes + self.squeeze = nn.Conv2d(inplanes, squeeze_planes, kernel_size=1) + self.squeeze_activation = nn.ReLU(inplace=True) + self.expand1x1 = nn.Conv2d(squeeze_planes, expand1x1_planes, + kernel_size=1) + self.expand1x1_activation = nn.ReLU(inplace=True) + self.expand3x3 = nn.Conv2d(squeeze_planes, expand3x3_planes, + kernel_size=3, padding=1) + self.expand3x3_activation = nn.ReLU(inplace=True) + + def forward(self, x): + x = self.squeeze_activation(self.squeeze(x)) + return torch.cat([ + self.expand1x1_activation(self.expand1x1(x)), + self.expand3x3_activation(self.expand3x3(x)) + ], 1) + + +class SqueezeNet(nn.Module): + + def __init__(self, version='1_0', num_classes=1000): + super(SqueezeNet, self).__init__() + self.num_classes = num_classes + if version == '1_0': + self.features = nn.Sequential( + nn.Conv2d(3, 96, kernel_size=7, stride=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True), + Fire(96, 16, 64, 64), + Fire(128, 16, 64, 64), + Fire(128, 32, 128, 128), + nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True), + Fire(256, 32, 128, 128), + Fire(256, 48, 192, 192), + Fire(384, 48, 192, 192), + Fire(384, 64, 256, 256), + nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True), + Fire(512, 64, 256, 256), + ) + elif version == '1_1': + self.features = nn.Sequential( + nn.Conv2d(3, 64, kernel_size=3, stride=2), + 
nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True), + Fire(64, 16, 64, 64), + Fire(128, 16, 64, 64), + nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True), + Fire(128, 32, 128, 128), + Fire(256, 32, 128, 128), + nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True), + Fire(256, 48, 192, 192), + Fire(384, 48, 192, 192), + Fire(384, 64, 256, 256), + Fire(512, 64, 256, 256), + ) + else: + # FIXME: Is this needed? SqueezeNet should only be called from the + # FIXME: squeezenet1_x() functions + # FIXME: This checking is not done for the other models + raise ValueError("Unsupported SqueezeNet version {version}:" + "1_0 or 1_1 expected".format(version=version)) + + # Final convolution is initialized differently from the rest + final_conv = nn.Conv2d(512, self.num_classes, kernel_size=1) + self.classifier = nn.Sequential( + nn.Dropout(p=0.5), + final_conv, + nn.ReLU(inplace=True), + nn.AdaptiveAvgPool2d((1, 1)) + ) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + if m is final_conv: + init.normal_(m.weight, mean=0.0, std=0.01) + else: + init.kaiming_uniform_(m.weight) + if m.bias is not None: + init.constant_(m.bias, 0) + + def forward(self, x): + x = self.features(x) + x = self.classifier(x) + return torch.flatten(x, 1) + + +def _squeezenet(version, pretrained, progress, **kwargs): + model = SqueezeNet(version, **kwargs) + if pretrained: + arch = 'squeezenet' + version + state_dict = load_state_dict_from_url(model_urls[arch], + progress=progress) + model.load_state_dict(state_dict) + return model + + +def squeezenet1_0(pretrained=False, progress=True, **kwargs): + r"""SqueezeNet model architecture from the `"SqueezeNet: AlexNet-level + accuracy with 50x fewer parameters and <0.5MB model size" + `_ paper. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _squeezenet('1_0', pretrained, progress, **kwargs) + + +def squeezenet1_1(pretrained=False, progress=True, **kwargs): + r"""SqueezeNet 1.1 model from the `official SqueezeNet repo + `_. + SqueezeNet 1.1 has 2.4x less computation and slightly fewer parameters + than SqueezeNet 1.0, without sacrificing accuracy. 
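A minimal sketch of the Fire module above (not part of the patch): the 1x1 squeeze reduces 96 channels to 16, then the parallel 1x1/3x3 expand convolutions are concatenated back to 64 + 64 = 128 channels at the same spatial size. The 54x54 input is an arbitrary assumption.

import torch
from torchvision.models.squeezenet import Fire

fire = Fire(inplanes=96, squeeze_planes=16, expand1x1_planes=64, expand3x3_planes=64)
out = fire(torch.rand(1, 96, 54, 54))
print(out.shape)          # torch.Size([1, 128, 54, 54])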
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _squeezenet('1_1', pretrained, progress, **kwargs) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/utils.py new file mode 100644 index 0000000000..638ef07cd8 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/utils.py @@ -0,0 +1,4 @@ +try: + from torch.hub import load_state_dict_from_url +except ImportError: + from torch.utils.model_zoo import load_url as load_state_dict_from_url diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/vgg.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/vgg.py new file mode 100644 index 0000000000..dba534f651 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/vgg.py @@ -0,0 +1,183 @@ +import torch +import torch.nn as nn +from .utils import load_state_dict_from_url + + +__all__ = [ + 'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', + 'vgg19_bn', 'vgg19', +] + + +model_urls = { + 'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth', + 'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth', + 'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth', + 'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth', + 'vgg11_bn': 'https://download.pytorch.org/models/vgg11_bn-6002323d.pth', + 'vgg13_bn': 'https://download.pytorch.org/models/vgg13_bn-abd245e5.pth', + 'vgg16_bn': 'https://download.pytorch.org/models/vgg16_bn-6c64b313.pth', + 'vgg19_bn': 'https://download.pytorch.org/models/vgg19_bn-c79401a0.pth', +} + + +class VGG(nn.Module): + + def __init__(self, features, num_classes=1000, init_weights=True): + super(VGG, self).__init__() + self.features = features + self.avgpool = nn.AdaptiveAvgPool2d((7, 7)) + self.classifier = nn.Sequential( + nn.Linear(512 * 7 * 7, 4096), + nn.ReLU(True), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(True), + nn.Dropout(), + nn.Linear(4096, num_classes), + ) + if init_weights: + self._initialize_weights() + + def forward(self, x): + x = self.features(x) + x = self.avgpool(x) + x = torch.flatten(x, 1) + x = self.classifier(x) + return x + + def _initialize_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + nn.init.constant_(m.bias, 0) + + +def make_layers(cfg, batch_norm=False): + layers = [] + in_channels = 3 + for v in cfg: + if v == 'M': + layers += [nn.MaxPool2d(kernel_size=2, stride=2)] + else: + conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) + if batch_norm: + layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] + else: + layers += [conv2d, nn.ReLU(inplace=True)] + in_channels = v + return nn.Sequential(*layers) + + +cfgs = { + 'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], + 'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], + 'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], + 'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 
512, 512, 512, 'M', 512, 512, 512, 512, 'M'], +} + + +def _vgg(arch, cfg, batch_norm, pretrained, progress, **kwargs): + if pretrained: + kwargs['init_weights'] = False + model = VGG(make_layers(cfgs[cfg], batch_norm=batch_norm), **kwargs) + if pretrained: + state_dict = load_state_dict_from_url(model_urls[arch], + progress=progress) + model.load_state_dict(state_dict) + return model + + +def vgg11(pretrained=False, progress=True, **kwargs): + r"""VGG 11-layer model (configuration "A") from + `"Very Deep Convolutional Networks For Large-Scale Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _vgg('vgg11', 'A', False, pretrained, progress, **kwargs) + + +def vgg11_bn(pretrained=False, progress=True, **kwargs): + r"""VGG 11-layer model (configuration "A") with batch normalization + `"Very Deep Convolutional Networks For Large-Scale Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _vgg('vgg11_bn', 'A', True, pretrained, progress, **kwargs) + + +def vgg13(pretrained=False, progress=True, **kwargs): + r"""VGG 13-layer model (configuration "B") + `"Very Deep Convolutional Networks For Large-Scale Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _vgg('vgg13', 'B', False, pretrained, progress, **kwargs) + + +def vgg13_bn(pretrained=False, progress=True, **kwargs): + r"""VGG 13-layer model (configuration "B") with batch normalization + `"Very Deep Convolutional Networks For Large-Scale Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _vgg('vgg13_bn', 'B', True, pretrained, progress, **kwargs) + + +def vgg16(pretrained=False, progress=True, **kwargs): + r"""VGG 16-layer model (configuration "D") + `"Very Deep Convolutional Networks For Large-Scale Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _vgg('vgg16', 'D', False, pretrained, progress, **kwargs) + + +def vgg16_bn(pretrained=False, progress=True, **kwargs): + r"""VGG 16-layer model (configuration "D") with batch normalization + `"Very Deep Convolutional Networks For Large-Scale Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _vgg('vgg16_bn', 'D', True, pretrained, progress, **kwargs) + + +def vgg19(pretrained=False, progress=True, **kwargs): + r"""VGG 19-layer model (configuration "E") + `"Very Deep Convolutional Networks For Large-Scale Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _vgg('vgg19', 'E', False, pretrained, progress, **kwargs) + + +def vgg19_bn(pretrained=False, progress=True, **kwargs): + r"""VGG 19-layer model (configuration 'E') with batch normalization + `"Very Deep Convolutional Networks For Large-Scale Image 
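An illustrative sketch (not part of the patch) of how `make_layers` expands a configuration list: each integer becomes a 3x3 conv (optionally followed by BatchNorm) plus ReLU, and each 'M' becomes a 2x2 max-pool, so configuration 'A' halves a 224x224 input five times down to 7x7.

import torch
from torchvision.models.vgg import make_layers, cfgs

features = make_layers(cfgs['A'], batch_norm=True)                 # VGG-11-BN feature extractor
print(sum(isinstance(m, torch.nn.MaxPool2d) for m in features))    # 5 pooling stages
print(features(torch.rand(2, 3, 224, 224)).shape)                  # torch.Size([2, 512, 7, 7])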
Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _vgg('vgg19_bn', 'E', True, pretrained, progress, **kwargs) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/video/__init__.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/video/__init__.py new file mode 100644 index 0000000000..b792ca6ecf --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/video/__init__.py @@ -0,0 +1 @@ +from .resnet import * diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/video/resnet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/video/resnet.py new file mode 100644 index 0000000000..a9e59a149c --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/video/resnet.py @@ -0,0 +1,341 @@ +import torch +import torch.nn as nn + +from ..utils import load_state_dict_from_url + + +__all__ = ['r3d_18', 'mc3_18', 'r2plus1d_18'] + +model_urls = { + 'r3d_18': 'https://download.pytorch.org/models/r3d_18-b3b3357e.pth', + 'mc3_18': 'https://download.pytorch.org/models/mc3_18-a90a0ba3.pth', + 'r2plus1d_18': 'https://download.pytorch.org/models/r2plus1d_18-91a641e6.pth', +} + + +class Conv3DSimple(nn.Conv3d): + def __init__(self, + in_planes, + out_planes, + midplanes=None, + stride=1, + padding=1): + + super(Conv3DSimple, self).__init__( + in_channels=in_planes, + out_channels=out_planes, + kernel_size=(3, 3, 3), + stride=stride, + padding=padding, + bias=False) + + @staticmethod + def get_downsample_stride(stride): + return (stride, stride, stride) + + +class Conv2Plus1D(nn.Sequential): + + def __init__(self, + in_planes, + out_planes, + midplanes, + stride=1, + padding=1): + super(Conv2Plus1D, self).__init__( + nn.Conv3d(in_planes, midplanes, kernel_size=(1, 3, 3), + stride=(1, stride, stride), padding=(0, padding, padding), + bias=False), + nn.BatchNorm3d(midplanes), + nn.ReLU(inplace=True), + nn.Conv3d(midplanes, out_planes, kernel_size=(3, 1, 1), + stride=(stride, 1, 1), padding=(padding, 0, 0), + bias=False)) + + @staticmethod + def get_downsample_stride(stride): + return (stride, stride, stride) + + +class Conv3DNoTemporal(nn.Conv3d): + + def __init__(self, + in_planes, + out_planes, + midplanes=None, + stride=1, + padding=1): + + super(Conv3DNoTemporal, self).__init__( + in_channels=in_planes, + out_channels=out_planes, + kernel_size=(1, 3, 3), + stride=(1, stride, stride), + padding=(0, padding, padding), + bias=False) + + @staticmethod + def get_downsample_stride(stride): + return (1, stride, stride) + + +class BasicBlock(nn.Module): + + expansion = 1 + + def __init__(self, inplanes, planes, conv_builder, stride=1, downsample=None): + midplanes = (inplanes * planes * 3 * 3 * 3) // (inplanes * 3 * 3 + 3 * planes) + + super(BasicBlock, self).__init__() + self.conv1 = nn.Sequential( + conv_builder(inplanes, planes, midplanes, stride), + nn.BatchNorm3d(planes), + nn.ReLU(inplace=True) + ) + self.conv2 = nn.Sequential( + conv_builder(planes, planes, midplanes), + nn.BatchNorm3d(planes) + ) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.conv2(out) + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + 
def __init__(self, inplanes, planes, conv_builder, stride=1, downsample=None): + + super(Bottleneck, self).__init__() + midplanes = (inplanes * planes * 3 * 3 * 3) // (inplanes * 3 * 3 + 3 * planes) + + # 1x1x1 + self.conv1 = nn.Sequential( + nn.Conv3d(inplanes, planes, kernel_size=1, bias=False), + nn.BatchNorm3d(planes), + nn.ReLU(inplace=True) + ) + # Second kernel + self.conv2 = nn.Sequential( + conv_builder(planes, planes, midplanes, stride), + nn.BatchNorm3d(planes), + nn.ReLU(inplace=True) + ) + + # 1x1x1 + self.conv3 = nn.Sequential( + nn.Conv3d(planes, planes * self.expansion, kernel_size=1, bias=False), + nn.BatchNorm3d(planes * self.expansion) + ) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.conv2(out) + out = self.conv3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class BasicStem(nn.Sequential): + """The default conv-batchnorm-relu stem + """ + def __init__(self): + super(BasicStem, self).__init__( + nn.Conv3d(3, 64, kernel_size=(3, 7, 7), stride=(1, 2, 2), + padding=(1, 3, 3), bias=False), + nn.BatchNorm3d(64), + nn.ReLU(inplace=True)) + + +class R2Plus1dStem(nn.Sequential): + """R(2+1)D stem is different than the default one as it uses separated 3D convolution + """ + def __init__(self): + super(R2Plus1dStem, self).__init__( + nn.Conv3d(3, 45, kernel_size=(1, 7, 7), + stride=(1, 2, 2), padding=(0, 3, 3), + bias=False), + nn.BatchNorm3d(45), + nn.ReLU(inplace=True), + nn.Conv3d(45, 64, kernel_size=(3, 1, 1), + stride=(1, 1, 1), padding=(1, 0, 0), + bias=False), + nn.BatchNorm3d(64), + nn.ReLU(inplace=True)) + + +class VideoResNet(nn.Module): + + def __init__(self, block, conv_makers, layers, + stem, num_classes=400, + zero_init_residual=False): + """Generic resnet video generator. + + Args: + block (nn.Module): resnet building block + conv_makers (list(functions)): generator function for each layer + layers (List[int]): number of blocks per layer + stem (nn.Module, optional): Resnet stem, if None, defaults to conv-bn-relu. Defaults to None. + num_classes (int, optional): Dimension of the final FC layer. Defaults to 400. + zero_init_residual (bool, optional): Zero init bottleneck residual BN. Defaults to False. 
+ """ + super(VideoResNet, self).__init__() + self.inplanes = 64 + + self.stem = stem() + + self.layer1 = self._make_layer(block, conv_makers[0], 64, layers[0], stride=1) + self.layer2 = self._make_layer(block, conv_makers[1], 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, conv_makers[2], 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, conv_makers[3], 512, layers[3], stride=2) + + self.avgpool = nn.AdaptiveAvgPool3d((1, 1, 1)) + self.fc = nn.Linear(512 * block.expansion, num_classes) + + # init weights + self._initialize_weights() + + if zero_init_residual: + for m in self.modules(): + if isinstance(m, Bottleneck): + nn.init.constant_(m.bn3.weight, 0) + + def forward(self, x): + x = self.stem(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + # Flatten the layer to fc + x = x.flatten(1) + x = self.fc(x) + + return x + + def _make_layer(self, block, conv_builder, planes, blocks, stride=1): + downsample = None + + if stride != 1 or self.inplanes != planes * block.expansion: + ds_stride = conv_builder.get_downsample_stride(stride) + downsample = nn.Sequential( + nn.Conv3d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=ds_stride, bias=False), + nn.BatchNorm3d(planes * block.expansion) + ) + layers = [] + layers.append(block(self.inplanes, planes, conv_builder, stride, downsample)) + + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes, conv_builder)) + + return nn.Sequential(*layers) + + def _initialize_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv3d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', + nonlinearity='relu') + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm3d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + nn.init.constant_(m.bias, 0) + + +def _video_resnet(arch, pretrained=False, progress=True, **kwargs): + model = VideoResNet(**kwargs) + + if pretrained: + state_dict = load_state_dict_from_url(model_urls[arch], + progress=progress) + model.load_state_dict(state_dict) + return model + + +def r3d_18(pretrained=False, progress=True, **kwargs): + """Construct 18 layer Resnet3D model as in + https://arxiv.org/abs/1711.11248 + + Args: + pretrained (bool): If True, returns a model pre-trained on Kinetics-400 + progress (bool): If True, displays a progress bar of the download to stderr + + Returns: + nn.Module: R3D-18 network + """ + + return _video_resnet('r3d_18', + pretrained, progress, + block=BasicBlock, + conv_makers=[Conv3DSimple] * 4, + layers=[2, 2, 2, 2], + stem=BasicStem, **kwargs) + + +def mc3_18(pretrained=False, progress=True, **kwargs): + """Constructor for 18 layer Mixed Convolution network as in + https://arxiv.org/abs/1711.11248 + + Args: + pretrained (bool): If True, returns a model pre-trained on Kinetics-400 + progress (bool): If True, displays a progress bar of the download to stderr + + Returns: + nn.Module: MC3 Network definition + """ + return _video_resnet('mc3_18', + pretrained, progress, + block=BasicBlock, + conv_makers=[Conv3DSimple] + [Conv3DNoTemporal] * 3, + layers=[2, 2, 2, 2], + stem=BasicStem, **kwargs) + + +def r2plus1d_18(pretrained=False, progress=True, **kwargs): + """Constructor for the 18 layer deep R(2+1)D network as in + https://arxiv.org/abs/1711.11248 + + Args: + pretrained (bool): If True, returns a model 
pre-trained on Kinetics-400 + progress (bool): If True, displays a progress bar of the download to stderr + + Returns: + nn.Module: R(2+1)D-18 network + """ + return _video_resnet('r2plus1d_18', + pretrained, progress, + block=BasicBlock, + conv_makers=[Conv2Plus1D] * 4, + layers=[2, 2, 2, 2], + stem=R2Plus1dStem, **kwargs) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/__init__.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/__init__.py new file mode 100644 index 0000000000..0ff2b0be2c --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/__init__.py @@ -0,0 +1,20 @@ +from .boxes import nms, box_iou +from .new_empty_tensor import _new_empty_tensor +from .deform_conv import deform_conv2d, DeformConv2d +from .roi_align import roi_align, RoIAlign +from .roi_pool import roi_pool, RoIPool +from .ps_roi_align import ps_roi_align, PSRoIAlign +from .ps_roi_pool import ps_roi_pool, PSRoIPool +from .poolers import MultiScaleRoIAlign +from .feature_pyramid_network import FeaturePyramidNetwork + +from ._register_onnx_ops import _register_custom_op + +_register_custom_op() + + +__all__ = [ + 'deform_conv2d', 'DeformConv2d', 'nms', 'roi_align', 'RoIAlign', 'roi_pool', + 'RoIPool', '_new_empty_tensor', 'ps_roi_align', 'PSRoIAlign', 'ps_roi_pool', + 'PSRoIPool', 'MultiScaleRoIAlign', 'FeaturePyramidNetwork' +] diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_register_onnx_ops.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_register_onnx_ops.py new file mode 100644 index 0000000000..d9d9c5c094 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_register_onnx_ops.py @@ -0,0 +1,51 @@ +import sys +import torch + +_onnx_opset_version = 11 + + +def _register_custom_op(): + from torch.onnx.symbolic_helper import parse_args, scalar_type_to_onnx, scalar_type_to_pytorch_type, \ + cast_pytorch_to_onnx + from torch.onnx.symbolic_opset9 import select, unsqueeze, squeeze, _cast_Long, reshape + + @parse_args('v', 'v', 'f') + def symbolic_multi_label_nms(g, boxes, scores, iou_threshold): + boxes = unsqueeze(g, boxes, 0) + scores = unsqueeze(g, unsqueeze(g, scores, 0), 0) + max_output_per_class = g.op('Constant', value_t=torch.tensor([sys.maxsize], dtype=torch.long)) + iou_threshold = g.op('Constant', value_t=torch.tensor([iou_threshold], dtype=torch.float)) + nms_out = g.op('NonMaxSuppression', boxes, scores, max_output_per_class, iou_threshold) + return squeeze(g, select(g, nms_out, 1, g.op('Constant', value_t=torch.tensor([2], dtype=torch.long))), 1) + + @parse_args('v', 'v', 'f', 'i', 'i', 'i', 'i') + def roi_align(g, input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio, aligned): + if(aligned): + raise RuntimeError('Unsupported: ONNX export of roi_align with aligned') + batch_indices = _cast_Long(g, squeeze(g, select(g, rois, 1, g.op('Constant', + value_t=torch.tensor([0], dtype=torch.long))), 1), False) + rois = select(g, rois, 1, g.op('Constant', value_t=torch.tensor([1, 2, 3, 4], dtype=torch.long))) + return g.op('RoiAlign', input, rois, batch_indices, spatial_scale_f=spatial_scale, + output_height_i=pooled_height, output_width_i=pooled_width, sampling_ratio_i=sampling_ratio) + + @parse_args('v', 'v', 'f', 'i', 'i') + def roi_pool(g, input, rois, spatial_scale, pooled_height, pooled_width): + roi_pool = g.op('MaxRoiPool', input, rois, + pooled_shape_i=(pooled_height, pooled_width), spatial_scale_f=spatial_scale) + return roi_pool, None + + 
@parse_args('v', 'is') + def new_empty_tensor_op(g, input, shape): + dtype = input.type().scalarType() + if dtype is None: + dtype = 'Float' + dtype = scalar_type_to_onnx.index(cast_pytorch_to_onnx[dtype]) + shape = g.op("Constant", value_t=torch.tensor(shape)) + return g.op("ConstantOfShape", shape, + value_t=torch.tensor([0], dtype=scalar_type_to_pytorch_type[dtype])) + + from torch.onnx import register_custom_op_symbolic + register_custom_op_symbolic('torchvision::nms', symbolic_multi_label_nms, _onnx_opset_version) + register_custom_op_symbolic('torchvision::roi_align', roi_align, _onnx_opset_version) + register_custom_op_symbolic('torchvision::roi_pool', roi_pool, _onnx_opset_version) + register_custom_op_symbolic('torchvision::_new_empty_tensor_op', new_empty_tensor_op, _onnx_opset_version) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils.py new file mode 100644 index 0000000000..3a07c747f5 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils.py @@ -0,0 +1,63 @@ +from typing import List, Optional, Tuple, Union + +import torch +from torch import nn, Tensor + + +def _cat(tensors: List[Tensor], dim: int = 0) -> Tensor: + """ + Efficient version of torch.cat that avoids a copy if there is only a single element in a list + """ + # TODO add back the assert + # assert isinstance(tensors, (list, tuple)) + if len(tensors) == 1: + return tensors[0] + return torch.cat(tensors, dim) + + +def convert_boxes_to_roi_format(boxes: List[Tensor]) -> Tensor: + concat_boxes = _cat([b for b in boxes], dim=0) + temp = [] + for i, b in enumerate(boxes): + temp.append(torch.full_like(b[:, :1], i)) + ids = _cat(temp, dim=0) + rois = torch.cat([ids, concat_boxes], dim=1) + return rois + + +def check_roi_boxes_shape(boxes: Union[Tensor, List[Tensor]]): + if isinstance(boxes, (list, tuple)): + for _tensor in boxes: + assert ( + _tensor.size(1) == 4 + ), "The shape of the tensor in the boxes list is not correct as List[Tensor[L, 4]]" + elif isinstance(boxes, torch.Tensor): + assert boxes.size(1) == 5, "The boxes tensor shape is not correct as Tensor[K, 5]" + else: + assert False, "boxes is expected to be a Tensor[L, 5] or a List[Tensor[K, 4]]" + return + + +def split_normalization_params( + model: nn.Module, norm_classes: Optional[List[type]] = None +) -> Tuple[List[Tensor], List[Tensor]]: + # Adapted from https://github.com/facebookresearch/ClassyVision/blob/659d7f78/classy_vision/generic/util.py#L501 + if not norm_classes: + norm_classes = [nn.modules.batchnorm._BatchNorm, nn.LayerNorm, nn.GroupNorm] + + for t in norm_classes: + if not issubclass(t, nn.Module): + raise ValueError(f"Class {t} is not a subclass of nn.Module.") + + classes = tuple(norm_classes) + + norm_params = [] + other_params = [] + for module in model.modules(): + if next(module.children(), None): + other_params.extend(p for p in module.parameters(recurse=False) if p.requires_grad) + elif isinstance(module, classes): + norm_params.extend(p for p in module.parameters() if p.requires_grad) + else: + other_params.extend(p for p in module.parameters() if p.requires_grad) + return norm_params, other_params diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils_origin.py new file mode 100644 index 0000000000..714022f042 --- /dev/null +++ 
b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils_origin.py @@ -0,0 +1,38 @@ +import torch +from torch import Tensor +from torch.jit.annotations import List + + +def _cat(tensors, dim=0): + # type: (List[Tensor], int) -> Tensor + """ + Efficient version of torch.cat that avoids a copy if there is only a single element in a list + """ + # TODO add back the assert + # assert isinstance(tensors, (list, tuple)) + if len(tensors) == 1: + return tensors[0] + return torch.cat(tensors, dim) + + +def convert_boxes_to_roi_format(boxes): + # type: (List[Tensor]) -> Tensor + concat_boxes = _cat([b for b in boxes], dim=0) + temp = [] + for i, b in enumerate(boxes): + temp.append(torch.full_like(b[:, :1], i)) + ids = _cat(temp, dim=0) + rois = torch.cat([ids, concat_boxes], dim=1) + return rois + + +def check_roi_boxes_shape(boxes): + if isinstance(boxes, list): + for _tensor in boxes: + assert _tensor.size(1) == 4, \ + 'The shape of the tensor in the boxes list is not correct as List[Tensor[L, 4]]' + elif isinstance(boxes, torch.Tensor): + assert boxes.size(1) == 5, 'The boxes tensor shape is not correct as Tensor[K, 5]' + else: + assert False, 'boxes is expected to be a Tensor[L, 5] or a List[Tensor[K, 4]]' + return diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/boxes.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/boxes.py new file mode 100644 index 0000000000..ac0dba1fe7 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/boxes.py @@ -0,0 +1,237 @@ +import torch +from torch.jit.annotations import Tuple +from torch import Tensor +import torchvision +import sys + + +# for servers to immediately record the logs +def flush_print(func): + def new_print(*args, **kwargs): + func(*args, **kwargs) + sys.stdout.flush() + return new_print +print = flush_print(print) + + +def nms_origin(boxes, scores, iou_threshold): + # type: (Tensor, Tensor, float) + """ + Performs non-maximum suppression (NMS) on the boxes according + to their intersection-over-union (IoU). + + NMS iteratively removes lower scoring boxes which have an + IoU greater than iou_threshold with another (higher scoring) + box. + + Parameters + ---------- + boxes : Tensor[N, 4]) + boxes to perform NMS on. 
They + are expected to be in (x1, y1, x2, y2) format + scores : Tensor[N] + scores for each one of the boxes + iou_threshold : float + discards all overlapping + boxes with IoU > iou_threshold + + Returns + ------- + keep : Tensor + int64 tensor with the indices + of the elements that have been kept + by NMS, sorted in decreasing order of scores + """ + keep = []  # indices (into boxes) of the detections that are finally kept + idxs = scores.argsort()  # indices that sort the scores in ascending order + while idxs.numel() > 0:  # loop until no candidate indices remain; numel(): number of elements + # index of the current highest-scoring box, and its coordinates + max_score_index = idxs[-1] + max_score_box = boxes[max_score_index][None, :]  # [1, 4] + keep.append(max_score_index) + if idxs.size(0) == 1:  # only one box left + break + idxs = idxs[:-1]  # drop the highest-scoring box from the candidates; compute IoU between it and the remaining boxes + other_boxes = boxes[idxs]  # [?, 4] + ious = box_iou(max_score_box, other_boxes)  # compare one box against all remaining boxes, shape 1xM + idxs = idxs[ious[0] <= iou_threshold] + + keep = idxs.new(keep)  # Tensor + return keep + # return torch.ops.torchvision.nms(boxes, scores, iou_threshold) + + +def nms(bboxes, scores, threshold=0.5): + x1 = bboxes[:, 0] + y1 = bboxes[:, 1] + x2 = bboxes[:, 2] + y2 = bboxes[:, 3] + areas = (x2 - x1) * (y2 - y1) + _, order = scores.sort(0, descending=True) + + keep = [] + while order.numel() > 0: + if order.numel() == 1: + i = order.item() + keep.append(i) + break + else: + i = order[0].item() + keep.append(i) + + xx1 = x1[order[1:]].clamp(min=x1[i].item()) + yy1 = y1[order[1:]].clamp(min=y1[i].item()) + xx2 = x2[order[1:]].clamp(max=x2[i].item()) + yy2 = y2[order[1:]].clamp(max=y2[i].item()) + inter = (xx2 - xx1).clamp(min=0) * (yy2 - yy1).clamp(min=0) + + iou = inter / (areas[i] + areas[order[1:]] - inter) + idx = (iou <= threshold).nonzero().squeeze() + if idx.numel() == 0: + break + order = order[idx + 1] + return torch.LongTensor(keep) + + +def batched_nms(boxes, scores, idxs, iou_threshold): + # type: (Tensor, Tensor, Tensor, float) + """ + Performs non-maximum suppression in a batched fashion. + + Each index value corresponds to a category, and NMS + will not be applied between elements of different categories. + + Parameters + ---------- + boxes : Tensor[N, 4] + boxes where NMS will be performed. They + are expected to be in (x1, y1, x2, y2) format + scores : Tensor[N] + scores for each one of the boxes + idxs : Tensor[N] + indices of the categories for each one of the boxes. + iou_threshold : float + discards all overlapping boxes + with IoU > iou_threshold + + Returns + ------- + keep : Tensor + int64 tensor with the indices of + the elements that have been kept by NMS, sorted + in decreasing order of scores + """ + if boxes.numel() == 0: + return torch.empty((0,), dtype=torch.int64, device=boxes.device) + # strategy: in order to perform NMS independently per class, + # we add an offset to all the boxes. The offset is dependent + # only on the class idx, and is large enough so that boxes + # from different classes do not overlap + max_coordinate = boxes.max() + offsets = idxs.to(boxes) * (max_coordinate + 1) + boxes_for_nms = boxes + offsets[:, None] + keep = nms(boxes_for_nms, scores, iou_threshold) + return keep + + +def remove_small_boxes(boxes, min_size): + # type: (Tensor, float) + """ + Remove boxes which contain at least one side smaller than min_size.
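A quick sanity sketch (not part of the patch) for the pure-Python `nms` above, which this file substitutes for `torch.ops.torchvision.nms`: of two heavily overlapping boxes the lower-scoring one is suppressed, while a distant box survives. The coordinates and scores are made up for illustration.

import torch
from torchvision.ops.boxes import nms

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],
                      [50., 50., 60., 60.]])
scores = torch.tensor([0.9, 0.8, 0.7])
print(nms(boxes, scores, 0.5))            # tensor([0, 2]): box 1 is suppressed by box 0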
+ + Arguments: + boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format + min_size (float): minimum size + + Returns: + keep (Tensor[K]): indices of the boxes that have both sides + larger than min_size + """ + ws, hs = boxes[:, 2] - boxes[:, 0], boxes[:, 3] - boxes[:, 1] + keep = (ws >= min_size) & (hs >= min_size) + keep = keep.nonzero().squeeze(1) + return keep + + +def clip_boxes_to_image(boxes, size): + # type: (Tensor, Tuple[int, int]) + """ + Clip boxes so that they lie inside an image of size `size`. + + Arguments: + boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format + size (Tuple[height, width]): size of the image + + Returns: + clipped_boxes (Tensor[N, 4]) + """ + dim = boxes.dim() + boxes_x = boxes[..., 0::2] + boxes_y = boxes[..., 1::2] + height, width = size + + if torchvision._is_tracing(): + boxes_x = torch.max(boxes_x, torch.tensor(0, dtype=boxes.dtype, device=boxes.device)) + boxes_x = torch.min(boxes_x, torch.tensor(width, dtype=boxes.dtype, device=boxes.device)) + boxes_y = torch.max(boxes_y, torch.tensor(0, dtype=boxes.dtype, device=boxes.device)) + boxes_y = torch.min(boxes_y, torch.tensor(height, dtype=boxes.dtype, device=boxes.device)) + else: + boxes_x = boxes_x.clamp(min=0, max=width) + boxes_y = boxes_y.clamp(min=0, max=height) + + clipped_boxes = torch.stack((boxes_x, boxes_y), dim=dim) + return clipped_boxes.reshape(boxes.shape) + + +def box_area(boxes): + """ + Computes the area of a set of bounding boxes, which are specified by its + (x1, y1, x2, y2) coordinates. + + Arguments: + boxes (Tensor[N, 4]): boxes for which the area will be computed. They + are expected to be in (x1, y1, x2, y2) format + + Returns: + area (Tensor[N]): area for each box + """ + + # torch.save(boxes, 'boxes.pth') + a = (boxes[:, 2] - boxes[:, 0]) + b = (boxes[:, 3] - boxes[:, 1]) + c = a * b + return c + # return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) + + +# implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py +# with slight modifications +def box_iou(boxes1, boxes2): + """ + Return intersection-over-union (Jaccard index) of boxes. + + Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 
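The two helpers above are typically chained during post-processing. In this short sketch (not part of the patch), with an assumed 40x40 image, an out-of-bounds box is clipped and the box that ends up degenerate is dropped; note that `clip_boxes_to_image` takes the size as (height, width).

import torch
from torchvision.ops.boxes import clip_boxes_to_image, remove_small_boxes

boxes = torch.tensor([[-5., -5., 20., 20.],     # sticks out of the image
                      [30., 30., 31., 31.]])    # only 1 pixel wide and high
clipped = clip_boxes_to_image(boxes, (40, 40))
keep = remove_small_boxes(clipped, min_size=2.0)
print(clipped[keep])                            # only the first box remains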
+ + Arguments: + boxes1 (Tensor[N, 4]) + boxes2 (Tensor[M, 4]) + + Returns: + iou (Tensor[N, M]): the NxM matrix containing the pairwise + IoU values for every element in boxes1 and boxes2 + """ + area1 = box_area(boxes1) + area2 = box_area(boxes2) + + # lt = torch.max(boxes1[:, None, :2], boxes2[:, :2]) # [N,M,2] + # rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) # [N,M,2] + lt = torch.max(boxes1[:, None, :2], boxes2[:, :2].float()) # [N,M,2] + rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:].float()) # [N,M,2] + + wh = (rb - lt).clamp(min=0) # [N,M,2] + a = wh[:, :, 0] + b = wh[:, :, 1] + inter = a * b # [N,M] + + iou = inter / (area1[:, None] + area2 - inter) + return iou diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/deform_conv.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/deform_conv.py new file mode 100644 index 0000000000..c948b16419 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/deform_conv.py @@ -0,0 +1,139 @@ +import math + +import torch +from torch import nn, Tensor +from torch.nn import init +from torch.nn.parameter import Parameter +from torch.nn.modules.utils import _pair +from torch.jit.annotations import Optional, Tuple + + +def deform_conv2d(input, offset, weight, bias=None, stride=(1, 1), padding=(0, 0), dilation=(1, 1)): + # type: (Tensor, Tensor, Tensor, Optional[Tensor], Tuple[int, int], Tuple[int, int], Tuple[int, int]) -> Tensor + """ + Performs Deformable Convolution, described in Deformable Convolutional Networks + + Arguments: + input (Tensor[batch_size, in_channels, in_height, in_width]): input tensor + offset (Tensor[batch_size, 2 * offset_groups * kernel_height * kernel_width, + out_height, out_width]): offsets to be applied for each position in the + convolution kernel. + weight (Tensor[out_channels, in_channels // groups, kernel_height, kernel_width]): + convolution weights, split into groups of size (in_channels // groups) + bias (Tensor[out_channels]): optional bias of shape (out_channels,). Default: None + stride (int or Tuple[int, int]): distance between convolution centers. Default: 1 + padding (int or Tuple[int, int]): height/width of padding of zeroes around + each image. Default: 0 + dilation (int or Tuple[int, int]): the spacing between kernel elements. Default: 1 + + Returns: + output (Tensor[batch_sz, out_channels, out_h, out_w]): result of convolution + + + Examples:: + >>> input = torch.rand(1, 3, 10, 10) + >>> kh, kw = 3, 3 + >>> weight = torch.rand(5, 3, kh, kw) + >>> # offset should have the same spatial size as the output + >>> # of the convolution. In this case, for an input of 10, stride of 1 + >>> # and kernel size of 3, without padding, the output size is 8 + >>> offset = torch.rand(5, 2 * kh * kw, 8, 8) + >>> out = deform_conv2d(input, offset, weight) + >>> print(out.shape) + >>> # returns + >>> torch.Size([1, 5, 8, 8]) + """ + + out_channels = weight.shape[0] + if bias is None: + bias = torch.zeros(out_channels, device=input.device, dtype=input.dtype) + + stride_h, stride_w = _pair(stride) + pad_h, pad_w = _pair(padding) + dil_h, dil_w = _pair(dilation) + weights_h, weights_w = weight.shape[-2:] + _, n_in_channels, in_h, in_w = input.shape + + n_offset_grps = offset.shape[1] // (2 * weights_h * weights_w) + n_weight_grps = n_in_channels // weight.shape[1] + + if n_offset_grps == 0: + raise RuntimeError( + "the shape of the offset tensor at dimension 1 is not valid. 
It should " + "be a multiple of 2 * weight.size[2] * weight.size[3].\n" + "Got offset.shape[1]={}, while 2 * weight.size[2] * weight.size[3]={}".format( + offset.shape[1], 2 * weights_h * weights_w)) + + return torch.ops.torchvision.deform_conv2d( + input, + weight, + offset, + bias, + stride_h, stride_w, + pad_h, pad_w, + dil_h, dil_w, + n_weight_grps, + n_offset_grps) + + +class DeformConv2d(nn.Module): + """ + See deform_conv2d + """ + def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, + dilation=1, groups=1, bias=True): + super(DeformConv2d, self).__init__() + + if in_channels % groups != 0: + raise ValueError('in_channels must be divisible by groups') + if out_channels % groups != 0: + raise ValueError('out_channels must be divisible by groups') + + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = _pair(stride) + self.padding = _pair(padding) + self.dilation = _pair(dilation) + self.groups = groups + + self.weight = Parameter(torch.empty(out_channels, in_channels // groups, + self.kernel_size[0], self.kernel_size[1])) + + if bias: + self.bias = Parameter(torch.empty(out_channels)) + else: + self.register_parameter('bias', None) + + self.reset_parameters() + + def reset_parameters(self): + init.kaiming_uniform_(self.weight, a=math.sqrt(5)) + if self.bias is not None: + fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight) + bound = 1 / math.sqrt(fan_in) + init.uniform_(self.bias, -bound, bound) + + def forward(self, input, offset): + """ + Arguments: + input (Tensor[batch_size, in_channels, in_height, in_width]): input tensor + offset (Tensor[batch_size, 2 * offset_groups * kernel_height * kernel_width, + out_height, out_width]): offsets to be applied for each position in the + convolution kernel. + """ + return deform_conv2d(input, offset, self.weight, self.bias, stride=self.stride, + padding=self.padding, dilation=self.dilation) + + def __repr__(self): + s = self.__class__.__name__ + '(' + s += '{in_channels}' + s += ', {out_channels}' + s += ', kernel_size={kernel_size}' + s += ', stride={stride}' + s += ', padding={padding}' if self.padding != (0, 0) else '' + s += ', dilation={dilation}' if self.dilation != (1, 1) else '' + s += ', groups={groups}' if self.groups != 1 else '' + s += ', bias=False' if self.bias is None else '' + s += ')' + return s.format(**self.__dict__) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/feature_pyramid_network.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/feature_pyramid_network.py new file mode 100644 index 0000000000..09e79cc7ef --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/feature_pyramid_network.py @@ -0,0 +1,193 @@ +from collections import OrderedDict + +import torch +import torch.nn.functional as F +from torch import nn, Tensor + +from torch.jit.annotations import Tuple, List, Dict + + +class FeaturePyramidNetwork(nn.Module): + """ + Module that adds a FPN from on top of a set of feature maps. This is based on + `"Feature Pyramid Network for Object Detection" `_. + + The feature maps are currently supposed to be in increasing depth + order. + + The input to the model is expected to be an OrderedDict[Tensor], containing + the feature maps on top of which the FPN will be added. 
+ + Arguments: + in_channels_list (list[int]): number of channels for each feature map that + is passed to the module + out_channels (int): number of channels of the FPN representation + extra_blocks (ExtraFPNBlock or None): if provided, extra operations will + be performed. It is expected to take the fpn features, the original + features and the names of the original features as input, and returns + a new list of feature maps and their corresponding names + + Examples:: + + >>> m = torchvision.ops.FeaturePyramidNetwork([10, 20, 30], 5) + >>> # get some dummy data + >>> x = OrderedDict() + >>> x['feat0'] = torch.rand(1, 10, 64, 64) + >>> x['feat2'] = torch.rand(1, 20, 16, 16) + >>> x['feat3'] = torch.rand(1, 30, 8, 8) + >>> # compute the FPN on top of x + >>> output = m(x) + >>> print([(k, v.shape) for k, v in output.items()]) + >>> # returns + >>> [('feat0', torch.Size([1, 5, 64, 64])), + >>> ('feat2', torch.Size([1, 5, 16, 16])), + >>> ('feat3', torch.Size([1, 5, 8, 8]))] + + """ + def __init__(self, in_channels_list, out_channels, extra_blocks=None): + super(FeaturePyramidNetwork, self).__init__() + self.inner_blocks = nn.ModuleList() + self.layer_blocks = nn.ModuleList() + for in_channels in in_channels_list: + if in_channels == 0: + raise ValueError("in_channels=0 is currently not supported") + inner_block_module = nn.Conv2d(in_channels, out_channels, 1) + layer_block_module = nn.Conv2d(out_channels, out_channels, 3, padding=1) + self.inner_blocks.append(inner_block_module) + self.layer_blocks.append(layer_block_module) + + # initialize parameters now to avoid modifying the initialization of top_blocks + for m in self.children(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_uniform_(m.weight, a=1) + nn.init.constant_(m.bias, 0) + + if extra_blocks is not None: + assert isinstance(extra_blocks, ExtraFPNBlock) + self.extra_blocks = extra_blocks + + def get_result_from_inner_blocks(self, x, idx): + # type: (Tensor, int) + """ + This is equivalent to self.inner_blocks[idx](x), + but torchscript doesn't support this yet + """ + num_blocks = 0 + for m in self.inner_blocks: + num_blocks += 1 + if idx < 0: + idx += num_blocks + i = 0 + out = x + for module in self.inner_blocks: + if i == idx: + out = module(x) + i += 1 + return out + + def get_result_from_layer_blocks(self, x, idx): + # type: (Tensor, int) + """ + This is equivalent to self.layer_blocks[idx](x), + but torchscript doesn't support this yet + """ + num_blocks = 0 + for m in self.layer_blocks: + num_blocks += 1 + if idx < 0: + idx += num_blocks + i = 0 + out = x + for module in self.layer_blocks: + if i == idx: + out = module(x) + i += 1 + return out + + def forward(self, x): + # type: (Dict[str, Tensor]) + """ + Computes the FPN for a set of feature maps. + + Arguments: + x (OrderedDict[Tensor]): feature maps for each feature level. + + Returns: + results (OrderedDict[Tensor]): feature maps after FPN layers. + They are ordered from highest resolution first. 
+ """ + # unpack OrderedDict into two lists for easier handling + names = list(x.keys()) + x = list(x.values()) + + last_inner = self.get_result_from_inner_blocks(x[-1], -1) + results = [] + results.append(self.get_result_from_layer_blocks(last_inner, -1)) + + for idx in range(len(x) - 2, -1, -1): + inner_lateral = self.get_result_from_inner_blocks(x[idx], idx) + feat_shape = inner_lateral.shape[-2:] + inner_top_down = F.interpolate(last_inner, size=feat_shape, mode="nearest") + last_inner = inner_lateral + inner_top_down + results.insert(0, self.get_result_from_layer_blocks(last_inner, idx)) + + if self.extra_blocks is not None: + results, names = self.extra_blocks(results, x, names) + + # make it back an OrderedDict + out = OrderedDict([(k, v) for k, v in zip(names, results)]) + + return out + + +class ExtraFPNBlock(nn.Module): + """ + Base class for the extra block in the FPN. + + Arguments: + results (List[Tensor]): the result of the FPN + x (List[Tensor]): the original feature maps + names (List[str]): the names for each one of the + original feature maps + + Returns: + results (List[Tensor]): the extended set of results + of the FPN + names (List[str]): the extended set of names for the results + """ + def forward(self, results, x, names): + pass + + +class LastLevelMaxPool(ExtraFPNBlock): + """ + Applies a max_pool2d on top of the last feature map + """ + def forward(self, x, y, names): + # type: (List[Tensor], List[Tensor], List[str]) + names.append("pool") + x.append(F.max_pool2d(x[-1], 1, 2, 0)) + return x, names + + +class LastLevelP6P7(ExtraFPNBlock): + """ + This module is used in RetinaNet to generate extra layers, P6 and P7. + """ + def __init__(self, in_channels, out_channels): + super(LastLevelP6P7, self).__init__() + self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) + self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1) + for module in [self.p6, self.p7]: + nn.init.kaiming_uniform_(module.weight, a=1) + nn.init.constant_(module.bias, 0) + self.use_P5 = in_channels == out_channels + + def forward(self, p, c, names): + p5, c5 = p[-1], c[-1] + x = p5 if self.use_P5 else c5 + p6 = self.p6(x) + p7 = self.p7(F.relu(p6)) + p.extend([p6, p7]) + names.extend(["p6", "p7"]) + return p, names diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc.py new file mode 100644 index 0000000000..caf0d999f7 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc.py @@ -0,0 +1,168 @@ +""" +helper class that supports empty tensors on some nn functions. + +Ideally, add support directly in PyTorch to empty tensors in +those functions. 
+ +This can be removed once https://github.com/pytorch/pytorch/issues/12013 +is implemented +""" + +import warnings +from typing import Callable, List, Optional + +import torch +from torch import Tensor + + +class Conv2d(torch.nn.Conv2d): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + warnings.warn( + "torchvision.ops.misc.Conv2d is deprecated and will be " + "removed in future versions, use torch.nn.Conv2d instead.", + FutureWarning, + ) + + +class ConvTranspose2d(torch.nn.ConvTranspose2d): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + warnings.warn( + "torchvision.ops.misc.ConvTranspose2d is deprecated and will be " + "removed in future versions, use torch.nn.ConvTranspose2d instead.", + FutureWarning, + ) + + +class BatchNorm2d(torch.nn.BatchNorm2d): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + warnings.warn( + "torchvision.ops.misc.BatchNorm2d is deprecated and will be " + "removed in future versions, use torch.nn.BatchNorm2d instead.", + FutureWarning, + ) + + +interpolate = torch.nn.functional.interpolate + + +# This is not in nn +class FrozenBatchNorm2d(torch.nn.Module): + """ + BatchNorm2d where the batch statistics and the affine parameters + are fixed + """ + + def __init__( + self, + num_features: int, + eps: float = 1e-5, + n: Optional[int] = None, + ): + # n=None for backward-compatibility + if n is not None: + warnings.warn("`n` argument is deprecated and has been renamed `num_features`", DeprecationWarning) + num_features = n + super().__init__() + self.eps = eps + self.register_buffer("weight", torch.ones(num_features)) + self.register_buffer("bias", torch.zeros(num_features)) + self.register_buffer("running_mean", torch.zeros(num_features)) + self.register_buffer("running_var", torch.ones(num_features)) + + def _load_from_state_dict( + self, + state_dict: dict, + prefix: str, + local_metadata: dict, + strict: bool, + missing_keys: List[str], + unexpected_keys: List[str], + error_msgs: List[str], + ): + num_batches_tracked_key = prefix + "num_batches_tracked" + if num_batches_tracked_key in state_dict: + del state_dict[num_batches_tracked_key] + + super()._load_from_state_dict( + state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs + ) + + def forward(self, x: Tensor) -> Tensor: + # move reshapes to the beginning + # to make it fuser-friendly + w = self.weight.reshape(1, -1, 1, 1) + b = self.bias.reshape(1, -1, 1, 1) + rv = self.running_var.reshape(1, -1, 1, 1) + rm = self.running_mean.reshape(1, -1, 1, 1) + scale = w * (rv + self.eps).rsqrt() + bias = b - rm * scale + return x * scale + bias + + def __repr__(self) -> str: + return f"{self.__class__.__name__}({self.weight.shape[0]}, eps={self.eps})" + + +class ConvNormActivation(torch.nn.Sequential): + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int = 3, + stride: int = 1, + padding: Optional[int] = None, + groups: int = 1, + norm_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.BatchNorm2d, + activation_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.ReLU, + dilation: int = 1, + inplace: bool = True, + ) -> None: + if padding is None: + padding = (kernel_size - 1) // 2 * dilation + layers = [ + torch.nn.Conv2d( + in_channels, + out_channels, + kernel_size, + stride, + padding, + dilation=dilation, + groups=groups, + bias=norm_layer is None, + ) + ] + if norm_layer is not None: + layers.append(norm_layer(out_channels)) + if activation_layer is 
not None: + layers.append(activation_layer(inplace=inplace)) + super().__init__(*layers) + self.out_channels = out_channels + + +class SqueezeExcitation(torch.nn.Module): + def __init__( + self, + input_channels: int, + squeeze_channels: int, + activation: Callable[..., torch.nn.Module] = torch.nn.ReLU, + scale_activation: Callable[..., torch.nn.Module] = torch.nn.Sigmoid, + ) -> None: + super().__init__() + self.avgpool = torch.nn.AdaptiveAvgPool2d(1) + self.fc1 = torch.nn.Conv2d(input_channels, squeeze_channels, 1) + self.fc2 = torch.nn.Conv2d(squeeze_channels, input_channels, 1) + self.activation = activation() + self.scale_activation = scale_activation() + + def _scale(self, input: Tensor) -> Tensor: + scale = self.avgpool(input) + scale = self.fc1(scale) + scale = self.activation(scale) + scale = self.fc2(scale) + return self.scale_activation(scale) + + def forward(self, input: Tensor) -> Tensor: + scale = self._scale(input) + return scale * input diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc_origin.py new file mode 100644 index 0000000000..65e150700a --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc_origin.py @@ -0,0 +1,153 @@ +from collections import OrderedDict +from torch.jit.annotations import Optional, List +from torch import Tensor + +""" +helper class that supports empty tensors on some nn functions. + +Ideally, add support directly in PyTorch to empty tensors in +those functions. + +This can be removed once https://github.com/pytorch/pytorch/issues/12013 +is implemented +""" + +import math +import torch +from torchvision.ops import _new_empty_tensor +from torch.nn import Module, Conv2d +import torch.nn.functional as F + + +class ConvTranspose2d(torch.nn.ConvTranspose2d): + """ + Equivalent to nn.ConvTranspose2d, but with support for empty batch sizes. + This will eventually be supported natively by PyTorch, and this + class can go away. + """ + def forward(self, x): + if x.numel() > 0: + return self.super_forward(x) + # get output shape + + output_shape = [ + (i - 1) * d - 2 * p + (di * (k - 1) + 1) + op + for i, p, di, k, d, op in zip( + x.shape[-2:], + list(self.padding), + list(self.dilation), + list(self.kernel_size), + list(self.stride), + list(self.output_padding), + ) + ] + output_shape = [x.shape[0], self.out_channels] + output_shape + return _new_empty_tensor(x, output_shape) + + def super_forward(self, input, output_size=None): + # type: (Tensor, Optional[List[int]]) -> Tensor + if self.padding_mode != 'zeros': + raise ValueError('Only `zeros` padding mode is supported for ConvTranspose2d') + + output_padding = self._output_padding(input, output_size, self.stride, self.padding, self.kernel_size) + + return F.conv_transpose2d( + input, self.weight, self.bias, self.stride, self.padding, + output_padding, self.groups, self.dilation) + + +class BatchNorm2d(torch.nn.BatchNorm2d): + """ + Equivalent to nn.BatchNorm2d, but with support for empty batch sizes. + This will eventually be supported natively by PyTorch, and this + class can go away. 
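+
+    A minimal sketch of the empty-batch path this subclass adds (assumes the
+    torchvision ``_new_empty_tensor`` op is available in this build)::
+
+        >>> bn = BatchNorm2d(8)
+        >>> bn(torch.empty(0, 8, 4, 4)).shape
+        torch.Size([0, 8, 4, 4])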
+ """ + def forward(self, x): + if x.numel() > 0: + return super(BatchNorm2d, self).forward(x) + # get output shape + output_shape = x.shape + return _new_empty_tensor(x, output_shape) + + +def _check_size_scale_factor(dim, size, scale_factor): + # type: (int, Optional[List[int]], Optional[float]) -> None + if size is None and scale_factor is None: + raise ValueError("either size or scale_factor should be defined") + if size is not None and scale_factor is not None: + raise ValueError("only one of size or scale_factor should be defined") + if scale_factor is not None: + if isinstance(scale_factor, (list, tuple)): + if len(scale_factor) != dim: + raise ValueError( + "scale_factor shape must match input shape. " + "Input is {}D, scale_factor size is {}".format(dim, len(scale_factor)) + ) + + +def _output_size(dim, input, size, scale_factor): + # type: (int, Tensor, Optional[List[int]], Optional[float]) -> List[int] + assert dim == 2 + _check_size_scale_factor(dim, size, scale_factor) + if size is not None: + return size + # if dim is not 2 or scale_factor is iterable use _ntuple instead of concat + assert scale_factor is not None and isinstance(scale_factor, (int, float)) + scale_factors = [scale_factor, scale_factor] + # math.floor might return float in py2.7 + return [ + int(math.floor(input.size(i + 2) * scale_factors[i])) for i in range(dim) + ] + + +def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corners=None): + # type: (Tensor, Optional[List[int]], Optional[float], str, Optional[bool]) -> Tensor + """ + Equivalent to nn.functional.interpolate, but with support for empty batch sizes. + This will eventually be supported natively by PyTorch, and this + class can go away. + """ + if input.numel() > 0: + return torch.nn.functional.interpolate( + input, size, scale_factor, mode, align_corners + ) + + output_shape = _output_size(2, input, size, scale_factor) + output_shape = list(input.shape[:-2]) + output_shape + return _new_empty_tensor(input, output_shape) + + +# This is not in nn +class FrozenBatchNorm2d(torch.nn.Module): + """ + BatchNorm2d where the batch statistics and the affine parameters + are fixed + """ + + def __init__(self, n): + super(FrozenBatchNorm2d, self).__init__() + self.register_buffer("weight", torch.ones(n)) + self.register_buffer("bias", torch.zeros(n)) + self.register_buffer("running_mean", torch.zeros(n)) + self.register_buffer("running_var", torch.ones(n)) + + def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, + missing_keys, unexpected_keys, error_msgs): + num_batches_tracked_key = prefix + 'num_batches_tracked' + if num_batches_tracked_key in state_dict: + del state_dict[num_batches_tracked_key] + + super(FrozenBatchNorm2d, self)._load_from_state_dict( + state_dict, prefix, local_metadata, strict, + missing_keys, unexpected_keys, error_msgs) + + def forward(self, x): + # move reshapes to the beginning + # to make it fuser-friendly + w = self.weight.reshape(1, -1, 1, 1) + b = self.bias.reshape(1, -1, 1, 1) + rv = self.running_var.reshape(1, -1, 1, 1) + rm = self.running_mean.reshape(1, -1, 1, 1) + scale = w * rv.rsqrt() + bias = b - rm * scale + return x * scale + bias diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/new_empty_tensor.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/new_empty_tensor.py new file mode 100644 index 0000000000..74455a98c4 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/new_empty_tensor.py @@ -0,0 +1,16 @@ 
+import torch +from torch.jit.annotations import List +from torch import Tensor + + +def _new_empty_tensor(x, shape): + # type: (Tensor, List[int]) -> Tensor + """ + Arguments: + input (Tensor): input tensor + shape List[int]: the new empty tensor shape + + Returns: + output (Tensor) + """ + return torch.ops.torchvision._new_empty_tensor_op(x, shape) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/poolers.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/poolers.py new file mode 100644 index 0000000000..b94a9eb405 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/poolers.py @@ -0,0 +1,232 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import torch +import torch.nn.functional as F +from torch import nn, Tensor + +from torchvision.ops import roi_align +from torchvision.ops.boxes import box_area + +from torch.jit.annotations import Optional, List, Dict, Tuple +import torchvision + +# copying result_idx_in_level to a specific index in result[] +# is not supported by ONNX tracing yet. +# _onnx_merge_levels() is an implementation supported by ONNX +# that merges the levels to the right indices +@torch.jit.unused +def _onnx_merge_levels(levels, unmerged_results): + # type: (Tensor, List[Tensor]) -> Tensor + first_result = unmerged_results[0] + dtype, device = first_result.dtype, first_result.device + res = torch.zeros((levels.size(0), first_result.size(1), + first_result.size(2), first_result.size(3)), + dtype=dtype, device=device) + for l in range(len(unmerged_results)): + index = (levels == l).nonzero().view(-1, 1, 1, 1) + index = index.expand(index.size(0), + unmerged_results[l].size(1), + unmerged_results[l].size(2), + unmerged_results[l].size(3)) + res = res.scatter(0, index, unmerged_results[l]) + return res + + +# TODO: (eellison) T54974082 https://github.com/pytorch/pytorch/issues/26744/pytorch/issues/26744 +def initLevelMapper(k_min, k_max, canonical_scale=224, canonical_level=4, eps=1e-6): + # type: (int, int, int, int, float) + return LevelMapper(k_min, k_max, canonical_scale, canonical_level, eps) + + +@torch.jit.script +class LevelMapper(object): + """Determine which FPN level each RoI in a set of RoIs should map to based + on the heuristic in the FPN paper. + + Arguments: + k_min (int) + k_max (int) + canonical_scale (int) + canonical_level (int) + eps (float) + """ + + def __init__(self, k_min, k_max, canonical_scale=224, canonical_level=4, eps=1e-6): + # type: (int, int, int, int, float) + self.k_min = k_min + self.k_max = k_max + self.s0 = canonical_scale + self.lvl0 = canonical_level + self.eps = eps + + def __call__(self, boxlists): + # type: (List[Tensor]) + """ + Arguments: + boxlists (list[BoxList]) + """ + # Compute level ids + s = torch.sqrt(torch.cat([box_area(boxlist) for boxlist in boxlists])) + + # Eqn.(1) in FPN paper + target_lvls = torch.floor(self.lvl0 + torch.log2(s / self.s0) + torch.tensor(self.eps, dtype=s.dtype)) + target_lvls = torch.clamp(target_lvls, min=self.k_min, max=self.k_max) + return (target_lvls.to(torch.int64) - self.k_min).to(torch.int64) + + +class MultiScaleRoIAlign(nn.Module): + """ + Multi-scale RoIAlign pooling, which is useful for detection with or without FPN. + + It infers the scale of the pooling via the heuristics present in the FPN paper. + + Arguments: + featmap_names (List[str]): the names of the feature maps that will be used + for the pooling. 
+ output_size (List[Tuple[int, int]] or List[int]): output size for the pooled region + sampling_ratio (int): sampling ratio for ROIAlign + + Examples:: + + >>> m = torchvision.ops.MultiScaleRoIAlign(['feat1', 'feat3'], 3, 2) + >>> i = OrderedDict() + >>> i['feat1'] = torch.rand(1, 5, 64, 64) + >>> i['feat2'] = torch.rand(1, 5, 32, 32) # this feature won't be used in the pooling + >>> i['feat3'] = torch.rand(1, 5, 16, 16) + >>> # create some random bounding boxes + >>> boxes = torch.rand(6, 4) * 256; boxes[:, 2:] += boxes[:, :2] + >>> # original image size, before computing the feature maps + >>> image_sizes = [(512, 512)] + >>> output = m(i, [boxes], image_sizes) + >>> print(output.shape) + >>> torch.Size([6, 5, 3, 3]) + + """ + + __annotations__ = { + 'scales': Optional[List[float]], + 'map_levels': Optional[LevelMapper] + } + + def __init__(self, featmap_names, output_size, sampling_ratio): + super(MultiScaleRoIAlign, self).__init__() + if isinstance(output_size, int): + output_size = (output_size, output_size) + self.featmap_names = featmap_names + self.sampling_ratio = sampling_ratio + self.output_size = tuple(output_size) + self.scales = None + self.map_levels = None + + def convert_to_roi_format(self, boxes): + # type: (List[Tensor]) + concat_boxes = torch.cat(boxes, dim=0) + device, dtype = concat_boxes.device, concat_boxes.dtype + ids = torch.cat( + [ + torch.full_like(b[:, :1], i, dtype=dtype, layout=torch.strided, device=device) + for i, b in enumerate(boxes) + ], + dim=0, + ) + rois = torch.cat([ids, concat_boxes], dim=1) + return rois + + def infer_scale(self, feature, original_size): + # type: (Tensor, List[int]) + # assumption: the scale is of the form 2 ** (-k), with k integer + size = feature.shape[-2:] + possible_scales = torch.jit.annotate(List[float], []) + for s1, s2 in zip(size, original_size): + approx_scale = float(s1) / float(s2) + scale = 2 ** float(torch.tensor(approx_scale).log2().round()) + possible_scales.append(scale) + assert possible_scales[0] == possible_scales[1] + return possible_scales[0] + + def setup_scales(self, features, image_shapes): + # type: (List[Tensor], List[Tuple[int, int]]) + assert len(image_shapes) != 0 + max_x = 0 + max_y = 0 + for shape in image_shapes: + max_x = max(shape[0], max_x) + max_y = max(shape[1], max_y) + original_input_shape = (max_x, max_y) + + scales = [self.infer_scale(feat, original_input_shape) for feat in features] + # get the levels in the feature map by leveraging the fact that the network always + # downsamples by a factor of 2 at each level. + lvl_min = -torch.log2(torch.tensor(scales[0], dtype=torch.float32)).item() + lvl_max = -torch.log2(torch.tensor(scales[-1], dtype=torch.float32)).item() + self.scales = scales + self.map_levels = initLevelMapper(int(lvl_min), int(lvl_max)) + + def forward(self, x, boxes, image_shapes): + # type: (Dict[str, Tensor], List[Tensor], List[Tuple[int, int]]) + """ + Arguments: + x (OrderedDict[Tensor]): feature maps for each level. They are assumed to have + all the same number of channels, but they can have different sizes. + boxes (List[Tensor[N, 4]]): boxes to be used to perform the pooling operation, in + (x1, y1, x2, y2) format and in the image reference size, not the feature map + reference. + image_shapes (List[Tuple[height, width]]): the sizes of each image before they + have been fed to a CNN to obtain feature maps. This allows us to infer the + scale factor for each one of the levels to be pooled. 
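+
+        The FPN level chosen for each box follows Eq. (1) of the FPN paper (see
+        ``LevelMapper`` above). A rough sketch of that mapping with the default
+        canonical scale/level of (224, 4), box size chosen arbitrarily::
+
+            >>> import math
+            >>> math.floor(4 + math.log2(112 / 224) + 1e-6)  # a ~112 px box -> level 3
+            3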
+ Returns: + result (Tensor) + """ + x_filtered = [] + for k, v in x.items(): + if k in self.featmap_names: + x_filtered.append(v) + num_levels = len(x_filtered) + rois = self.convert_to_roi_format(boxes) + if self.scales is None: + self.setup_scales(x_filtered, image_shapes) + + scales = self.scales + assert scales is not None + + if num_levels == 1: + return roi_align( + x_filtered[0], rois, + output_size=self.output_size, + spatial_scale=scales[0], + sampling_ratio=self.sampling_ratio + ) + + mapper = self.map_levels + assert mapper is not None + + levels = mapper(boxes) + + num_rois = len(rois) + num_channels = x_filtered[0].shape[1] + + dtype, device = x_filtered[0].dtype, x_filtered[0].device + result = torch.zeros( + (num_rois, num_channels,) + self.output_size, + dtype=dtype, + device=device, + ) + + tracing_results = [] + for level, (per_level_feature, scale) in enumerate(zip(x_filtered, scales)): + idx_in_level = torch.nonzero(levels == level).squeeze(1) + rois_per_level = rois[idx_in_level] + + result_idx_in_level = roi_align( + per_level_feature, rois_per_level, + output_size=self.output_size, + spatial_scale=scale, sampling_ratio=self.sampling_ratio) + + if torchvision._is_tracing(): + tracing_results.append(result_idx_in_level.to(dtype)) + else: + result[idx_in_level] = result_idx_in_level + + if torchvision._is_tracing(): + result = _onnx_merge_levels(levels, tracing_results) + + return result diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_align.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_align.py new file mode 100644 index 0000000000..c0c761b72c --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_align.py @@ -0,0 +1,68 @@ +import torch +from torch import nn, Tensor + +from torch.nn.modules.utils import _pair +from torch.jit.annotations import List + +from ._utils import convert_boxes_to_roi_format, check_roi_boxes_shape + + +def ps_roi_align(input, boxes, output_size, spatial_scale=1.0, sampling_ratio=-1): + # type: (Tensor, Tensor, int, float, int) -> Tensor + """ + Performs Position-Sensitive Region of Interest (RoI) Align operator + mentioned in Light-Head R-CNN. + + Arguments: + input (Tensor[N, C, H, W]): input tensor + boxes (Tensor[K, 5] or List[Tensor[L, 4]]): the box coordinates in (x1, y1, x2, y2) + format where the regions will be taken from. If a single Tensor is passed, + then the first column should contain the batch index. If a list of Tensors + is passed, then each Tensor will correspond to the boxes for an element i + in a batch + output_size (int or Tuple[int, int]): the size of the output after the cropping + is performed, as (height, width) + spatial_scale (float): a scaling factor that maps the input coordinates to + the box coordinates. Default: 1.0 + sampling_ratio (int): number of sampling points in the interpolation grid + used to compute the output value of each pooled output bin. If > 0 + then exactly sampling_ratio x sampling_ratio grid points are used. + If <= 0, then an adaptive number of grid points are used (computed as + ceil(roi_width / pooled_w), and likewise for height). 
Default: -1 + + Returns: + output (Tensor[K, C, output_size[0], output_size[1]]) + """ + check_roi_boxes_shape(boxes) + rois = boxes + output_size = _pair(output_size) + if not isinstance(rois, torch.Tensor): + rois = convert_boxes_to_roi_format(rois) + output, _ = torch.ops.torchvision.ps_roi_align(input, rois, spatial_scale, + output_size[0], + output_size[1], + sampling_ratio) + return output + + +class PSRoIAlign(nn.Module): + """ + See ps_roi_align + """ + def __init__(self, output_size, spatial_scale, sampling_ratio): + super(PSRoIAlign, self).__init__() + self.output_size = output_size + self.spatial_scale = spatial_scale + self.sampling_ratio = sampling_ratio + + def forward(self, input, rois): + return ps_roi_align(input, rois, self.output_size, self.spatial_scale, + self.sampling_ratio) + + def __repr__(self): + tmpstr = self.__class__.__name__ + '(' + tmpstr += 'output_size=' + str(self.output_size) + tmpstr += ', spatial_scale=' + str(self.spatial_scale) + tmpstr += ', sampling_ratio=' + str(self.sampling_ratio) + tmpstr += ')' + return tmpstr diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_pool.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_pool.py new file mode 100644 index 0000000000..710f2cb019 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_pool.py @@ -0,0 +1,59 @@ +import torch +from torch import nn, Tensor + +from torch.nn.modules.utils import _pair +from torch.jit.annotations import List + +from ._utils import convert_boxes_to_roi_format, check_roi_boxes_shape + + +def ps_roi_pool(input, boxes, output_size, spatial_scale=1.0): + # type: (Tensor, Tensor, int, float) -> Tensor + """ + Performs Position-Sensitive Region of Interest (RoI) Pool operator + described in R-FCN + + Arguments: + input (Tensor[N, C, H, W]): input tensor + boxes (Tensor[K, 5] or List[Tensor[L, 4]]): the box coordinates in (x1, y1, x2, y2) + format where the regions will be taken from. If a single Tensor is passed, + then the first column should contain the batch index. If a list of Tensors + is passed, then each Tensor will correspond to the boxes for an element i + in a batch + output_size (int or Tuple[int, int]): the size of the output after the cropping + is performed, as (height, width) + spatial_scale (float): a scaling factor that maps the input coordinates to + the box coordinates. 
Default: 1.0 + + Returns: + output (Tensor[K, C, output_size[0], output_size[1]]) + """ + check_roi_boxes_shape(boxes) + rois = boxes + output_size = _pair(output_size) + if not isinstance(rois, torch.Tensor): + rois = convert_boxes_to_roi_format(rois) + output, _ = torch.ops.torchvision.ps_roi_pool(input, rois, spatial_scale, + output_size[0], + output_size[1]) + return output + + +class PSRoIPool(nn.Module): + """ + See ps_roi_pool + """ + def __init__(self, output_size, spatial_scale): + super(PSRoIPool, self).__init__() + self.output_size = output_size + self.spatial_scale = spatial_scale + + def forward(self, input, rois): + return ps_roi_pool(input, rois, self.output_size, self.spatial_scale) + + def __repr__(self): + tmpstr = self.__class__.__name__ + '(' + tmpstr += 'output_size=' + str(self.output_size) + tmpstr += ', spatial_scale=' + str(self.spatial_scale) + tmpstr += ')' + return tmpstr diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_align.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_align.py new file mode 100644 index 0000000000..14224d8a83 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_align.py @@ -0,0 +1,69 @@ +import torch +from torch import nn, Tensor + +from torch.nn.modules.utils import _pair +from torch.jit.annotations import List, BroadcastingList2 + +from ._utils import convert_boxes_to_roi_format, check_roi_boxes_shape + + +def roi_align(input, boxes, output_size, spatial_scale=1.0, sampling_ratio=-1, aligned=False): + # type: (Tensor, Tensor, BroadcastingList2[int], float, int, bool) -> Tensor + """ + Performs Region of Interest (RoI) Align operator described in Mask R-CNN + + Arguments: + input (Tensor[N, C, H, W]): input tensor + boxes (Tensor[K, 5] or List[Tensor[L, 4]]): the box coordinates in (x1, y1, x2, y2) + format where the regions will be taken from. If a single Tensor is passed, + then the first column should contain the batch index. If a list of Tensors + is passed, then each Tensor will correspond to the boxes for an element i + in a batch + output_size (int or Tuple[int, int]): the size of the output after the cropping + is performed, as (height, width) + spatial_scale (float): a scaling factor that maps the input coordinates to + the box coordinates. Default: 1.0 + sampling_ratio (int): number of sampling points in the interpolation grid + used to compute the output value of each pooled output bin. If > 0, + then exactly sampling_ratio x sampling_ratio grid points are used. If + <= 0, then an adaptive number of grid points are used (computed as + ceil(roi_width / pooled_w), and likewise for height). Default: -1 + aligned (bool): If False, use the legacy implementation. + If True, pixel shift it by -0.5 for align more perfectly about two neighboring pixel indices. 
+ This version in Detectron2 + + Returns: + output (Tensor[K, C, output_size[0], output_size[1]]) + """ + check_roi_boxes_shape(boxes) + rois = boxes + output_size = _pair(output_size) + if not isinstance(rois, torch.Tensor): + rois = convert_boxes_to_roi_format(rois) + return torch.ops.torchvision.roi_align(input, rois, spatial_scale, + output_size[0], output_size[1], + sampling_ratio, aligned) + + +class RoIAlign(nn.Module): + """ + See roi_align + """ + def __init__(self, output_size, spatial_scale, sampling_ratio, aligned=False): + super(RoIAlign, self).__init__() + self.output_size = output_size + self.spatial_scale = spatial_scale + self.sampling_ratio = sampling_ratio + self.aligned = aligned + + def forward(self, input, rois): + return roi_align(input, rois, self.output_size, self.spatial_scale, self.sampling_ratio, self.aligned) + + def __repr__(self): + tmpstr = self.__class__.__name__ + '(' + tmpstr += 'output_size=' + str(self.output_size) + tmpstr += ', spatial_scale=' + str(self.spatial_scale) + tmpstr += ', sampling_ratio=' + str(self.sampling_ratio) + tmpstr += ', aligned=' + str(self.aligned) + tmpstr += ')' + return tmpstr diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_pool.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_pool.py new file mode 100644 index 0000000000..10232f16b4 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_pool.py @@ -0,0 +1,57 @@ +import torch +from torch import nn, Tensor + +from torch.nn.modules.utils import _pair +from torch.jit.annotations import List, BroadcastingList2 + +from ._utils import convert_boxes_to_roi_format, check_roi_boxes_shape + + +def roi_pool(input, boxes, output_size, spatial_scale=1.0): + # type: (Tensor, Tensor, BroadcastingList2[int], float) -> Tensor + """ + Performs Region of Interest (RoI) Pool operator described in Fast R-CNN + + Arguments: + input (Tensor[N, C, H, W]): input tensor + boxes (Tensor[K, 5] or List[Tensor[L, 4]]): the box coordinates in (x1, y1, x2, y2) + format where the regions will be taken from. If a single Tensor is passed, + then the first column should contain the batch index. If a list of Tensors + is passed, then each Tensor will correspond to the boxes for an element i + in a batch + output_size (int or Tuple[int, int]): the size of the output after the cropping + is performed, as (height, width) + spatial_scale (float): a scaling factor that maps the input coordinates to + the box coordinates. 
Default: 1.0 + + Returns: + output (Tensor[K, C, output_size[0], output_size[1]]) + """ + check_roi_boxes_shape(boxes) + rois = boxes + output_size = _pair(output_size) + if not isinstance(rois, torch.Tensor): + rois = convert_boxes_to_roi_format(rois) + output, _ = torch.ops.torchvision.roi_pool(input, rois, spatial_scale, + output_size[0], output_size[1]) + return output + + +class RoIPool(nn.Module): + """ + See roi_pool + """ + def __init__(self, output_size, spatial_scale): + super(RoIPool, self).__init__() + self.output_size = output_size + self.spatial_scale = spatial_scale + + def forward(self, input, rois): + return roi_pool(input, rois, self.output_size, self.spatial_scale) + + def __repr__(self): + tmpstr = self.__class__.__name__ + '(' + tmpstr += 'output_size=' + str(self.output_size) + tmpstr += ', spatial_scale=' + str(self.spatial_scale) + tmpstr += ')' + return tmpstr diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/__init__.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/__init__.py new file mode 100644 index 0000000000..7986cdd642 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/__init__.py @@ -0,0 +1 @@ +from .transforms import * diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_functional_video.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_functional_video.py new file mode 100644 index 0000000000..06c3071690 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_functional_video.py @@ -0,0 +1,101 @@ +import torch + + +def _is_tensor_video_clip(clip): + if not torch.is_tensor(clip): + raise TypeError("clip should be Tesnor. Got %s" % type(clip)) + + if not clip.ndimension() == 4: + raise ValueError("clip should be 4D. Got %dD" % clip.dim()) + + return True + + +def crop(clip, i, j, h, w): + """ + Args: + clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W) + """ + assert len(clip.size()) == 4, "clip should be a 4D tensor" + return clip[..., i:i + h, j:j + w] + + +def resize(clip, target_size, interpolation_mode): + assert len(target_size) == 2, "target size should be tuple (height, width)" + return torch.nn.functional.interpolate( + clip, size=target_size, mode=interpolation_mode + ) + + +def resized_crop(clip, i, j, h, w, size, interpolation_mode="bilinear"): + """ + Do spatial cropping and resizing to the video clip + Args: + clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W) + i (int): i in (i,j) i.e coordinates of the upper left corner. + j (int): j in (i,j) i.e coordinates of the upper left corner. + h (int): Height of the cropped region. + w (int): Width of the cropped region. + size (tuple(int, int)): height and width of resized clip + Returns: + clip (torch.tensor): Resized and cropped clip. 
Size is (C, T, H, W) + """ + assert _is_tensor_video_clip(clip), "clip should be a 4D torch.tensor" + clip = crop(clip, i, j, h, w) + clip = resize(clip, size, interpolation_mode) + return clip + + +def center_crop(clip, crop_size): + assert _is_tensor_video_clip(clip), "clip should be a 4D torch.tensor" + h, w = clip.size(-2), clip.size(-1) + th, tw = crop_size + assert h >= th and w >= tw, "height and width must be no smaller than crop_size" + + i = int(round((h - th) / 2.0)) + j = int(round((w - tw) / 2.0)) + return crop(clip, i, j, th, tw) + + +def to_tensor(clip): + """ + Convert tensor data type from uint8 to float, divide value by 255.0 and + permute the dimenions of clip tensor + Args: + clip (torch.tensor, dtype=torch.uint8): Size is (T, H, W, C) + Return: + clip (torch.tensor, dtype=torch.float): Size is (C, T, H, W) + """ + _is_tensor_video_clip(clip) + if not clip.dtype == torch.uint8: + raise TypeError("clip tensor should have data type uint8. Got %s" % str(clip.dtype)) + return clip.float().permute(3, 0, 1, 2) / 255.0 + + +def normalize(clip, mean, std, inplace=False): + """ + Args: + clip (torch.tensor): Video clip to be normalized. Size is (C, T, H, W) + mean (tuple): pixel RGB mean. Size is (3) + std (tuple): pixel standard deviation. Size is (3) + Returns: + normalized clip (torch.tensor): Size is (C, T, H, W) + """ + assert _is_tensor_video_clip(clip), "clip should be a 4D torch.tensor" + if not inplace: + clip = clip.clone() + mean = torch.as_tensor(mean, dtype=clip.dtype, device=clip.device) + std = torch.as_tensor(std, dtype=clip.dtype, device=clip.device) + clip.sub_(mean[:, None, None, None]).div_(std[:, None, None, None]) + return clip + + +def hflip(clip): + """ + Args: + clip (torch.tensor): Video clip to be normalized. Size is (C, T, H, W) + Returns: + flipped clip (torch.tensor): Size is (C, T, H, W) + """ + assert _is_tensor_video_clip(clip), "clip should be a 4D torch.tensor" + return clip.flip((-1)) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_transforms_video.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_transforms_video.py new file mode 100644 index 0000000000..aa1a4b0531 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_transforms_video.py @@ -0,0 +1,173 @@ +#!/usr/bin/env python3 + +import numbers +import random + +from torchvision.transforms import ( + RandomCrop, + RandomResizedCrop, +) + +from . import _functional_video as F + + +__all__ = [ + "RandomCropVideo", + "RandomResizedCropVideo", + "CenterCropVideo", + "NormalizeVideo", + "ToTensorVideo", + "RandomHorizontalFlipVideo", +] + + +class RandomCropVideo(RandomCrop): + def __init__(self, size): + if isinstance(size, numbers.Number): + self.size = (int(size), int(size)) + else: + self.size = size + + def __call__(self, clip): + """ + Args: + clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W) + Returns: + torch.tensor: randomly cropped/resized video clip. 
+ size is (C, T, OH, OW) + """ + i, j, h, w = self.get_params(clip, self.size) + return F.crop(clip, i, j, h, w) + + def __repr__(self): + return self.__class__.__name__ + '(size={0})'.format(self.size) + + +class RandomResizedCropVideo(RandomResizedCrop): + def __init__( + self, + size, + scale=(0.08, 1.0), + ratio=(3.0 / 4.0, 4.0 / 3.0), + interpolation_mode="bilinear", + ): + if isinstance(size, tuple): + assert len(size) == 2, "size should be tuple (height, width)" + self.size = size + else: + self.size = (size, size) + + self.interpolation_mode = interpolation_mode + self.scale = scale + self.ratio = ratio + + def __call__(self, clip): + """ + Args: + clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W) + Returns: + torch.tensor: randomly cropped/resized video clip. + size is (C, T, H, W) + """ + i, j, h, w = self.get_params(clip, self.scale, self.ratio) + return F.resized_crop(clip, i, j, h, w, self.size, self.interpolation_mode) + + def __repr__(self): + return self.__class__.__name__ + \ + '(size={0}, interpolation_mode={1}, scale={2}, ratio={3})'.format( + self.size, self.interpolation_mode, self.scale, self.ratio + ) + + +class CenterCropVideo(object): + def __init__(self, crop_size): + if isinstance(crop_size, numbers.Number): + self.crop_size = (int(crop_size), int(crop_size)) + else: + self.crop_size = crop_size + + def __call__(self, clip): + """ + Args: + clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W) + Returns: + torch.tensor: central cropping of video clip. Size is + (C, T, crop_size, crop_size) + """ + return F.center_crop(clip, self.crop_size) + + def __repr__(self): + return self.__class__.__name__ + '(crop_size={0})'.format(self.crop_size) + + +class NormalizeVideo(object): + """ + Normalize the video clip by mean subtraction and division by standard deviation + Args: + mean (3-tuple): pixel RGB mean + std (3-tuple): pixel RGB standard deviation + inplace (boolean): whether do in-place normalization + """ + + def __init__(self, mean, std, inplace=False): + self.mean = mean + self.std = std + self.inplace = inplace + + def __call__(self, clip): + """ + Args: + clip (torch.tensor): video clip to be normalized. Size is (C, T, H, W) + """ + return F.normalize(clip, self.mean, self.std, self.inplace) + + def __repr__(self): + return self.__class__.__name__ + '(mean={0}, std={1}, inplace={2})'.format( + self.mean, self.std, self.inplace) + + +class ToTensorVideo(object): + """ + Convert tensor data type from uint8 to float, divide value by 255.0 and + permute the dimenions of clip tensor + """ + + def __init__(self): + pass + + def __call__(self, clip): + """ + Args: + clip (torch.tensor, dtype=torch.uint8): Size is (T, H, W, C) + Return: + clip (torch.tensor, dtype=torch.float): Size is (C, T, H, W) + """ + return F.to_tensor(clip) + + def __repr__(self): + return self.__class__.__name__ + + +class RandomHorizontalFlipVideo(object): + """ + Flip the video clip along the horizonal direction with a given probability + Args: + p (float): probability of the clip being flipped. 
Default value is 0.5 + """ + + def __init__(self, p=0.5): + self.p = p + + def __call__(self, clip): + """ + Args: + clip (torch.tensor): Size is (C, T, H, W) + Return: + clip (torch.tensor): Size is (C, T, H, W) + """ + if random.random() < self.p: + clip = F.hflip(clip) + return clip + + def __repr__(self): + return self.__class__.__name__ + "(p={0})".format(self.p) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional.py new file mode 100644 index 0000000000..bd5b170626 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional.py @@ -0,0 +1,1392 @@ +import math +import numbers +import warnings +from enum import Enum +from typing import List, Tuple, Any, Optional + +import numpy as np +import torch +from PIL import Image +from torch import Tensor + +try: + import accimage +except ImportError: + accimage = None + +from . import functional_pil as F_pil +from . import functional_tensor as F_t + + +class InterpolationMode(Enum): + """Interpolation modes + Available interpolation methods are ``nearest``, ``bilinear``, ``bicubic``, ``box``, ``hamming``, and ``lanczos``. + """ + + NEAREST = "nearest" + BILINEAR = "bilinear" + BICUBIC = "bicubic" + # For PIL compatibility + BOX = "box" + HAMMING = "hamming" + LANCZOS = "lanczos" + + +# TODO: Once torchscript supports Enums with staticmethod +# this can be put into InterpolationMode as staticmethod +def _interpolation_modes_from_int(i: int) -> InterpolationMode: + inverse_modes_mapping = { + 0: InterpolationMode.NEAREST, + 2: InterpolationMode.BILINEAR, + 3: InterpolationMode.BICUBIC, + 4: InterpolationMode.BOX, + 5: InterpolationMode.HAMMING, + 1: InterpolationMode.LANCZOS, + } + return inverse_modes_mapping[i] + + +pil_modes_mapping = { + InterpolationMode.NEAREST: 0, + InterpolationMode.BILINEAR: 2, + InterpolationMode.BICUBIC: 3, + InterpolationMode.BOX: 4, + InterpolationMode.HAMMING: 5, + InterpolationMode.LANCZOS: 1, +} + +_is_pil_image = F_pil._is_pil_image + + +def get_image_size(img: Tensor) -> List[int]: + """Returns the size of an image as [width, height]. + + Args: + img (PIL Image or Tensor): The image to be checked. + + Returns: + List[int]: The image size. + """ + if isinstance(img, torch.Tensor): + return F_t.get_image_size(img) + + return F_pil.get_image_size(img) + + +def get_image_num_channels(img: Tensor) -> int: + """Returns the number of channels of an image. + + Args: + img (PIL Image or Tensor): The image to be checked. + + Returns: + int: The number of channels. + """ + if isinstance(img, torch.Tensor): + return F_t.get_image_num_channels(img) + + return F_pil.get_image_num_channels(img) + + +@torch.jit.unused +def _is_numpy(img: Any) -> bool: + return isinstance(img, np.ndarray) + + +@torch.jit.unused +def _is_numpy_image(img: Any) -> bool: + return img.ndim in {2, 3} + + +def to_tensor(pic): + """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor. + This function does not support torchscript. + + See :class:`~torchvision.transforms.ToTensor` for more details. + + Args: + pic (PIL Image or numpy.ndarray): Image to be converted to tensor. + + Returns: + Tensor: Converted image. + """ + if not (F_pil._is_pil_image(pic) or _is_numpy(pic)): + raise TypeError(f"pic should be PIL Image or ndarray. Got {type(pic)}") + + if _is_numpy(pic) and not _is_numpy_image(pic): + raise ValueError(f"pic should be 2/3 dimensional. 
Got {pic.ndim} dimensions.") + + default_float_dtype = torch.get_default_dtype() + + if isinstance(pic, np.ndarray): + # handle numpy array + if pic.ndim == 2: + pic = pic[:, :, None] + + img = torch.from_numpy(pic.transpose((2, 0, 1))).contiguous() + # backward compatibility + if isinstance(img, torch.ByteTensor): + return img.to(dtype=default_float_dtype).div(255) + else: + return img + + if accimage is not None and isinstance(pic, accimage.Image): + nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.float32) + pic.copyto(nppic) + return torch.from_numpy(nppic).to(dtype=default_float_dtype) + + # handle PIL Image + mode_to_nptype = {"I": np.int32, "I;16": np.int16, "F": np.float32} + img = torch.from_numpy(np.array(pic, mode_to_nptype.get(pic.mode, np.uint8), copy=True)) + + if pic.mode == "1": + img = 255 * img + img = img.view(pic.size[1], pic.size[0], len(pic.getbands())) + # put it from HWC to CHW format + img = img.permute((2, 0, 1)).contiguous() + if isinstance(img, torch.ByteTensor): + return img.to(dtype=default_float_dtype).div(255) + else: + return img + + +def pil_to_tensor(pic): + """Convert a ``PIL Image`` to a tensor of the same type. + This function does not support torchscript. + + See :class:`~torchvision.transforms.PILToTensor` for more details. + + .. note:: + + A deep copy of the underlying array is performed. + + Args: + pic (PIL Image): Image to be converted to tensor. + + Returns: + Tensor: Converted image. + """ + if not F_pil._is_pil_image(pic): + raise TypeError(f"pic should be PIL Image. Got {type(pic)}") + + if accimage is not None and isinstance(pic, accimage.Image): + # accimage format is always uint8 internally, so always return uint8 here + nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.uint8) + pic.copyto(nppic) + return torch.as_tensor(nppic) + + # handle PIL Image + img = torch.as_tensor(np.array(pic, copy=True)) + img = img.view(pic.size[1], pic.size[0], len(pic.getbands())) + # put it from HWC to CHW format + img = img.permute((2, 0, 1)) + return img + + +def convert_image_dtype(image: torch.Tensor, dtype: torch.dtype = torch.float) -> torch.Tensor: + """Convert a tensor image to the given ``dtype`` and scale the values accordingly + This function does not support PIL Image. + + Args: + image (torch.Tensor): Image to be converted + dtype (torch.dtype): Desired data type of the output + + Returns: + Tensor: Converted image + + .. note:: + + When converting from a smaller to a larger integer ``dtype`` the maximum values are **not** mapped exactly. + If converted back and forth, this mismatch has no effect. + + Raises: + RuntimeError: When trying to cast :class:`torch.float32` to :class:`torch.int32` or :class:`torch.int64` as + well as for trying to cast :class:`torch.float64` to :class:`torch.int64`. These conversions might lead to + overflow errors since the floating point ``dtype`` cannot store consecutive integers over the whole range + of the integer ``dtype``. + """ + if not isinstance(image, torch.Tensor): + raise TypeError("Input img should be Tensor Image") + + return F_t.convert_image_dtype(image, dtype) + + +def to_pil_image(pic, mode=None): + """Convert a tensor or an ndarray to PIL Image. This function does not support torchscript. + + See :class:`~torchvision.transforms.ToPILImage` for more details. + + Args: + pic (Tensor or numpy.ndarray): Image to be converted to PIL Image. + mode (`PIL.Image mode`_): color space and pixel depth of input data (optional). + + .. 
_PIL.Image mode: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#concept-modes + + Returns: + PIL Image: Image converted to PIL Image. + """ + if not (isinstance(pic, torch.Tensor) or isinstance(pic, np.ndarray)): + raise TypeError(f"pic should be Tensor or ndarray. Got {type(pic)}.") + + elif isinstance(pic, torch.Tensor): + if pic.ndimension() not in {2, 3}: + raise ValueError(f"pic should be 2/3 dimensional. Got {pic.ndimension()} dimensions.") + + elif pic.ndimension() == 2: + # if 2D image, add channel dimension (CHW) + pic = pic.unsqueeze(0) + + # check number of channels + if pic.shape[-3] > 4: + raise ValueError(f"pic should not have > 4 channels. Got {pic.shape[-3]} channels.") + + elif isinstance(pic, np.ndarray): + if pic.ndim not in {2, 3}: + raise ValueError(f"pic should be 2/3 dimensional. Got {pic.ndim} dimensions.") + + elif pic.ndim == 2: + # if 2D image, add channel dimension (HWC) + pic = np.expand_dims(pic, 2) + + # check number of channels + if pic.shape[-1] > 4: + raise ValueError(f"pic should not have > 4 channels. Got {pic.shape[-1]} channels.") + + npimg = pic + if isinstance(pic, torch.Tensor): + if pic.is_floating_point() and mode != "F": + pic = pic.mul(255).byte() + npimg = np.transpose(pic.cpu().numpy(), (1, 2, 0)) + + if not isinstance(npimg, np.ndarray): + raise TypeError("Input pic must be a torch.Tensor or NumPy ndarray, not {type(npimg)}") + + if npimg.shape[2] == 1: + expected_mode = None + npimg = npimg[:, :, 0] + if npimg.dtype == np.uint8: + expected_mode = "L" + elif npimg.dtype == np.int16: + expected_mode = "I;16" + elif npimg.dtype == np.int32: + expected_mode = "I" + elif npimg.dtype == np.float32: + expected_mode = "F" + if mode is not None and mode != expected_mode: + raise ValueError(f"Incorrect mode ({mode}) supplied for input type {np.dtype}. Should be {expected_mode}") + mode = expected_mode + + elif npimg.shape[2] == 2: + permitted_2_channel_modes = ["LA"] + if mode is not None and mode not in permitted_2_channel_modes: + raise ValueError(f"Only modes {permitted_2_channel_modes} are supported for 2D inputs") + + if mode is None and npimg.dtype == np.uint8: + mode = "LA" + + elif npimg.shape[2] == 4: + permitted_4_channel_modes = ["RGBA", "CMYK", "RGBX"] + if mode is not None and mode not in permitted_4_channel_modes: + raise ValueError(f"Only modes {permitted_4_channel_modes} are supported for 4D inputs") + + if mode is None and npimg.dtype == np.uint8: + mode = "RGBA" + else: + permitted_3_channel_modes = ["RGB", "YCbCr", "HSV"] + if mode is not None and mode not in permitted_3_channel_modes: + raise ValueError(f"Only modes {permitted_3_channel_modes} are supported for 3D inputs") + if mode is None and npimg.dtype == np.uint8: + mode = "RGB" + + if mode is None: + raise TypeError(f"Input type {npimg.dtype} is not supported") + + return Image.fromarray(npimg, mode=mode) + + +def normalize(tensor: Tensor, mean: List[float], std: List[float], inplace: bool = False) -> Tensor: + """Normalize a float tensor image with mean and standard deviation. + This transform does not support PIL Image. + + .. note:: + This transform acts out of place by default, i.e., it does not mutates the input tensor. + + See :class:`~torchvision.transforms.Normalize` for more details. + + Args: + tensor (Tensor): Float tensor image of size (C, H, W) or (B, C, H, W) to be normalized. + mean (sequence): Sequence of means for each channel. + std (sequence): Sequence of standard deviations for each channel. 
+ inplace(bool,optional): Bool to make this operation inplace. + + Returns: + Tensor: Normalized Tensor image. + """ + if not isinstance(tensor, torch.Tensor): + raise TypeError(f"Input tensor should be a torch tensor. Got {type(tensor)}.") + + if not tensor.is_floating_point(): + raise TypeError(f"Input tensor should be a float tensor. Got {tensor.dtype}.") + + if tensor.ndim < 3: + raise ValueError( + f"Expected tensor to be a tensor image of size (..., C, H, W). Got tensor.size() = {tensor.size()}" + ) + + if not inplace: + tensor = tensor.clone() + + dtype = tensor.dtype + mean = torch.as_tensor(mean, dtype=dtype, device=tensor.device) + std = torch.as_tensor(std, dtype=dtype, device=tensor.device) + if (std == 0).any(): + raise ValueError(f"std evaluated to zero after conversion to {dtype}, leading to division by zero.") + if mean.ndim == 1: + mean = mean.view(-1, 1, 1) + if std.ndim == 1: + std = std.view(-1, 1, 1) + tensor.sub_(mean).div_(std) + return tensor + + +def resize( + img: Tensor, + size: List[int], + interpolation: InterpolationMode = InterpolationMode.BILINEAR, + max_size: Optional[int] = None, + antialias: Optional[bool] = None, +) -> Tensor: + r"""Resize the input image to the given size. + If the image is torch Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions + + .. warning:: + The output image might be different depending on its type: when downsampling, the interpolation of PIL images + and tensors is slightly different, because PIL applies antialiasing. This may lead to significant differences + in the performance of a network. Therefore, it is preferable to train and serve a model with the same input + types. See also below the ``antialias`` parameter, which can help making the output of PIL images and tensors + closer. + + Args: + img (PIL Image or Tensor): Image to be resized. + size (sequence or int): Desired output size. If size is a sequence like + (h, w), the output size will be matched to this. If size is an int, + the smaller edge of the image will be matched to this number maintaining + the aspect ratio. i.e, if height > width, then image will be rescaled to + :math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)`. + + .. note:: + In torchscript mode size as single int is not supported, use a sequence of length 1: ``[size, ]``. + interpolation (InterpolationMode): Desired interpolation enum defined by + :class:`torchvision.transforms.InterpolationMode`. + Default is ``InterpolationMode.BILINEAR``. If input is Tensor, only ``InterpolationMode.NEAREST``, + ``InterpolationMode.BILINEAR`` and ``InterpolationMode.BICUBIC`` are supported. + For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable. + max_size (int, optional): The maximum allowed for the longer edge of + the resized image: if the longer edge of the image is greater + than ``max_size`` after being resized according to ``size``, then + the image is resized again so that the longer edge is equal to + ``max_size``. As a result, ``size`` might be overruled, i.e the + smaller edge may be shorter than ``size``. This is only supported + if ``size`` is an int (or a sequence of length 1 in torchscript + mode). + antialias (bool, optional): antialias flag. If ``img`` is PIL Image, the flag is ignored and anti-alias + is always used. If ``img`` is Tensor, the flag is False by default and can be set to True for + ``InterpolationMode.BILINEAR`` only mode. 
This can help making the output for PIL images and tensors + closer. + + .. warning:: + There is no autodiff support for ``antialias=True`` option with input ``img`` as Tensor. + + Returns: + PIL Image or Tensor: Resized image. + """ + # Backward compatibility with integer value + if isinstance(interpolation, int): + warnings.warn( + "Argument interpolation should be of type InterpolationMode instead of int. " + "Please, use InterpolationMode enum." + ) + interpolation = _interpolation_modes_from_int(interpolation) + + if not isinstance(interpolation, InterpolationMode): + raise TypeError("Argument interpolation should be a InterpolationMode") + + if not isinstance(img, torch.Tensor): + if antialias is not None and not antialias: + warnings.warn("Anti-alias option is always applied for PIL Image input. Argument antialias is ignored.") + pil_interpolation = pil_modes_mapping[interpolation] + return F_pil.resize(img, size=size, interpolation=pil_interpolation, max_size=max_size) + + return F_t.resize(img, size=size, interpolation=interpolation.value, max_size=max_size, antialias=antialias) + + +def scale(*args, **kwargs): + warnings.warn("The use of the transforms.Scale transform is deprecated, please use transforms.Resize instead.") + return resize(*args, **kwargs) + + +def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "constant") -> Tensor: + r"""Pad the given image on all sides with the given "pad" value. + If the image is torch Tensor, it is expected + to have [..., H, W] shape, where ... means at most 2 leading dimensions for mode reflect and symmetric, + at most 3 leading dimensions for mode edge, + and an arbitrary number of leading dimensions for mode constant + + Args: + img (PIL Image or Tensor): Image to be padded. + padding (int or sequence): Padding on each border. If a single int is provided this + is used to pad all borders. If sequence of length 2 is provided this is the padding + on left/right and top/bottom respectively. If a sequence of length 4 is provided + this is the padding for the left, top, right and bottom borders respectively. + + .. note:: + In torchscript mode padding as single int is not supported, use a sequence of + length 1: ``[padding, ]``. + fill (number or str or tuple): Pixel fill value for constant fill. Default is 0. + If a tuple of length 3, it is used to fill R, G, B channels respectively. + This value is only used when the padding_mode is constant. + Only number is supported for torch Tensor. + Only int or str or tuple value is supported for PIL Image. + padding_mode (str): Type of padding. Should be: constant, edge, reflect or symmetric. + Default is constant. + + - constant: pads with a constant value, this value is specified with fill + + - edge: pads with the last value at the edge of the image. + If input a 5D torch Tensor, the last 3 dimensions will be padded instead of the last 2 + + - reflect: pads with reflection of image without repeating the last value on the edge. + For example, padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode + will result in [3, 2, 1, 2, 3, 4, 3, 2] + + - symmetric: pads with reflection of image repeating the last value on the edge. + For example, padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode + will result in [2, 1, 1, 2, 3, 4, 4, 3] + + Returns: + PIL Image or Tensor: Padded image. 
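+
+    A minimal sketch of the tensor path, assuming an arbitrary 4-D float input and
+    padding only the left/right borders::
+
+        >>> x = torch.arange(9, dtype=torch.float32).reshape(1, 1, 3, 3)
+        >>> pad(x, [1, 0], padding_mode="reflect").shape
+        torch.Size([1, 1, 3, 5])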
+ """ + if not isinstance(img, torch.Tensor): + return F_pil.pad(img, padding=padding, fill=fill, padding_mode=padding_mode) + + return F_t.pad(img, padding=padding, fill=fill, padding_mode=padding_mode) + + +def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor: + """Crop the given image at specified location and output size. + If the image is torch Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. + If image size is smaller than output size along any edge, image is padded with 0 and then cropped. + + Args: + img (PIL Image or Tensor): Image to be cropped. (0,0) denotes the top left corner of the image. + top (int): Vertical component of the top left corner of the crop box. + left (int): Horizontal component of the top left corner of the crop box. + height (int): Height of the crop box. + width (int): Width of the crop box. + + Returns: + PIL Image or Tensor: Cropped image. + """ + + if not isinstance(img, torch.Tensor): + return F_pil.crop(img, top, left, height, width) + + return F_t.crop(img, top, left, height, width) + + +def center_crop(img: Tensor, output_size: List[int]) -> Tensor: + """Crops the given image at the center. + If the image is torch Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. + If image size is smaller than output size along any edge, image is padded with 0 and then center cropped. + + Args: + img (PIL Image or Tensor): Image to be cropped. + output_size (sequence or int): (height, width) of the crop box. If int or sequence with single int, + it is used for both directions. + + Returns: + PIL Image or Tensor: Cropped image. + """ + if isinstance(output_size, numbers.Number): + output_size = (int(output_size), int(output_size)) + elif isinstance(output_size, (tuple, list)) and len(output_size) == 1: + output_size = (output_size[0], output_size[0]) + + image_width, image_height = get_image_size(img) + crop_height, crop_width = output_size + + if crop_width > image_width or crop_height > image_height: + padding_ltrb = [ + (crop_width - image_width) // 2 if crop_width > image_width else 0, + (crop_height - image_height) // 2 if crop_height > image_height else 0, + (crop_width - image_width + 1) // 2 if crop_width > image_width else 0, + (crop_height - image_height + 1) // 2 if crop_height > image_height else 0, + ] + img = pad(img, padding_ltrb, fill=0) # PIL uses fill value 0 + image_width, image_height = get_image_size(img) + if crop_width == image_width and crop_height == image_height: + return img + + crop_top = int(round((image_height - crop_height) / 2.0)) + crop_left = int(round((image_width - crop_width) / 2.0)) + return crop(img, crop_top, crop_left, crop_height, crop_width) + + +def resized_crop( + img: Tensor, + top: int, + left: int, + height: int, + width: int, + size: List[int], + interpolation: InterpolationMode = InterpolationMode.BILINEAR, +) -> Tensor: + """Crop the given image and resize it to desired size. + If the image is torch Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions + + Notably used in :class:`~torchvision.transforms.RandomResizedCrop`. + + Args: + img (PIL Image or Tensor): Image to be cropped. (0,0) denotes the top left corner of the image. + top (int): Vertical component of the top left corner of the crop box. + left (int): Horizontal component of the top left corner of the crop box. + height (int): Height of the crop box. 
+ width (int): Width of the crop box. + size (sequence or int): Desired output size. Same semantics as ``resize``. + interpolation (InterpolationMode): Desired interpolation enum defined by + :class:`torchvision.transforms.InterpolationMode`. + Default is ``InterpolationMode.BILINEAR``. If input is Tensor, only ``InterpolationMode.NEAREST``, + ``InterpolationMode.BILINEAR`` and ``InterpolationMode.BICUBIC`` are supported. + For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable. + + Returns: + PIL Image or Tensor: Cropped image. + """ + img = crop(img, top, left, height, width) + img = resize(img, size, interpolation) + return img + + +def hflip(img: Tensor) -> Tensor: + """Horizontally flip the given image. + + Args: + img (PIL Image or Tensor): Image to be flipped. If img + is a Tensor, it is expected to be in [..., H, W] format, + where ... means it can have an arbitrary number of leading + dimensions. + + Returns: + PIL Image or Tensor: Horizontally flipped image. + """ + if not isinstance(img, torch.Tensor): + return F_pil.hflip(img) + + return F_t.hflip(img) + + +def _get_perspective_coeffs(startpoints: List[List[int]], endpoints: List[List[int]]) -> List[float]: + """Helper function to get the coefficients (a, b, c, d, e, f, g, h) for the perspective transforms. + + In Perspective Transform each pixel (x, y) in the original image gets transformed as, + (x, y) -> ( (ax + by + c) / (gx + hy + 1), (dx + ey + f) / (gx + hy + 1) ) + + Args: + startpoints (list of list of ints): List containing four lists of two integers corresponding to four corners + ``[top-left, top-right, bottom-right, bottom-left]`` of the original image. + endpoints (list of list of ints): List containing four lists of two integers corresponding to four corners + ``[top-left, top-right, bottom-right, bottom-left]`` of the transformed image. + + Returns: + octuple (a, b, c, d, e, f, g, h) for transforming each pixel. + """ + a_matrix = torch.zeros(2 * len(startpoints), 8, dtype=torch.float) + + for i, (p1, p2) in enumerate(zip(endpoints, startpoints)): + a_matrix[2 * i, :] = torch.tensor([p1[0], p1[1], 1, 0, 0, 0, -p2[0] * p1[0], -p2[0] * p1[1]]) + a_matrix[2 * i + 1, :] = torch.tensor([0, 0, 0, p1[0], p1[1], 1, -p2[1] * p1[0], -p2[1] * p1[1]]) + + b_matrix = torch.tensor(startpoints, dtype=torch.float).view(8) + res = torch.linalg.lstsq(a_matrix, b_matrix, driver="gels").solution + + output: List[float] = res.tolist() + return output + + +def perspective( + img: Tensor, + startpoints: List[List[int]], + endpoints: List[List[int]], + interpolation: InterpolationMode = InterpolationMode.BILINEAR, + fill: Optional[List[float]] = None, +) -> Tensor: + """Perform perspective transform of the given image. + If the image is torch Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. + + Args: + img (PIL Image or Tensor): Image to be transformed. + startpoints (list of list of ints): List containing four lists of two integers corresponding to four corners + ``[top-left, top-right, bottom-right, bottom-left]`` of the original image. + endpoints (list of list of ints): List containing four lists of two integers corresponding to four corners + ``[top-left, top-right, bottom-right, bottom-left]`` of the transformed image. + interpolation (InterpolationMode): Desired interpolation enum defined by + :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``. 
+ If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. + For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable. + fill (sequence or number, optional): Pixel fill value for the area outside the transformed + image. If given a number, the value is used for all bands respectively. + + .. note:: + In torchscript mode single int/float value is not supported, please use a sequence + of length 1: ``[value, ]``. + + Returns: + PIL Image or Tensor: transformed Image. + """ + + coeffs = _get_perspective_coeffs(startpoints, endpoints) + + # Backward compatibility with integer value + if isinstance(interpolation, int): + warnings.warn( + "Argument interpolation should be of type InterpolationMode instead of int. " + "Please, use InterpolationMode enum." + ) + interpolation = _interpolation_modes_from_int(interpolation) + + if not isinstance(interpolation, InterpolationMode): + raise TypeError("Argument interpolation should be a InterpolationMode") + + if not isinstance(img, torch.Tensor): + pil_interpolation = pil_modes_mapping[interpolation] + return F_pil.perspective(img, coeffs, interpolation=pil_interpolation, fill=fill) + + return F_t.perspective(img, coeffs, interpolation=interpolation.value, fill=fill) + + +def vflip(img: Tensor) -> Tensor: + """Vertically flip the given image. + + Args: + img (PIL Image or Tensor): Image to be flipped. If img + is a Tensor, it is expected to be in [..., H, W] format, + where ... means it can have an arbitrary number of leading + dimensions. + + Returns: + PIL Image or Tensor: Vertically flipped image. + """ + if not isinstance(img, torch.Tensor): + return F_pil.vflip(img) + + return F_t.vflip(img) + + +def five_crop(img: Tensor, size: List[int]) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]: + """Crop the given image into four corners and the central crop. + If the image is torch Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions + + .. Note:: + This transform returns a tuple of images and there may be a + mismatch in the number of inputs and targets your ``Dataset`` returns. + + Args: + img (PIL Image or Tensor): Image to be cropped. + size (sequence or int): Desired output size of the crop. If size is an + int instead of sequence like (h, w), a square crop (size, size) is + made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]). + + Returns: + tuple: tuple (tl, tr, bl, br, center) + Corresponding top left, top right, bottom left, bottom right and center crop. 
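+
+ Example (illustrative sketch, assuming a random RGB tensor stands in for a real image):
+
+ >>> img = torch.rand(3, 224, 224)
+ >>> tl, tr, bl, br, center = five_crop(img, [112, 112])
+ >>> center.shape
+ torch.Size([3, 112, 112])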
+ """ + if isinstance(size, numbers.Number): + size = (int(size), int(size)) + elif isinstance(size, (tuple, list)) and len(size) == 1: + size = (size[0], size[0]) + + if len(size) != 2: + raise ValueError("Please provide only two dimensions (h, w) for size.") + + image_width, image_height = get_image_size(img) + crop_height, crop_width = size + if crop_width > image_width or crop_height > image_height: + msg = "Requested crop size {} is bigger than input size {}" + raise ValueError(msg.format(size, (image_height, image_width))) + + tl = crop(img, 0, 0, crop_height, crop_width) + tr = crop(img, 0, image_width - crop_width, crop_height, crop_width) + bl = crop(img, image_height - crop_height, 0, crop_height, crop_width) + br = crop(img, image_height - crop_height, image_width - crop_width, crop_height, crop_width) + + center = center_crop(img, [crop_height, crop_width]) + + return tl, tr, bl, br, center + + +def ten_crop(img: Tensor, size: List[int], vertical_flip: bool = False) -> List[Tensor]: + """Generate ten cropped images from the given image. + Crop the given image into four corners and the central crop plus the + flipped version of these (horizontal flipping is used by default). + If the image is torch Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions + + .. Note:: + This transform returns a tuple of images and there may be a + mismatch in the number of inputs and targets your ``Dataset`` returns. + + Args: + img (PIL Image or Tensor): Image to be cropped. + size (sequence or int): Desired output size of the crop. If size is an + int instead of sequence like (h, w), a square crop (size, size) is + made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]). + vertical_flip (bool): Use vertical flipping instead of horizontal + + Returns: + tuple: tuple (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip, br_flip, center_flip) + Corresponding top left, top right, bottom left, bottom right and + center crop and same for the flipped image. + """ + if isinstance(size, numbers.Number): + size = (int(size), int(size)) + elif isinstance(size, (tuple, list)) and len(size) == 1: + size = (size[0], size[0]) + + if len(size) != 2: + raise ValueError("Please provide only two dimensions (h, w) for size.") + + first_five = five_crop(img, size) + + if vertical_flip: + img = vflip(img) + else: + img = hflip(img) + + second_five = five_crop(img, size) + return first_five + second_five + + +def adjust_brightness(img: Tensor, brightness_factor: float) -> Tensor: + """Adjust brightness of an image. + + Args: + img (PIL Image or Tensor): Image to be adjusted. + If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format, + where ... means it can have an arbitrary number of leading dimensions. + brightness_factor (float): How much to adjust the brightness. Can be + any non negative number. 0 gives a black image, 1 gives the + original image while 2 increases the brightness by a factor of 2. + + Returns: + PIL Image or Tensor: Brightness adjusted image. + """ + if not isinstance(img, torch.Tensor): + return F_pil.adjust_brightness(img, brightness_factor) + + return F_t.adjust_brightness(img, brightness_factor) + + +def adjust_contrast(img: Tensor, contrast_factor: float) -> Tensor: + """Adjust contrast of an image. + + Args: + img (PIL Image or Tensor): Image to be adjusted. + If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format, + where ... 
means it can have an arbitrary number of leading dimensions. + contrast_factor (float): How much to adjust the contrast. Can be any + non negative number. 0 gives a solid gray image, 1 gives the + original image while 2 increases the contrast by a factor of 2. + + Returns: + PIL Image or Tensor: Contrast adjusted image. + """ + if not isinstance(img, torch.Tensor): + return F_pil.adjust_contrast(img, contrast_factor) + + return F_t.adjust_contrast(img, contrast_factor) + + +def adjust_saturation(img: Tensor, saturation_factor: float) -> Tensor: + """Adjust color saturation of an image. + + Args: + img (PIL Image or Tensor): Image to be adjusted. + If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format, + where ... means it can have an arbitrary number of leading dimensions. + saturation_factor (float): How much to adjust the saturation. 0 will + give a black and white image, 1 will give the original image while + 2 will enhance the saturation by a factor of 2. + + Returns: + PIL Image or Tensor: Saturation adjusted image. + """ + if not isinstance(img, torch.Tensor): + return F_pil.adjust_saturation(img, saturation_factor) + + return F_t.adjust_saturation(img, saturation_factor) + + +def adjust_hue(img: Tensor, hue_factor: float) -> Tensor: + """Adjust hue of an image. + + The image hue is adjusted by converting the image to HSV and + cyclically shifting the intensities in the hue channel (H). + The image is then converted back to original image mode. + + `hue_factor` is the amount of shift in H channel and must be in the + interval `[-0.5, 0.5]`. + + See `Hue`_ for more details. + + .. _Hue: https://en.wikipedia.org/wiki/Hue + + Args: + img (PIL Image or Tensor): Image to be adjusted. + If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format, + where ... means it can have an arbitrary number of leading dimensions. + If img is PIL Image mode "1", "I", "F" and modes with transparency (alpha channel) are not supported. + hue_factor (float): How much to shift the hue channel. Should be in + [-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in + HSV space in positive and negative direction respectively. + 0 means no shift. Therefore, both -0.5 and 0.5 will give an image + with complementary colors while 0 gives the original image. + + Returns: + PIL Image or Tensor: Hue adjusted image. + """ + if not isinstance(img, torch.Tensor): + return F_pil.adjust_hue(img, hue_factor) + + return F_t.adjust_hue(img, hue_factor) + + +def adjust_gamma(img: Tensor, gamma: float, gain: float = 1) -> Tensor: + r"""Perform gamma correction on an image. + + Also known as Power Law Transform. Intensities in RGB mode are adjusted + based on the following equation: + + .. math:: + I_{\text{out}} = 255 \times \text{gain} \times \left(\frac{I_{\text{in}}}{255}\right)^{\gamma} + + See `Gamma Correction`_ for more details. + + .. _Gamma Correction: https://en.wikipedia.org/wiki/Gamma_correction + + Args: + img (PIL Image or Tensor): PIL Image to be adjusted. + If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format, + where ... means it can have an arbitrary number of leading dimensions. + If img is PIL Image, modes with transparency (alpha channel) are not supported. + gamma (float): Non negative real number, same as :math:`\gamma` in the equation. + gamma larger than 1 make the shadows darker, + while gamma smaller than 1 make dark regions lighter. + gain (float): The constant multiplier. 
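+
+ As a worked example of the equation above (illustrative, assuming an 8-bit input
+ and ``gain=1``): ``gamma=2.0`` maps a mid-gray value of 128 to
+ ``255 * (128 / 255) ** 2``, roughly 64 (darker), while ``gamma=0.5`` maps 128 to
+ ``255 * (128 / 255) ** 0.5``, roughly 181 (lighter).
+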
+ Returns: + PIL Image or Tensor: Gamma correction adjusted image. + """ + if not isinstance(img, torch.Tensor): + return F_pil.adjust_gamma(img, gamma, gain) + + return F_t.adjust_gamma(img, gamma, gain) + + +def _get_inverse_affine_matrix( + center: List[float], angle: float, translate: List[float], scale: float, shear: List[float] +) -> List[float]: + # Helper method to compute inverse matrix for affine transformation + + # As it is explained in PIL.Image.rotate + # We need compute INVERSE of affine transformation matrix: M = T * C * RSS * C^-1 + # where T is translation matrix: [1, 0, tx | 0, 1, ty | 0, 0, 1] + # C is translation matrix to keep center: [1, 0, cx | 0, 1, cy | 0, 0, 1] + # RSS is rotation with scale and shear matrix + # RSS(a, s, (sx, sy)) = + # = R(a) * S(s) * SHy(sy) * SHx(sx) + # = [ s*cos(a - sy)/cos(sy), s*(-cos(a - sy)*tan(x)/cos(y) - sin(a)), 0 ] + # [ s*sin(a + sy)/cos(sy), s*(-sin(a - sy)*tan(x)/cos(y) + cos(a)), 0 ] + # [ 0 , 0 , 1 ] + # + # where R is a rotation matrix, S is a scaling matrix, and SHx and SHy are the shears: + # SHx(s) = [1, -tan(s)] and SHy(s) = [1 , 0] + # [0, 1 ] [-tan(s), 1] + # + # Thus, the inverse is M^-1 = C * RSS^-1 * C^-1 * T^-1 + + rot = math.radians(angle) + sx = math.radians(shear[0]) + sy = math.radians(shear[1]) + + cx, cy = center + tx, ty = translate + + # RSS without scaling + a = math.cos(rot - sy) / math.cos(sy) + b = -math.cos(rot - sy) * math.tan(sx) / math.cos(sy) - math.sin(rot) + c = math.sin(rot - sy) / math.cos(sy) + d = -math.sin(rot - sy) * math.tan(sx) / math.cos(sy) + math.cos(rot) + + # Inverted rotation matrix with scale and shear + # det([[a, b], [c, d]]) == 1, since det(rotation) = 1 and det(shear) = 1 + matrix = [d, -b, 0.0, -c, a, 0.0] + matrix = [x / scale for x in matrix] + + # Apply inverse of translation and of center translation: RSS^-1 * C^-1 * T^-1 + matrix[2] += matrix[0] * (-cx - tx) + matrix[1] * (-cy - ty) + matrix[5] += matrix[3] * (-cx - tx) + matrix[4] * (-cy - ty) + + # Apply center translation: C * RSS^-1 * C^-1 * T^-1 + matrix[2] += cx + matrix[5] += cy + + return matrix + + +def rotate( + img: Tensor, + angle: float, + interpolation: InterpolationMode = InterpolationMode.NEAREST, + expand: bool = False, + center: Optional[List[int]] = None, + fill: Optional[List[float]] = None, + resample: Optional[int] = None, +) -> Tensor: + """Rotate the image by angle. + If the image is torch Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. + + Args: + img (PIL Image or Tensor): image to be rotated. + angle (number): rotation angle value in degrees, counter-clockwise. + interpolation (InterpolationMode): Desired interpolation enum defined by + :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``. + If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. + For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable. + expand (bool, optional): Optional expansion flag. + If true, expands the output image to make it large enough to hold the entire rotated image. + If false or omitted, make the output image the same size as the input image. + Note that the expand flag assumes rotation around the center and no translation. + center (sequence, optional): Optional center of rotation. Origin is the upper left corner. + Default is the center of the image. 
+ fill (sequence or number, optional): Pixel fill value for the area outside the transformed + image. If given a number, the value is used for all bands respectively. + + .. note:: + In torchscript mode single int/float value is not supported, please use a sequence + of length 1: ``[value, ]``. + + Returns: + PIL Image or Tensor: Rotated image. + + .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters + + """ + if resample is not None: + warnings.warn( + "Argument resample is deprecated and will be removed since v0.10.0. Please, use interpolation instead" + ) + interpolation = _interpolation_modes_from_int(resample) + + # Backward compatibility with integer value + if isinstance(interpolation, int): + warnings.warn( + "Argument interpolation should be of type InterpolationMode instead of int. " + "Please, use InterpolationMode enum." + ) + interpolation = _interpolation_modes_from_int(interpolation) + + if not isinstance(angle, (int, float)): + raise TypeError("Argument angle should be int or float") + + if center is not None and not isinstance(center, (list, tuple)): + raise TypeError("Argument center should be a sequence") + + if not isinstance(interpolation, InterpolationMode): + raise TypeError("Argument interpolation should be a InterpolationMode") + + if not isinstance(img, torch.Tensor): + pil_interpolation = pil_modes_mapping[interpolation] + return F_pil.rotate(img, angle=angle, interpolation=pil_interpolation, expand=expand, center=center, fill=fill) + + center_f = [0.0, 0.0] + if center is not None: + img_size = get_image_size(img) + # Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center. + center_f = [1.0 * (c - s * 0.5) for c, s in zip(center, img_size)] + + # due to current incoherence of rotation angle direction between affine and rotate implementations + # we need to set -angle. + matrix = _get_inverse_affine_matrix(center_f, -angle, [0.0, 0.0], 1.0, [0.0, 0.0]) + return F_t.rotate(img, matrix=matrix, interpolation=interpolation.value, expand=expand, fill=fill) + + +def affine( + img: Tensor, + angle: float, + translate: List[int], + scale: float, + shear: List[float], + interpolation: InterpolationMode = InterpolationMode.NEAREST, + fill: Optional[List[float]] = None, + resample: Optional[int] = None, + fillcolor: Optional[List[float]] = None, +) -> Tensor: + """Apply affine transformation on the image keeping image center invariant. + If the image is torch Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. + + Args: + img (PIL Image or Tensor): image to transform. + angle (number): rotation angle in degrees between -180 and 180, clockwise direction. + translate (sequence of integers): horizontal and vertical translations (post-rotation translation) + scale (float): overall scale + shear (float or sequence): shear angle value in degrees between -180 to 180, clockwise direction. + If a sequence is specified, the first value corresponds to a shear parallel to the x axis, while + the second value corresponds to a shear parallel to the y axis. + interpolation (InterpolationMode): Desired interpolation enum defined by + :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``. + If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. + For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable. 
+ fill (sequence or number, optional): Pixel fill value for the area outside the transformed + image. If given a number, the value is used for all bands respectively. + + .. note:: + In torchscript mode single int/float value is not supported, please use a sequence + of length 1: ``[value, ]``. + fillcolor (sequence, int, float): deprecated argument and will be removed since v0.10.0. + Please use the ``fill`` parameter instead. + resample (int, optional): deprecated argument and will be removed since v0.10.0. + Please use the ``interpolation`` parameter instead. + + Returns: + PIL Image or Tensor: Transformed image. + """ + if resample is not None: + warnings.warn( + "Argument resample is deprecated and will be removed since v0.10.0. Please, use interpolation instead" + ) + interpolation = _interpolation_modes_from_int(resample) + + # Backward compatibility with integer value + if isinstance(interpolation, int): + warnings.warn( + "Argument interpolation should be of type InterpolationMode instead of int. " + "Please, use InterpolationMode enum." + ) + interpolation = _interpolation_modes_from_int(interpolation) + + if fillcolor is not None: + warnings.warn("Argument fillcolor is deprecated and will be removed since v0.10.0. Please, use fill instead") + fill = fillcolor + + if not isinstance(angle, (int, float)): + raise TypeError("Argument angle should be int or float") + + if not isinstance(translate, (list, tuple)): + raise TypeError("Argument translate should be a sequence") + + if len(translate) != 2: + raise ValueError("Argument translate should be a sequence of length 2") + + if scale <= 0.0: + raise ValueError("Argument scale should be positive") + + if not isinstance(shear, (numbers.Number, (list, tuple))): + raise TypeError("Shear should be either a single value or a sequence of two values") + + if not isinstance(interpolation, InterpolationMode): + raise TypeError("Argument interpolation should be a InterpolationMode") + + if isinstance(angle, int): + angle = float(angle) + + if isinstance(translate, tuple): + translate = list(translate) + + if isinstance(shear, numbers.Number): + shear = [shear, 0.0] + + if isinstance(shear, tuple): + shear = list(shear) + + if len(shear) == 1: + shear = [shear[0], shear[0]] + + if len(shear) != 2: + raise ValueError(f"Shear should be a sequence containing two values. Got {shear}") + + img_size = get_image_size(img) + if not isinstance(img, torch.Tensor): + # center = (img_size[0] * 0.5 + 0.5, img_size[1] * 0.5 + 0.5) + # it is visually better to estimate the center without 0.5 offset + # otherwise image rotated by 90 degrees is shifted vs output image of torch.rot90 or F_t.affine + center = [img_size[0] * 0.5, img_size[1] * 0.5] + matrix = _get_inverse_affine_matrix(center, angle, translate, scale, shear) + pil_interpolation = pil_modes_mapping[interpolation] + return F_pil.affine(img, matrix=matrix, interpolation=pil_interpolation, fill=fill) + + translate_f = [1.0 * t for t in translate] + matrix = _get_inverse_affine_matrix([0.0, 0.0], angle, translate_f, scale, shear) + return F_t.affine(img, matrix=matrix, interpolation=interpolation.value, fill=fill) + + +@torch.jit.unused +def to_grayscale(img, num_output_channels=1): + """Convert PIL image of any mode (RGB, HSV, LAB, etc) to grayscale version of image. + This transform does not support torch Tensor. + + Args: + img (PIL Image): PIL Image to be converted to grayscale. + num_output_channels (int): number of channels of the output image. Value can be 1 or 3. Default is 1. 
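+
+ Example (minimal sketch; ``pil_img`` is a placeholder for any already-loaded PIL Image):
+
+ >>> gray1 = to_grayscale(pil_img, num_output_channels=1) # single-channel image
+ >>> gray3 = to_grayscale(pil_img, num_output_channels=3) # 3 channels with r == g == b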
+ + Returns: + PIL Image: Grayscale version of the image. + + - if num_output_channels = 1 : returned image is single channel + - if num_output_channels = 3 : returned image is 3 channel with r = g = b + """ + if isinstance(img, Image.Image): + return F_pil.to_grayscale(img, num_output_channels) + + raise TypeError("Input should be PIL Image") + + +def rgb_to_grayscale(img: Tensor, num_output_channels: int = 1) -> Tensor: + """Convert RGB image to grayscale version of image. + If the image is torch Tensor, it is expected + to have [..., 3, H, W] shape, where ... means an arbitrary number of leading dimensions + + Note: + Please, note that this method supports only RGB images as input. For inputs in other color spaces, + please, consider using meth:`~torchvision.transforms.functional.to_grayscale` with PIL Image. + + Args: + img (PIL Image or Tensor): RGB Image to be converted to grayscale. + num_output_channels (int): number of channels of the output image. Value can be 1 or 3. Default, 1. + + Returns: + PIL Image or Tensor: Grayscale version of the image. + + - if num_output_channels = 1 : returned image is single channel + - if num_output_channels = 3 : returned image is 3 channel with r = g = b + """ + if not isinstance(img, torch.Tensor): + return F_pil.to_grayscale(img, num_output_channels) + + return F_t.rgb_to_grayscale(img, num_output_channels) + + +def erase(img: Tensor, i: int, j: int, h: int, w: int, v: Tensor, inplace: bool = False) -> Tensor: + """Erase the input Tensor Image with given value. + This transform does not support PIL Image. + + Args: + img (Tensor Image): Tensor image of size (C, H, W) to be erased + i (int): i in (i,j) i.e coordinates of the upper left corner. + j (int): j in (i,j) i.e coordinates of the upper left corner. + h (int): Height of the erased region. + w (int): Width of the erased region. + v: Erasing value. + inplace(bool, optional): For in-place operations. By default is set False. + + Returns: + Tensor Image: Erased image. + """ + if not isinstance(img, torch.Tensor): + raise TypeError(f"img should be Tensor Image. Got {type(img)}") + + if not inplace: + img = img.clone() + + img[..., i : i + h, j : j + w] = v + return img + + +def gaussian_blur(img: Tensor, kernel_size: List[int], sigma: Optional[List[float]] = None) -> Tensor: + """Performs Gaussian blurring on the image by given kernel. + If the image is torch Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. + + Args: + img (PIL Image or Tensor): Image to be blurred + kernel_size (sequence of ints or int): Gaussian kernel size. Can be a sequence of integers + like ``(kx, ky)`` or a single integer for square kernels. + + .. note:: + In torchscript mode kernel_size as single int is not supported, use a sequence of + length 1: ``[ksize, ]``. + sigma (sequence of floats or float, optional): Gaussian kernel standard deviation. Can be a + sequence of floats like ``(sigma_x, sigma_y)`` or a single float to define the + same sigma in both X/Y directions. If None, then it is computed using + ``kernel_size`` as ``sigma = 0.3 * ((kernel_size - 1) * 0.5 - 1) + 0.8``. + Default, None. + + .. note:: + In torchscript mode sigma as single float is + not supported, use a sequence of length 1: ``[sigma, ]``. + + Returns: + PIL Image or Tensor: Gaussian Blurred version of the image. + """ + if not isinstance(kernel_size, (int, list, tuple)): + raise TypeError(f"kernel_size should be int or a sequence of integers. 
Got {type(kernel_size)}") + if isinstance(kernel_size, int): + kernel_size = [kernel_size, kernel_size] + if len(kernel_size) != 2: + raise ValueError(f"If kernel_size is a sequence its length should be 2. Got {len(kernel_size)}") + for ksize in kernel_size: + if ksize % 2 == 0 or ksize < 0: + raise ValueError(f"kernel_size should have odd and positive integers. Got {kernel_size}") + + if sigma is None: + sigma = [ksize * 0.15 + 0.35 for ksize in kernel_size] + + if sigma is not None and not isinstance(sigma, (int, float, list, tuple)): + raise TypeError(f"sigma should be either float or sequence of floats. Got {type(sigma)}") + if isinstance(sigma, (int, float)): + sigma = [float(sigma), float(sigma)] + if isinstance(sigma, (list, tuple)) and len(sigma) == 1: + sigma = [sigma[0], sigma[0]] + if len(sigma) != 2: + raise ValueError(f"If sigma is a sequence, its length should be 2. Got {len(sigma)}") + for s in sigma: + if s <= 0.0: + raise ValueError(f"sigma should have positive values. Got {sigma}") + + t_img = img + if not isinstance(img, torch.Tensor): + if not F_pil._is_pil_image(img): + raise TypeError(f"img should be PIL Image or Tensor. Got {type(img)}") + + t_img = to_tensor(img) + + output = F_t.gaussian_blur(t_img, kernel_size, sigma) + + if not isinstance(img, torch.Tensor): + output = to_pil_image(output) + return output + + +def invert(img: Tensor) -> Tensor: + """Invert the colors of an RGB/grayscale image. + + Args: + img (PIL Image or Tensor): Image to have its colors inverted. + If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format, + where ... means it can have an arbitrary number of leading dimensions. + If img is PIL Image, it is expected to be in mode "L" or "RGB". + + Returns: + PIL Image or Tensor: Color inverted image. + """ + if not isinstance(img, torch.Tensor): + return F_pil.invert(img) + + return F_t.invert(img) + + +def posterize(img: Tensor, bits: int) -> Tensor: + """Posterize an image by reducing the number of bits for each color channel. + + Args: + img (PIL Image or Tensor): Image to have its colors posterized. + If img is torch Tensor, it should be of type torch.uint8 and + it is expected to be in [..., 1 or 3, H, W] format, where ... means + it can have an arbitrary number of leading dimensions. + If img is PIL Image, it is expected to be in mode "L" or "RGB". + bits (int): The number of bits to keep for each channel (0-8). + Returns: + PIL Image or Tensor: Posterized image. + """ + if not (0 <= bits <= 8): + raise ValueError(f"The number if bits should be between 0 and 8. Got {bits}") + + if not isinstance(img, torch.Tensor): + return F_pil.posterize(img, bits) + + return F_t.posterize(img, bits) + + +def solarize(img: Tensor, threshold: float) -> Tensor: + """Solarize an RGB/grayscale image by inverting all pixel values above a threshold. + + Args: + img (PIL Image or Tensor): Image to have its colors inverted. + If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format, + where ... means it can have an arbitrary number of leading dimensions. + If img is PIL Image, it is expected to be in mode "L" or "RGB". + threshold (float): All pixels equal or above this value are inverted. + Returns: + PIL Image or Tensor: Solarized image. + """ + if not isinstance(img, torch.Tensor): + return F_pil.solarize(img, threshold) + + return F_t.solarize(img, threshold) + + +def adjust_sharpness(img: Tensor, sharpness_factor: float) -> Tensor: + """Adjust the sharpness of an image. 
+ + Args: + img (PIL Image or Tensor): Image to be adjusted. + If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format, + where ... means it can have an arbitrary number of leading dimensions. + sharpness_factor (float): How much to adjust the sharpness. Can be + any non negative number. 0 gives a blurred image, 1 gives the + original image while 2 increases the sharpness by a factor of 2. + + Returns: + PIL Image or Tensor: Sharpness adjusted image. + """ + if not isinstance(img, torch.Tensor): + return F_pil.adjust_sharpness(img, sharpness_factor) + + return F_t.adjust_sharpness(img, sharpness_factor) + + +def autocontrast(img: Tensor) -> Tensor: + """Maximize contrast of an image by remapping its + pixels per channel so that the lowest becomes black and the lightest + becomes white. + + Args: + img (PIL Image or Tensor): Image on which autocontrast is applied. + If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format, + where ... means it can have an arbitrary number of leading dimensions. + If img is PIL Image, it is expected to be in mode "L" or "RGB". + + Returns: + PIL Image or Tensor: An image that was autocontrasted. + """ + if not isinstance(img, torch.Tensor): + return F_pil.autocontrast(img) + + return F_t.autocontrast(img) + + +def equalize(img: Tensor) -> Tensor: + """Equalize the histogram of an image by applying + a non-linear mapping to the input in order to create a uniform + distribution of grayscale values in the output. + + Args: + img (PIL Image or Tensor): Image on which equalize is applied. + If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format, + where ... means it can have an arbitrary number of leading dimensions. + The tensor dtype must be ``torch.uint8`` and values are expected to be in ``[0, 255]``. + If img is PIL Image, it is expected to be in mode "P", "L" or "RGB". + + Returns: + PIL Image or Tensor: An image that was equalized. + """ + if not isinstance(img, torch.Tensor): + return F_pil.equalize(img) + + return F_t.equalize(img) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_origin.py new file mode 100644 index 0000000000..7ce1fb6ab3 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_origin.py @@ -0,0 +1,906 @@ +import torch +import math +from PIL import Image, ImageOps, ImageEnhance, __version__ as PILLOW_VERSION +try: + import accimage +except ImportError: + accimage = None +import numpy as np +from numpy import sin, cos, tan +import numbers +from collections.abc import Sequence, Iterable +import warnings + + +def _is_pil_image(img): + if accimage is not None: + return isinstance(img, (Image.Image, accimage.Image)) + else: + return isinstance(img, Image.Image) + + +def _is_numpy(img): + return isinstance(img, np.ndarray) + + +def _is_numpy_image(img): + return img.ndim in {2, 3} + + +def to_tensor(pic): + """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor. + + See ``ToTensor`` for more details. + + Args: + pic (PIL Image or numpy.ndarray): Image to be converted to tensor. + + Returns: + Tensor: Converted image. + """ + if not(_is_pil_image(pic) or _is_numpy(pic)): + raise TypeError('pic should be PIL Image or ndarray. Got {}'.format(type(pic))) + + if _is_numpy(pic) and not _is_numpy_image(pic): + raise ValueError('pic should be 2/3 dimensional. 
Got {} dimensions.'.format(pic.ndim)) + + if isinstance(pic, np.ndarray): + # handle numpy array + if pic.ndim == 2: + pic = pic[:, :, None] + + img = torch.from_numpy(pic.transpose((2, 0, 1))) + # backward compatibility + if isinstance(img, torch.ByteTensor): + return img.float().div(255) + else: + return img + + if accimage is not None and isinstance(pic, accimage.Image): + nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.float32) + pic.copyto(nppic) + return torch.from_numpy(nppic) + + # handle PIL Image + if pic.mode == 'I': + img = torch.from_numpy(np.array(pic, np.int32, copy=False)) + elif pic.mode == 'I;16': + img = torch.from_numpy(np.array(pic, np.int16, copy=False)) + elif pic.mode == 'F': + img = torch.from_numpy(np.array(pic, np.float32, copy=False)) + elif pic.mode == '1': + img = 255 * torch.from_numpy(np.array(pic, np.uint8, copy=False)) + else: + img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes())) + + img = img.view(pic.size[1], pic.size[0], len(pic.getbands())) + # put it from HWC to CHW format + img = img.permute((2, 0, 1)).contiguous() + if isinstance(img, torch.ByteTensor): + return img.float().div(255) + else: + return img + + +def to_pil_image(pic, mode=None): + """Convert a tensor or an ndarray to PIL Image. + + See :class:`~torchvision.transforms.ToPILImage` for more details. + + Args: + pic (Tensor or numpy.ndarray): Image to be converted to PIL Image. + mode (`PIL.Image mode`_): color space and pixel depth of input data (optional). + + .. _PIL.Image mode: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#concept-modes + + Returns: + PIL Image: Image converted to PIL Image. + """ + if not(isinstance(pic, torch.Tensor) or isinstance(pic, np.ndarray)): + raise TypeError('pic should be Tensor or ndarray. Got {}.'.format(type(pic))) + + elif isinstance(pic, torch.Tensor): + if pic.ndimension() not in {2, 3}: + raise ValueError('pic should be 2/3 dimensional. Got {} dimensions.'.format(pic.ndimension())) + + elif pic.ndimension() == 2: + # if 2D image, add channel dimension (CHW) + pic = pic.unsqueeze(0) + + elif isinstance(pic, np.ndarray): + if pic.ndim not in {2, 3}: + raise ValueError('pic should be 2/3 dimensional. Got {} dimensions.'.format(pic.ndim)) + + elif pic.ndim == 2: + # if 2D image, add channel dimension (HWC) + pic = np.expand_dims(pic, 2) + + npimg = pic + if isinstance(pic, torch.FloatTensor) and mode != 'F': + pic = pic.mul(255).byte() + if isinstance(pic, torch.Tensor): + npimg = np.transpose(pic.numpy(), (1, 2, 0)) + + if not isinstance(npimg, np.ndarray): + raise TypeError('Input pic must be a torch.Tensor or NumPy ndarray, ' + + 'not {}'.format(type(npimg))) + + if npimg.shape[2] == 1: + expected_mode = None + npimg = npimg[:, :, 0] + if npimg.dtype == np.uint8: + expected_mode = 'L' + elif npimg.dtype == np.int16: + expected_mode = 'I;16' + elif npimg.dtype == np.int32: + expected_mode = 'I' + elif npimg.dtype == np.float32: + expected_mode = 'F' + if mode is not None and mode != expected_mode: + raise ValueError("Incorrect mode ({}) supplied for input type {}. 
Should be {}" + .format(mode, np.dtype, expected_mode)) + mode = expected_mode + + elif npimg.shape[2] == 2: + permitted_2_channel_modes = ['LA'] + if mode is not None and mode not in permitted_2_channel_modes: + raise ValueError("Only modes {} are supported for 2D inputs".format(permitted_2_channel_modes)) + + if mode is None and npimg.dtype == np.uint8: + mode = 'LA' + + elif npimg.shape[2] == 4: + permitted_4_channel_modes = ['RGBA', 'CMYK', 'RGBX'] + if mode is not None and mode not in permitted_4_channel_modes: + raise ValueError("Only modes {} are supported for 4D inputs".format(permitted_4_channel_modes)) + + if mode is None and npimg.dtype == np.uint8: + mode = 'RGBA' + else: + permitted_3_channel_modes = ['RGB', 'YCbCr', 'HSV'] + if mode is not None and mode not in permitted_3_channel_modes: + raise ValueError("Only modes {} are supported for 3D inputs".format(permitted_3_channel_modes)) + if mode is None and npimg.dtype == np.uint8: + mode = 'RGB' + + if mode is None: + raise TypeError('Input type {} is not supported'.format(npimg.dtype)) + + return Image.fromarray(npimg, mode=mode) + + +def normalize(tensor, mean, std, inplace=False): + """Normalize a tensor image with mean and standard deviation. + + .. note:: + This transform acts out of place by default, i.e., it does not mutates the input tensor. + + See :class:`~torchvision.transforms.Normalize` for more details. + + Args: + tensor (Tensor): Tensor image of size (C, H, W) to be normalized. + mean (sequence): Sequence of means for each channel. + std (sequence): Sequence of standard deviations for each channel. + inplace(bool,optional): Bool to make this operation inplace. + + Returns: + Tensor: Normalized Tensor image. + """ + if not torch.is_tensor(tensor): + raise TypeError('tensor should be a torch tensor. Got {}.'.format(type(tensor))) + + if tensor.ndimension() != 3: + raise ValueError('Expected tensor to be a tensor image of size (C, H, W). Got tensor.size() = ' + '{}.'.format(tensor.size())) + + if not inplace: + tensor = tensor.clone() + + dtype = tensor.dtype + mean = torch.as_tensor(mean, dtype=dtype, device=tensor.device) + std = torch.as_tensor(std, dtype=dtype, device=tensor.device) + if (std == 0).any(): + raise ValueError('std evaluated to zero after conversion to {}, leading to division by zero.'.format(dtype)) + if mean.ndim == 1: + mean = mean[:, None, None] + if std.ndim == 1: + std = std[:, None, None] + tensor.sub_(mean).div_(std) + return tensor + + +def resize(img, size, interpolation=Image.BILINEAR): + r"""Resize the input PIL Image to the given size. + + Args: + img (PIL Image): Image to be resized. + size (sequence or int): Desired output size. If size is a sequence like + (h, w), the output size will be matched to this. If size is an int, + the smaller edge of the image will be matched to this number maintaing + the aspect ratio. i.e, if height > width, then image will be rescaled to + :math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)` + interpolation (int, optional): Desired interpolation. Default is + ``PIL.Image.BILINEAR`` + + Returns: + PIL Image: Resized image. + """ + if not _is_pil_image(img): + raise TypeError('img should be PIL Image. 
Got {}'.format(type(img))) + if not (isinstance(size, int) or (isinstance(size, Iterable) and len(size) == 2)): + raise TypeError('Got inappropriate size arg: {}'.format(size)) + + if isinstance(size, int): + w, h = img.size + if (w <= h and w == size) or (h <= w and h == size): + return img + if w < h: + ow = size + oh = int(size * h / w) + return img.resize((ow, oh), interpolation) + else: + oh = size + ow = int(size * w / h) + return img.resize((ow, oh), interpolation) + else: + return img.resize(size[::-1], interpolation) + + +def scale(*args, **kwargs): + warnings.warn("The use of the transforms.Scale transform is deprecated, " + + "please use transforms.Resize instead.") + return resize(*args, **kwargs) + + +def pad(img, padding, fill=0, padding_mode='constant'): + r"""Pad the given PIL Image on all sides with specified padding mode and fill value. + + Args: + img (PIL Image): Image to be padded. + padding (int or tuple): Padding on each border. If a single int is provided this + is used to pad all borders. If tuple of length 2 is provided this is the padding + on left/right and top/bottom respectively. If a tuple of length 4 is provided + this is the padding for the left, top, right and bottom borders + respectively. + fill: Pixel fill value for constant fill. Default is 0. If a tuple of + length 3, it is used to fill R, G, B channels respectively. + This value is only used when the padding_mode is constant + padding_mode: Type of padding. Should be: constant, edge, reflect or symmetric. Default is constant. + + - constant: pads with a constant value, this value is specified with fill + + - edge: pads with the last value on the edge of the image + + - reflect: pads with reflection of image (without repeating the last value on the edge) + + padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode + will result in [3, 2, 1, 2, 3, 4, 3, 2] + + - symmetric: pads with reflection of image (repeating the last value on the edge) + + padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode + will result in [2, 1, 1, 2, 3, 4, 4, 3] + + Returns: + PIL Image: Padded image. + """ + if not _is_pil_image(img): + raise TypeError('img should be PIL Image. 
Got {}'.format(type(img))) + + if not isinstance(padding, (numbers.Number, tuple)): + raise TypeError('Got inappropriate padding arg') + if not isinstance(fill, (numbers.Number, str, tuple)): + raise TypeError('Got inappropriate fill arg') + if not isinstance(padding_mode, str): + raise TypeError('Got inappropriate padding_mode arg') + + if isinstance(padding, Sequence) and len(padding) not in [2, 4]: + raise ValueError("Padding must be an int or a 2, or 4 element tuple, not a " + + "{} element tuple".format(len(padding))) + + assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric'], \ + 'Padding mode should be either constant, edge, reflect or symmetric' + + if padding_mode == 'constant': + if img.mode == 'P': + palette = img.getpalette() + image = ImageOps.expand(img, border=padding, fill=fill) + image.putpalette(palette) + return image + + return ImageOps.expand(img, border=padding, fill=fill) + else: + if isinstance(padding, int): + pad_left = pad_right = pad_top = pad_bottom = padding + if isinstance(padding, Sequence) and len(padding) == 2: + pad_left = pad_right = padding[0] + pad_top = pad_bottom = padding[1] + if isinstance(padding, Sequence) and len(padding) == 4: + pad_left = padding[0] + pad_top = padding[1] + pad_right = padding[2] + pad_bottom = padding[3] + + if img.mode == 'P': + palette = img.getpalette() + img = np.asarray(img) + img = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right)), padding_mode) + img = Image.fromarray(img) + img.putpalette(palette) + return img + + img = np.asarray(img) + # RGB image + if len(img.shape) == 3: + img = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right), (0, 0)), padding_mode) + # Grayscale image + if len(img.shape) == 2: + img = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right)), padding_mode) + + return Image.fromarray(img) + + +def crop(img, top, left, height, width): + """Crop the given PIL Image. + + Args: + img (PIL Image): Image to be cropped. (0,0) denotes the top left corner of the image. + top (int): Vertical component of the top left corner of the crop box. + left (int): Horizontal component of the top left corner of the crop box. + height (int): Height of the crop box. + width (int): Width of the crop box. + + Returns: + PIL Image: Cropped image. + """ + if not _is_pil_image(img): + raise TypeError('img should be PIL Image. Got {}'.format(type(img))) + + return img.crop((left, top, left + width, top + height)) + + +def center_crop(img, output_size): + """Crop the given PIL Image and resize it to desired size. + + Args: + img (PIL Image): Image to be cropped. (0,0) denotes the top left corner of the image. + output_size (sequence or int): (height, width) of the crop box. If int, + it is used for both directions + Returns: + PIL Image: Cropped image. + """ + if isinstance(output_size, numbers.Number): + output_size = (int(output_size), int(output_size)) + image_width, image_height = img.size + crop_height, crop_width = output_size + crop_top = int(round((image_height - crop_height) / 2.)) + crop_left = int(round((image_width - crop_width) / 2.)) + return crop(img, crop_top, crop_left, crop_height, crop_width) + + +def resized_crop(img, top, left, height, width, size, interpolation=Image.BILINEAR): + """Crop the given PIL Image and resize it to desired size. + + Notably used in :class:`~torchvision.transforms.RandomResizedCrop`. + + Args: + img (PIL Image): Image to be cropped. (0,0) denotes the top left corner of the image. 
+ top (int): Vertical component of the top left corner of the crop box. + left (int): Horizontal component of the top left corner of the crop box. + height (int): Height of the crop box. + width (int): Width of the crop box. + size (sequence or int): Desired output size. Same semantics as ``resize``. + interpolation (int, optional): Desired interpolation. Default is + ``PIL.Image.BILINEAR``. + Returns: + PIL Image: Cropped image. + """ + assert _is_pil_image(img), 'img should be PIL Image' + img = crop(img, top, left, height, width) + img = resize(img, size, interpolation) + return img + + +def hflip(img): + """Horizontally flip the given PIL Image. + + Args: + img (PIL Image): Image to be flipped. + + Returns: + PIL Image: Horizontall flipped image. + """ + if not _is_pil_image(img): + raise TypeError('img should be PIL Image. Got {}'.format(type(img))) + + return img.transpose(Image.FLIP_LEFT_RIGHT) + + +def _parse_fill(fill, img, min_pil_version): + """Helper function to get the fill color for rotate and perspective transforms. + + Args: + fill (n-tuple or int or float): Pixel fill value for area outside the transformed + image. If int or float, the value is used for all bands respectively. + Defaults to 0 for all bands. + img (PIL Image): Image to be filled. + min_pil_version (str): The minimum PILLOW version for when the ``fillcolor`` option + was first introduced in the calling function. (e.g. rotate->5.2.0, perspective->5.0.0) + + Returns: + dict: kwarg for ``fillcolor`` + """ + major_found, minor_found = (int(v) for v in PILLOW_VERSION.split('.')[:2]) + major_required, minor_required = (int(v) for v in min_pil_version.split('.')[:2]) + if major_found < major_required or (major_found == major_required and minor_found < minor_required): + if fill is None: + return {} + else: + msg = ("The option to fill background area of the transformed image, " + "requires pillow>={}") + raise RuntimeError(msg.format(min_pil_version)) + + num_bands = len(img.getbands()) + if fill is None: + fill = 0 + if isinstance(fill, (int, float)) and num_bands > 1: + fill = tuple([fill] * num_bands) + if not isinstance(fill, (int, float)) and len(fill) != num_bands: + msg = ("The number of elements in 'fill' does not match the number of " + "bands of the image ({} != {})") + raise ValueError(msg.format(len(fill), num_bands)) + + return {"fillcolor": fill} + + +def _get_perspective_coeffs(startpoints, endpoints): + """Helper function to get the coefficients (a, b, c, d, e, f, g, h) for the perspective transforms. + + In Perspective Transform each pixel (x, y) in the orignal image gets transformed as, + (x, y) -> ( (ax + by + c) / (gx + hy + 1), (dx + ey + f) / (gx + hy + 1) ) + + Args: + List containing [top-left, top-right, bottom-right, bottom-left] of the orignal image, + List containing [top-left, top-right, bottom-right, bottom-left] of the transformed + image + Returns: + octuple (a, b, c, d, e, f, g, h) for transforming each pixel. + """ + matrix = [] + + for p1, p2 in zip(endpoints, startpoints): + matrix.append([p1[0], p1[1], 1, 0, 0, 0, -p2[0] * p1[0], -p2[0] * p1[1]]) + matrix.append([0, 0, 0, p1[0], p1[1], 1, -p2[1] * p1[0], -p2[1] * p1[1]]) + + A = torch.tensor(matrix, dtype=torch.float) + B = torch.tensor(startpoints, dtype=torch.float).view(8) + res = torch.lstsq(B, A)[0] + return res.squeeze_(1).tolist() + + +def perspective(img, startpoints, endpoints, interpolation=Image.BICUBIC, fill=None): + """Perform perspective transform of the given PIL Image. 
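+
+ Example (minimal sketch; ``pil_img`` is a placeholder image and the corner lists are
+ hypothetical coordinates for a 64x64 input):
+
+ >>> start = [[0, 0], [63, 0], [63, 63], [0, 63]]
+ >>> end = [[4, 2], [60, 3], [61, 59], [2, 60]]
+ >>> warped = perspective(pil_img, start, end)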
+ + Args: + img (PIL Image): Image to be transformed. + startpoints: List containing [top-left, top-right, bottom-right, bottom-left] of the orignal image + endpoints: List containing [top-left, top-right, bottom-right, bottom-left] of the transformed image + interpolation: Default- Image.BICUBIC + fill (n-tuple or int or float): Pixel fill value for area outside the rotated + image. If int or float, the value is used for all bands respectively. + This option is only available for ``pillow>=5.0.0``. + + Returns: + PIL Image: Perspectively transformed Image. + """ + + if not _is_pil_image(img): + raise TypeError('img should be PIL Image. Got {}'.format(type(img))) + + opts = _parse_fill(fill, img, '5.0.0') + + coeffs = _get_perspective_coeffs(startpoints, endpoints) + return img.transform(img.size, Image.PERSPECTIVE, coeffs, interpolation, **opts) + + +def vflip(img): + """Vertically flip the given PIL Image. + + Args: + img (PIL Image): Image to be flipped. + + Returns: + PIL Image: Vertically flipped image. + """ + if not _is_pil_image(img): + raise TypeError('img should be PIL Image. Got {}'.format(type(img))) + + return img.transpose(Image.FLIP_TOP_BOTTOM) + + +def five_crop(img, size): + """Crop the given PIL Image into four corners and the central crop. + + .. Note:: + This transform returns a tuple of images and there may be a + mismatch in the number of inputs and targets your ``Dataset`` returns. + + Args: + size (sequence or int): Desired output size of the crop. If size is an + int instead of sequence like (h, w), a square crop (size, size) is + made. + + Returns: + tuple: tuple (tl, tr, bl, br, center) + Corresponding top left, top right, bottom left, bottom right and center crop. + """ + if isinstance(size, numbers.Number): + size = (int(size), int(size)) + else: + assert len(size) == 2, "Please provide only two dimensions (h, w) for size." + + image_width, image_height = img.size + crop_height, crop_width = size + if crop_width > image_width or crop_height > image_height: + msg = "Requested crop size {} is bigger than input size {}" + raise ValueError(msg.format(size, (image_height, image_width))) + + tl = img.crop((0, 0, crop_width, crop_height)) + tr = img.crop((image_width - crop_width, 0, image_width, crop_height)) + bl = img.crop((0, image_height - crop_height, crop_width, image_height)) + br = img.crop((image_width - crop_width, image_height - crop_height, + image_width, image_height)) + center = center_crop(img, (crop_height, crop_width)) + return (tl, tr, bl, br, center) + + +def ten_crop(img, size, vertical_flip=False): + """Generate ten cropped images from the given PIL Image. + Crop the given PIL Image into four corners and the central crop plus the + flipped version of these (horizontal flipping is used by default). + + .. Note:: + This transform returns a tuple of images and there may be a + mismatch in the number of inputs and targets your ``Dataset`` returns. + + Args: + size (sequence or int): Desired output size of the crop. If size is an + int instead of sequence like (h, w), a square crop (size, size) is + made. + vertical_flip (bool): Use vertical flipping instead of horizontal + + Returns: + tuple: tuple (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip, br_flip, center_flip) + Corresponding top left, top right, bottom left, bottom right and + center crop and same for the flipped image. + """ + if isinstance(size, numbers.Number): + size = (int(size), int(size)) + else: + assert len(size) == 2, "Please provide only two dimensions (h, w) for size." 
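+ # The ten crops are the five crops of the original image plus the five crops of
+ # its flipped copy (vertical flip if requested, horizontal flip otherwise).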
+ + first_five = five_crop(img, size) + + if vertical_flip: + img = vflip(img) + else: + img = hflip(img) + + second_five = five_crop(img, size) + return first_five + second_five + + +def adjust_brightness(img, brightness_factor): + """Adjust brightness of an Image. + + Args: + img (PIL Image): PIL Image to be adjusted. + brightness_factor (float): How much to adjust the brightness. Can be + any non negative number. 0 gives a black image, 1 gives the + original image while 2 increases the brightness by a factor of 2. + + Returns: + PIL Image: Brightness adjusted image. + """ + if not _is_pil_image(img): + raise TypeError('img should be PIL Image. Got {}'.format(type(img))) + + enhancer = ImageEnhance.Brightness(img) + img = enhancer.enhance(brightness_factor) + return img + + +def adjust_contrast(img, contrast_factor): + """Adjust contrast of an Image. + + Args: + img (PIL Image): PIL Image to be adjusted. + contrast_factor (float): How much to adjust the contrast. Can be any + non negative number. 0 gives a solid gray image, 1 gives the + original image while 2 increases the contrast by a factor of 2. + + Returns: + PIL Image: Contrast adjusted image. + """ + if not _is_pil_image(img): + raise TypeError('img should be PIL Image. Got {}'.format(type(img))) + + enhancer = ImageEnhance.Contrast(img) + img = enhancer.enhance(contrast_factor) + return img + + +def adjust_saturation(img, saturation_factor): + """Adjust color saturation of an image. + + Args: + img (PIL Image): PIL Image to be adjusted. + saturation_factor (float): How much to adjust the saturation. 0 will + give a black and white image, 1 will give the original image while + 2 will enhance the saturation by a factor of 2. + + Returns: + PIL Image: Saturation adjusted image. + """ + if not _is_pil_image(img): + raise TypeError('img should be PIL Image. Got {}'.format(type(img))) + + enhancer = ImageEnhance.Color(img) + img = enhancer.enhance(saturation_factor) + return img + + +def adjust_hue(img, hue_factor): + """Adjust hue of an image. + + The image hue is adjusted by converting the image to HSV and + cyclically shifting the intensities in the hue channel (H). + The image is then converted back to original image mode. + + `hue_factor` is the amount of shift in H channel and must be in the + interval `[-0.5, 0.5]`. + + See `Hue`_ for more details. + + .. _Hue: https://en.wikipedia.org/wiki/Hue + + Args: + img (PIL Image): PIL Image to be adjusted. + hue_factor (float): How much to shift the hue channel. Should be in + [-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in + HSV space in positive and negative direction respectively. + 0 means no shift. Therefore, both -0.5 and 0.5 will give an image + with complementary colors while 0 gives the original image. + + Returns: + PIL Image: Hue adjusted image. + """ + if not(-0.5 <= hue_factor <= 0.5): + raise ValueError('hue_factor is not in [-0.5, 0.5].'.format(hue_factor)) + + if not _is_pil_image(img): + raise TypeError('img should be PIL Image. Got {}'.format(type(img))) + + input_mode = img.mode + if input_mode in {'L', '1', 'I', 'F'}: + return img + + h, s, v = img.convert('HSV').split() + + np_h = np.array(h, dtype=np.uint8) + # uint8 addition take cares of rotation across boundaries + with np.errstate(over='ignore'): + np_h += np.uint8(hue_factor * 255) + h = Image.fromarray(np_h, 'L') + + img = Image.merge('HSV', (h, s, v)).convert(input_mode) + return img + + +def adjust_gamma(img, gamma, gain=1): + r"""Perform gamma correction on an image. 
+ + Also known as Power Law Transform. Intensities in RGB mode are adjusted + based on the following equation: + + .. math:: + I_{\text{out}} = 255 \times \text{gain} \times \left(\frac{I_{\text{in}}}{255}\right)^{\gamma} + + See `Gamma Correction`_ for more details. + + .. _Gamma Correction: https://en.wikipedia.org/wiki/Gamma_correction + + Args: + img (PIL Image): PIL Image to be adjusted. + gamma (float): Non negative real number, same as :math:`\gamma` in the equation. + gamma larger than 1 make the shadows darker, + while gamma smaller than 1 make dark regions lighter. + gain (float): The constant multiplier. + """ + if not _is_pil_image(img): + raise TypeError('img should be PIL Image. Got {}'.format(type(img))) + + if gamma < 0: + raise ValueError('Gamma should be a non-negative real number') + + input_mode = img.mode + img = img.convert('RGB') + + gamma_map = [255 * gain * pow(ele / 255., gamma) for ele in range(256)] * 3 + img = img.point(gamma_map) # use PIL's point-function to accelerate this part + + img = img.convert(input_mode) + return img + + +def rotate(img, angle, resample=False, expand=False, center=None, fill=None): + """Rotate the image by angle. + + + Args: + img (PIL Image): PIL Image to be rotated. + angle (float or int): In degrees degrees counter clockwise order. + resample (``PIL.Image.NEAREST`` or ``PIL.Image.BILINEAR`` or ``PIL.Image.BICUBIC``, optional): + An optional resampling filter. See `filters`_ for more information. + If omitted, or if the image has mode "1" or "P", it is set to ``PIL.Image.NEAREST``. + expand (bool, optional): Optional expansion flag. + If true, expands the output image to make it large enough to hold the entire rotated image. + If false or omitted, make the output image the same size as the input image. + Note that the expand flag assumes rotation around the center and no translation. + center (2-tuple, optional): Optional center of rotation. + Origin is the upper left corner. + Default is the center of the image. + fill (n-tuple or int or float): Pixel fill value for area outside the rotated + image. If int or float, the value is used for all bands respectively. + Defaults to 0 for all bands. This option is only available for ``pillow>=5.2.0``. + + .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters + + """ + if not _is_pil_image(img): + raise TypeError('img should be PIL Image. 
Got {}'.format(type(img))) + + opts = _parse_fill(fill, img, '5.2.0') + + return img.rotate(angle, resample, expand, center, **opts) + + +def _get_inverse_affine_matrix(center, angle, translate, scale, shear): + # Helper method to compute inverse matrix for affine transformation + + # As it is explained in PIL.Image.rotate + # We need compute INVERSE of affine transformation matrix: M = T * C * RSS * C^-1 + # where T is translation matrix: [1, 0, tx | 0, 1, ty | 0, 0, 1] + # C is translation matrix to keep center: [1, 0, cx | 0, 1, cy | 0, 0, 1] + # RSS is rotation with scale and shear matrix + # RSS(a, s, (sx, sy)) = + # = R(a) * S(s) * SHy(sy) * SHx(sx) + # = [ s*cos(a - sy)/cos(sy), s*(-cos(a - sy)*tan(x)/cos(y) - sin(a)), 0 ] + # [ s*sin(a + sy)/cos(sy), s*(-sin(a - sy)*tan(x)/cos(y) + cos(a)), 0 ] + # [ 0 , 0 , 1 ] + # + # where R is a rotation matrix, S is a scaling matrix, and SHx and SHy are the shears: + # SHx(s) = [1, -tan(s)] and SHy(s) = [1 , 0] + # [0, 1 ] [-tan(s), 1] + # + # Thus, the inverse is M^-1 = C * RSS^-1 * C^-1 * T^-1 + + if isinstance(shear, numbers.Number): + shear = [shear, 0] + + if not isinstance(shear, (tuple, list)) and len(shear) == 2: + raise ValueError( + "Shear should be a single value or a tuple/list containing " + + "two values. Got {}".format(shear)) + + rot = math.radians(angle) + sx, sy = [math.radians(s) for s in shear] + + cx, cy = center + tx, ty = translate + + # RSS without scaling + a = cos(rot - sy) / cos(sy) + b = -cos(rot - sy) * tan(sx) / cos(sy) - sin(rot) + c = sin(rot - sy) / cos(sy) + d = -sin(rot - sy) * tan(sx) / cos(sy) + cos(rot) + + # Inverted rotation matrix with scale and shear + # det([[a, b], [c, d]]) == 1, since det(rotation) = 1 and det(shear) = 1 + M = [d, -b, 0, + -c, a, 0] + M = [x / scale for x in M] + + # Apply inverse of translation and of center translation: RSS^-1 * C^-1 * T^-1 + M[2] += M[0] * (-cx - tx) + M[1] * (-cy - ty) + M[5] += M[3] * (-cx - tx) + M[4] * (-cy - ty) + + # Apply center translation: C * RSS^-1 * C^-1 * T^-1 + M[2] += cx + M[5] += cy + return M + + +def affine(img, angle, translate, scale, shear, resample=0, fillcolor=None): + """Apply affine transformation on the image keeping image center invariant + + Args: + img (PIL Image): PIL Image to be rotated. + angle (float or int): rotation angle in degrees between -180 and 180, clockwise direction. + translate (list or tuple of integers): horizontal and vertical translations (post-rotation translation) + scale (float): overall scale + shear (float or tuple or list): shear angle value in degrees between -180 to 180, clockwise direction. + If a tuple of list is specified, the first value corresponds to a shear parallel to the x axis, while + the second value corresponds to a shear parallel to the y axis. + resample (``PIL.Image.NEAREST`` or ``PIL.Image.BILINEAR`` or ``PIL.Image.BICUBIC``, optional): + An optional resampling filter. + See `filters`_ for more information. + If omitted, or if the image has mode "1" or "P", it is set to ``PIL.Image.NEAREST``. + fillcolor (int): Optional fill color for the area outside the transform in the output image. (Pillow>=5.0.0) + """ + if not _is_pil_image(img): + raise TypeError('img should be PIL Image. 
Got {}'.format(type(img))) + + assert isinstance(translate, (tuple, list)) and len(translate) == 2, \ + "Argument translate should be a list or tuple of length 2" + + assert scale > 0.0, "Argument scale should be positive" + + output_size = img.size + center = (img.size[0] * 0.5 + 0.5, img.size[1] * 0.5 + 0.5) + matrix = _get_inverse_affine_matrix(center, angle, translate, scale, shear) + kwargs = {"fillcolor": fillcolor} if int(PILLOW_VERSION.split('.')[0]) >= 5 else {} + return img.transform(output_size, Image.AFFINE, matrix, resample, **kwargs) + + +def to_grayscale(img, num_output_channels=1): + """Convert image to grayscale version of image. + + Args: + img (PIL Image): Image to be converted to grayscale. + + Returns: + PIL Image: Grayscale version of the image. + if num_output_channels = 1 : returned image is single channel + + if num_output_channels = 3 : returned image is 3 channel with r = g = b + """ + if not _is_pil_image(img): + raise TypeError('img should be PIL Image. Got {}'.format(type(img))) + + if num_output_channels == 1: + img = img.convert('L') + elif num_output_channels == 3: + img = img.convert('L') + np_img = np.array(img, dtype=np.uint8) + np_img = np.dstack([np_img, np_img, np_img]) + img = Image.fromarray(np_img, 'RGB') + else: + raise ValueError('num_output_channels should be either 1 or 3') + + return img + + +def erase(img, i, j, h, w, v, inplace=False): + """ Erase the input Tensor Image with given value. + + Args: + img (Tensor Image): Tensor image of size (C, H, W) to be erased + i (int): i in (i,j) i.e coordinates of the upper left corner. + j (int): j in (i,j) i.e coordinates of the upper left corner. + h (int): Height of the erased region. + w (int): Width of the erased region. + v: Erasing value. + inplace(bool, optional): For in-place operations. By default is set False. + + Returns: + Tensor Image: Erased image. + """ + if not isinstance(img, torch.Tensor): + raise TypeError('img should be Tensor Image. Got {}'.format(type(img))) + + if not inplace: + img = img.clone() + + img[:, i:i + h, j:j + w] = v + return img diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_pil.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_pil.py new file mode 100644 index 0000000000..fdaf5f7de1 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_pil.py @@ -0,0 +1,399 @@ +import numbers +from typing import Any, Dict, List, Optional, Sequence, Tuple, Union + +import numpy as np +import torch +from PIL import Image, ImageOps, ImageEnhance + +try: + import accimage +except ImportError: + accimage = None + + +@torch.jit.unused +def _is_pil_image(img: Any) -> bool: + if accimage is not None: + return isinstance(img, (Image.Image, accimage.Image)) + else: + return isinstance(img, Image.Image) + + +@torch.jit.unused +def get_image_size(img: Any) -> List[int]: + if _is_pil_image(img): + return list(img.size) + raise TypeError(f"Unexpected type {type(img)}") + + +@torch.jit.unused +def get_image_num_channels(img: Any) -> int: + if _is_pil_image(img): + return 1 if img.mode == "L" else 3 + raise TypeError(f"Unexpected type {type(img)}") + + +@torch.jit.unused +def hflip(img: Image.Image) -> Image.Image: + if not _is_pil_image(img): + raise TypeError(f"img should be PIL Image. 
Got {type(img)}") + + return img.transpose(Image.FLIP_LEFT_RIGHT) + + +@torch.jit.unused +def vflip(img: Image.Image) -> Image.Image: + if not _is_pil_image(img): + raise TypeError(f"img should be PIL Image. Got {type(img)}") + + return img.transpose(Image.FLIP_TOP_BOTTOM) + + +@torch.jit.unused +def adjust_brightness(img: Image.Image, brightness_factor: float) -> Image.Image: + if not _is_pil_image(img): + raise TypeError(f"img should be PIL Image. Got {type(img)}") + + enhancer = ImageEnhance.Brightness(img) + img = enhancer.enhance(brightness_factor) + return img + + +@torch.jit.unused +def adjust_contrast(img: Image.Image, contrast_factor: float) -> Image.Image: + if not _is_pil_image(img): + raise TypeError(f"img should be PIL Image. Got {type(img)}") + + enhancer = ImageEnhance.Contrast(img) + img = enhancer.enhance(contrast_factor) + return img + + +@torch.jit.unused +def adjust_saturation(img: Image.Image, saturation_factor: float) -> Image.Image: + if not _is_pil_image(img): + raise TypeError(f"img should be PIL Image. Got {type(img)}") + + enhancer = ImageEnhance.Color(img) + img = enhancer.enhance(saturation_factor) + return img + + +@torch.jit.unused +def adjust_hue(img: Image.Image, hue_factor: float) -> Image.Image: + if not (-0.5 <= hue_factor <= 0.5): + raise ValueError(f"hue_factor ({hue_factor}) is not in [-0.5, 0.5].") + + if not _is_pil_image(img): + raise TypeError(f"img should be PIL Image. Got {type(img)}") + + input_mode = img.mode + if input_mode in {"L", "1", "I", "F"}: + return img + + h, s, v = img.convert("HSV").split() + + np_h = np.array(h, dtype=np.uint8) + # uint8 addition take cares of rotation across boundaries + with np.errstate(over="ignore"): + np_h += np.uint8(hue_factor * 255) + h = Image.fromarray(np_h, "L") + + img = Image.merge("HSV", (h, s, v)).convert(input_mode) + return img + + +@torch.jit.unused +def adjust_gamma( + img: Image.Image, + gamma: float, + gain: float = 1.0, +) -> Image.Image: + + if not _is_pil_image(img): + raise TypeError(f"img should be PIL Image. Got {type(img)}") + + if gamma < 0: + raise ValueError("Gamma should be a non-negative real number") + + input_mode = img.mode + img = img.convert("RGB") + gamma_map = [(255 + 1 - 1e-3) * gain * pow(ele / 255.0, gamma) for ele in range(256)] * 3 + img = img.point(gamma_map) # use PIL's point-function to accelerate this part + + img = img.convert(input_mode) + return img + + +@torch.jit.unused +def pad( + img: Image.Image, + padding: Union[int, List[int], Tuple[int, ...]], + fill: Optional[Union[float, List[float], Tuple[float, ...]]] = 0, + padding_mode: str = "constant", +) -> Image.Image: + + if not _is_pil_image(img): + raise TypeError(f"img should be PIL Image. 
Got {type(img)}") + + if not isinstance(padding, (numbers.Number, tuple, list)): + raise TypeError("Got inappropriate padding arg") + if not isinstance(fill, (numbers.Number, str, tuple)): + raise TypeError("Got inappropriate fill arg") + if not isinstance(padding_mode, str): + raise TypeError("Got inappropriate padding_mode arg") + + if isinstance(padding, list): + padding = tuple(padding) + + if isinstance(padding, tuple) and len(padding) not in [1, 2, 4]: + raise ValueError(f"Padding must be an int or a 1, 2, or 4 element tuple, not a {len(padding)} element tuple") + + if isinstance(padding, tuple) and len(padding) == 1: + # Compatibility with `functional_tensor.pad` + padding = padding[0] + + if padding_mode not in ["constant", "edge", "reflect", "symmetric"]: + raise ValueError("Padding mode should be either constant, edge, reflect or symmetric") + + if padding_mode == "constant": + opts = _parse_fill(fill, img, name="fill") + if img.mode == "P": + palette = img.getpalette() + image = ImageOps.expand(img, border=padding, **opts) + image.putpalette(palette) + return image + + return ImageOps.expand(img, border=padding, **opts) + else: + if isinstance(padding, int): + pad_left = pad_right = pad_top = pad_bottom = padding + if isinstance(padding, tuple) and len(padding) == 2: + pad_left = pad_right = padding[0] + pad_top = pad_bottom = padding[1] + if isinstance(padding, tuple) and len(padding) == 4: + pad_left = padding[0] + pad_top = padding[1] + pad_right = padding[2] + pad_bottom = padding[3] + + p = [pad_left, pad_top, pad_right, pad_bottom] + cropping = -np.minimum(p, 0) + + if cropping.any(): + crop_left, crop_top, crop_right, crop_bottom = cropping + img = img.crop((crop_left, crop_top, img.width - crop_right, img.height - crop_bottom)) + + pad_left, pad_top, pad_right, pad_bottom = np.maximum(p, 0) + + if img.mode == "P": + palette = img.getpalette() + img = np.asarray(img) + img = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right)), padding_mode) + img = Image.fromarray(img) + img.putpalette(palette) + return img + + img = np.asarray(img) + # RGB image + if len(img.shape) == 3: + img = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right), (0, 0)), padding_mode) + # Grayscale image + if len(img.shape) == 2: + img = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right)), padding_mode) + + return Image.fromarray(img) + + +@torch.jit.unused +def crop( + img: Image.Image, + top: int, + left: int, + height: int, + width: int, +) -> Image.Image: + + if not _is_pil_image(img): + raise TypeError(f"img should be PIL Image. Got {type(img)}") + + return img.crop((left, top, left + width, top + height)) + + +@torch.jit.unused +def resize( + img: Image.Image, + size: Union[Sequence[int], int], + interpolation: int = Image.BILINEAR, + max_size: Optional[int] = None, +) -> Image.Image: + + if not _is_pil_image(img): + raise TypeError(f"img should be PIL Image. 
Got {type(img)}") + if not (isinstance(size, int) or (isinstance(size, Sequence) and len(size) in (1, 2))): + raise TypeError(f"Got inappropriate size arg: {size}") + + if isinstance(size, Sequence) and len(size) == 1: + size = size[0] + if isinstance(size, int): + w, h = img.size + + short, long = (w, h) if w <= h else (h, w) + if short == size: + return img + + new_short, new_long = size, int(size * long / short) + + if max_size is not None: + if max_size <= size: + raise ValueError( + f"max_size = {max_size} must be strictly greater than the requested " + f"size for the smaller edge size = {size}" + ) + if new_long > max_size: + new_short, new_long = int(max_size * new_short / new_long), max_size + + new_w, new_h = (new_short, new_long) if w <= h else (new_long, new_short) + return img.resize((new_w, new_h), interpolation) + else: + if max_size is not None: + raise ValueError( + "max_size should only be passed if size specifies the length of the smaller edge, " + "i.e. size should be an int or a sequence of length 1 in torchscript mode." + ) + return img.resize(size[::-1], interpolation) + + +@torch.jit.unused +def _parse_fill( + fill: Optional[Union[float, List[float], Tuple[float, ...]]], + img: Image.Image, + name: str = "fillcolor", +) -> Dict[str, Optional[Union[float, List[float], Tuple[float, ...]]]]: + + # Process fill color for affine transforms + num_bands = len(img.getbands()) + if fill is None: + fill = 0 + if isinstance(fill, (int, float)) and num_bands > 1: + fill = tuple([fill] * num_bands) + if isinstance(fill, (list, tuple)): + if len(fill) != num_bands: + msg = "The number of elements in 'fill' does not match the number of bands of the image ({} != {})" + raise ValueError(msg.format(len(fill), num_bands)) + + fill = tuple(fill) + + return {name: fill} + + +@torch.jit.unused +def affine( + img: Image.Image, + matrix: List[float], + interpolation: int = Image.NEAREST, + fill: Optional[Union[float, List[float], Tuple[float, ...]]] = 0, +) -> Image.Image: + + if not _is_pil_image(img): + raise TypeError(f"img should be PIL Image. Got {type(img)}") + + output_size = img.size + opts = _parse_fill(fill, img) + return img.transform(output_size, Image.AFFINE, matrix, interpolation, **opts) + + +@torch.jit.unused +def rotate( + img: Image.Image, + angle: float, + interpolation: int = Image.NEAREST, + expand: bool = False, + center: Optional[Tuple[int, int]] = None, + fill: Optional[Union[float, List[float], Tuple[float, ...]]] = 0, +) -> Image.Image: + + if not _is_pil_image(img): + raise TypeError(f"img should be PIL Image. Got {type(img)}") + + opts = _parse_fill(fill, img) + return img.rotate(angle, interpolation, expand, center, **opts) + + +@torch.jit.unused +def perspective( + img: Image.Image, + perspective_coeffs: float, + interpolation: int = Image.BICUBIC, + fill: Optional[Union[float, List[float], Tuple[float, ...]]] = 0, +) -> Image.Image: + + if not _is_pil_image(img): + raise TypeError(f"img should be PIL Image. Got {type(img)}") + + opts = _parse_fill(fill, img) + + return img.transform(img.size, Image.PERSPECTIVE, perspective_coeffs, interpolation, **opts) + + +@torch.jit.unused +def to_grayscale(img: Image.Image, num_output_channels: int) -> Image.Image: + if not _is_pil_image(img): + raise TypeError(f"img should be PIL Image. 
Got {type(img)}") + + if num_output_channels == 1: + img = img.convert("L") + elif num_output_channels == 3: + img = img.convert("L") + np_img = np.array(img, dtype=np.uint8) + np_img = np.dstack([np_img, np_img, np_img]) + img = Image.fromarray(np_img, "RGB") + else: + raise ValueError("num_output_channels should be either 1 or 3") + + return img + + +@torch.jit.unused +def invert(img: Image.Image) -> Image.Image: + if not _is_pil_image(img): + raise TypeError(f"img should be PIL Image. Got {type(img)}") + return ImageOps.invert(img) + + +@torch.jit.unused +def posterize(img: Image.Image, bits: int) -> Image.Image: + if not _is_pil_image(img): + raise TypeError(f"img should be PIL Image. Got {type(img)}") + return ImageOps.posterize(img, bits) + + +@torch.jit.unused +def solarize(img: Image.Image, threshold: int) -> Image.Image: + if not _is_pil_image(img): + raise TypeError(f"img should be PIL Image. Got {type(img)}") + return ImageOps.solarize(img, threshold) + + +@torch.jit.unused +def adjust_sharpness(img: Image.Image, sharpness_factor: float) -> Image.Image: + if not _is_pil_image(img): + raise TypeError(f"img should be PIL Image. Got {type(img)}") + + enhancer = ImageEnhance.Sharpness(img) + img = enhancer.enhance(sharpness_factor) + return img + + +@torch.jit.unused +def autocontrast(img: Image.Image) -> Image.Image: + if not _is_pil_image(img): + raise TypeError(f"img should be PIL Image. Got {type(img)}") + return ImageOps.autocontrast(img) + + +@torch.jit.unused +def equalize(img: Image.Image) -> Image.Image: + if not _is_pil_image(img): + raise TypeError(f"img should be PIL Image. Got {type(img)}") + return ImageOps.equalize(img) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor.py new file mode 100644 index 0000000000..09ae726931 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor.py @@ -0,0 +1,987 @@ +import warnings +from typing import Optional, Tuple, List + +import torch +from torch import Tensor +from torch.jit.annotations import BroadcastingList2 +from torch.nn.functional import grid_sample, conv2d, interpolate, pad as torch_pad + + +def _is_tensor_a_torch_image(x: Tensor) -> bool: + return x.ndim >= 2 + + +def _assert_image_tensor(img: Tensor) -> None: + if not _is_tensor_a_torch_image(img): + raise TypeError("Tensor is not a torch image.") + + +def get_image_size(img: Tensor) -> List[int]: + # Returns (w, h) of tensor image + _assert_image_tensor(img) + return [img.shape[-1], img.shape[-2]] + + +def get_image_num_channels(img: Tensor) -> int: + if img.ndim == 2: + return 1 + elif img.ndim > 2: + return img.shape[-3] + + raise TypeError(f"Input ndim should be 2 or more. Got {img.ndim}") + + +def _max_value(dtype: torch.dtype) -> float: + # TODO: replace this method with torch.iinfo when it gets torchscript support. 
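+    # Until then, the maximum is computed by hand: the loop below keeps doubling the candidate
+    # bit width and retains a.pow(bits - signed) - 1 while it still grows; integer wrap-around
+    # eventually stops the growth (e.g. torch.uint8 -> 255, torch.int8 -> 127, torch.int32 -> 2**31 - 1).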
+ # https://github.com/pytorch/pytorch/issues/41492 + + a = torch.tensor(2, dtype=dtype) + signed = 1 if torch.tensor(0, dtype=dtype).is_signed() else 0 + bits = 1 + max_value = torch.tensor(-signed, dtype=torch.long) + while True: + next_value = a.pow(bits - signed).sub(1) + if next_value > max_value: + max_value = next_value + bits *= 2 + else: + break + return max_value.item() + + +def _assert_channels(img: Tensor, permitted: List[int]) -> None: + c = get_image_num_channels(img) + if c not in permitted: + raise TypeError(f"Input image tensor permitted channel values are {permitted}, but found {c}") + + +def convert_image_dtype(image: torch.Tensor, dtype: torch.dtype = torch.float) -> torch.Tensor: + if image.dtype == dtype: + return image + + if image.is_floating_point(): + + # TODO: replace with dtype.is_floating_point when torchscript supports it + if torch.tensor(0, dtype=dtype).is_floating_point(): + return image.to(dtype) + + # float to int + if (image.dtype == torch.float32 and dtype in (torch.int32, torch.int64)) or ( + image.dtype == torch.float64 and dtype == torch.int64 + ): + msg = f"The cast from {image.dtype} to {dtype} cannot be performed safely." + raise RuntimeError(msg) + + # https://github.com/pytorch/vision/pull/2078#issuecomment-612045321 + # For data in the range 0-1, (float * 255).to(uint) is only 255 + # when float is exactly 1.0. + # `max + 1 - epsilon` provides more evenly distributed mapping of + # ranges of floats to ints. + eps = 1e-3 + max_val = _max_value(dtype) + result = image.mul(max_val + 1.0 - eps) + return result.to(dtype) + else: + input_max = _max_value(image.dtype) + + # int to float + # TODO: replace with dtype.is_floating_point when torchscript supports it + if torch.tensor(0, dtype=dtype).is_floating_point(): + image = image.to(dtype) + return image / input_max + + output_max = _max_value(dtype) + + # int to int + if input_max > output_max: + # factor should be forced to int for torch jit script + # otherwise factor is a float and image // factor can produce different results + factor = int((input_max + 1) // (output_max + 1)) + image = torch.div(image, factor, rounding_mode="floor") + return image.to(dtype) + else: + # factor should be forced to int for torch jit script + # otherwise factor is a float and image * factor can produce different results + factor = int((output_max + 1) // (input_max + 1)) + image = image.to(dtype) + return image * factor + + +def vflip(img: Tensor) -> Tensor: + _assert_image_tensor(img) + + return img.flip(-2) + + +def hflip(img: Tensor) -> Tensor: + _assert_image_tensor(img) + + return img.flip(-1) + + +def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor: + _assert_image_tensor(img) + + w, h = get_image_size(img) + right = left + width + bottom = top + height + + if left < 0 or top < 0 or right > w or bottom > h: + padding_ltrb = [max(-left, 0), max(-top, 0), max(right - w, 0), max(bottom - h, 0)] + return pad(img[..., max(top, 0) : bottom, max(left, 0) : right], padding_ltrb, fill=0) + return img[..., top:bottom, left:right] + + +def rgb_to_grayscale(img: Tensor, num_output_channels: int = 1) -> Tensor: + if img.ndim < 3: + raise TypeError(f"Input image tensor should have at least 3 dimensions, but found {img.ndim}") + _assert_channels(img, [3]) + + if num_output_channels not in (1, 3): + raise ValueError("num_output_channels should be either 1 or 3") + + r, g, b = img.unbind(dim=-3) + # This implementation closely follows the TF one: + # 
https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/python/ops/image_ops_impl.py#L2105-L2138 + l_img = (0.2989 * r + 0.587 * g + 0.114 * b).to(img.dtype) + l_img = l_img.unsqueeze(dim=-3) + + if num_output_channels == 3: + return l_img.expand(img.shape) + + return l_img + + +def adjust_brightness(img: Tensor, brightness_factor: float) -> Tensor: + if brightness_factor < 0: + raise ValueError(f"brightness_factor ({brightness_factor}) is not non-negative.") + + _assert_image_tensor(img) + + _assert_channels(img, [1, 3]) + + return _blend(img, torch.zeros_like(img), brightness_factor) + + +def adjust_contrast(img: Tensor, contrast_factor: float) -> Tensor: + if contrast_factor < 0: + raise ValueError(f"contrast_factor ({contrast_factor}) is not non-negative.") + + _assert_image_tensor(img) + + _assert_channels(img, [3, 1]) + c = get_image_num_channels(img) + dtype = img.dtype if torch.is_floating_point(img) else torch.float32 + if c == 3: + mean = torch.mean(rgb_to_grayscale(img).to(dtype), dim=(-3, -2, -1), keepdim=True) + else: + mean = torch.mean(img.to(dtype), dim=(-3, -2, -1), keepdim=True) + + return _blend(img, mean, contrast_factor) + + +def adjust_hue(img: Tensor, hue_factor: float) -> Tensor: + if not (-0.5 <= hue_factor <= 0.5): + raise ValueError(f"hue_factor ({hue_factor}) is not in [-0.5, 0.5].") + + if not (isinstance(img, torch.Tensor)): + raise TypeError("Input img should be Tensor image") + + _assert_image_tensor(img) + + _assert_channels(img, [1, 3]) + if get_image_num_channels(img) == 1: # Match PIL behaviour + return img + + orig_dtype = img.dtype + if img.dtype == torch.uint8: + img = img.to(dtype=torch.float32) / 255.0 + + img = _rgb2hsv(img) + h, s, v = img.unbind(dim=-3) + h = (h + hue_factor) % 1.0 + img = torch.stack((h, s, v), dim=-3) + img_hue_adj = _hsv2rgb(img) + + if orig_dtype == torch.uint8: + img_hue_adj = (img_hue_adj * 255.0).to(dtype=orig_dtype) + + return img_hue_adj + + +def adjust_saturation(img: Tensor, saturation_factor: float) -> Tensor: + if saturation_factor < 0: + raise ValueError(f"saturation_factor ({saturation_factor}) is not non-negative.") + + _assert_image_tensor(img) + + _assert_channels(img, [1, 3]) + + if get_image_num_channels(img) == 1: # Match PIL behaviour + return img + + return _blend(img, rgb_to_grayscale(img), saturation_factor) + + +def adjust_gamma(img: Tensor, gamma: float, gain: float = 1) -> Tensor: + if not isinstance(img, torch.Tensor): + raise TypeError("Input img should be a Tensor.") + + _assert_channels(img, [1, 3]) + + if gamma < 0: + raise ValueError("Gamma should be a non-negative real number") + + result = img + dtype = img.dtype + if not torch.is_floating_point(img): + result = convert_image_dtype(result, torch.float32) + + result = (gain * result ** gamma).clamp(0, 1) + + result = convert_image_dtype(result, dtype) + return result + + +def center_crop(img: Tensor, output_size: BroadcastingList2[int]) -> Tensor: + """DEPRECATED""" + warnings.warn( + "This method is deprecated and will be removed in future releases. Please, use ``F.center_crop`` instead." 
+ ) + + _assert_image_tensor(img) + + _, image_width, image_height = img.size() + crop_height, crop_width = output_size + # crop_top = int(round((image_height - crop_height) / 2.)) + # Result can be different between python func and scripted func + # Temporary workaround: + crop_top = int((image_height - crop_height + 1) * 0.5) + # crop_left = int(round((image_width - crop_width) / 2.)) + # Result can be different between python func and scripted func + # Temporary workaround: + crop_left = int((image_width - crop_width + 1) * 0.5) + + return crop(img, crop_top, crop_left, crop_height, crop_width) + + +def five_crop(img: Tensor, size: BroadcastingList2[int]) -> List[Tensor]: + """DEPRECATED""" + warnings.warn( + "This method is deprecated and will be removed in future releases. Please, use ``F.five_crop`` instead." + ) + + _assert_image_tensor(img) + + assert len(size) == 2, "Please provide only two dimensions (h, w) for size." + + _, image_width, image_height = img.size() + crop_height, crop_width = size + if crop_width > image_width or crop_height > image_height: + msg = "Requested crop size {} is bigger than input size {}" + raise ValueError(msg.format(size, (image_height, image_width))) + + tl = crop(img, 0, 0, crop_width, crop_height) + tr = crop(img, image_width - crop_width, 0, image_width, crop_height) + bl = crop(img, 0, image_height - crop_height, crop_width, image_height) + br = crop(img, image_width - crop_width, image_height - crop_height, image_width, image_height) + center = center_crop(img, (crop_height, crop_width)) + + return [tl, tr, bl, br, center] + + +def ten_crop(img: Tensor, size: BroadcastingList2[int], vertical_flip: bool = False) -> List[Tensor]: + """DEPRECATED""" + warnings.warn( + "This method is deprecated and will be removed in future releases. Please, use ``F.ten_crop`` instead." + ) + + _assert_image_tensor(img) + + assert len(size) == 2, "Please provide only two dimensions (h, w) for size." + first_five = five_crop(img, size) + + if vertical_flip: + img = vflip(img) + else: + img = hflip(img) + + second_five = five_crop(img, size) + + return first_five + second_five + + +def _blend(img1: Tensor, img2: Tensor, ratio: float) -> Tensor: + ratio = float(ratio) + bound = 1.0 if img1.is_floating_point() else 255.0 + return (ratio * img1 + (1.0 - ratio) * img2).clamp(0, bound).to(img1.dtype) + + +def _rgb2hsv(img: Tensor) -> Tensor: + r, g, b = img.unbind(dim=-3) + + # Implementation is based on https://github.com/python-pillow/Pillow/blob/4174d4267616897df3746d315d5a2d0f82c656ee/ + # src/libImaging/Convert.c#L330 + maxc = torch.max(img, dim=-3).values + minc = torch.min(img, dim=-3).values + + # The algorithm erases S and H channel where `maxc = minc`. This avoids NaN + # from happening in the results, because + # + S channel has division by `maxc`, which is zero only if `maxc = minc` + # + H channel has division by `(maxc - minc)`. + # + # Instead of overwriting NaN afterwards, we just prevent it from occuring so + # we don't need to deal with it in case we save the NaN in a buffer in + # backprop, if it is ever supported, but it doesn't hurt to do so. + eqc = maxc == minc + + cr = maxc - minc + # Since `eqc => cr = 0`, replacing denominator with 1 when `eqc` is fine. + ones = torch.ones_like(maxc) + s = cr / torch.where(eqc, ones, maxc) + # Note that `eqc => maxc = minc = r = g = b`. 
So the following calculation + # of `h` would reduce to `bc - gc + 2 + rc - bc + 4 + rc - bc = 6` so it + # would not matter what values `rc`, `gc`, and `bc` have here, and thus + # replacing denominator with 1 when `eqc` is fine. + cr_divisor = torch.where(eqc, ones, cr) + rc = (maxc - r) / cr_divisor + gc = (maxc - g) / cr_divisor + bc = (maxc - b) / cr_divisor + + hr = (maxc == r) * (bc - gc) + hg = ((maxc == g) & (maxc != r)) * (2.0 + rc - bc) + hb = ((maxc != g) & (maxc != r)) * (4.0 + gc - rc) + h = hr + hg + hb + h = torch.fmod((h / 6.0 + 1.0), 1.0) + return torch.stack((h, s, maxc), dim=-3) + + +def _hsv2rgb(img: Tensor) -> Tensor: + h, s, v = img.unbind(dim=-3) + i = torch.floor(h * 6.0) + f = (h * 6.0) - i + i = i.to(dtype=torch.int32) + + p = torch.clamp((v * (1.0 - s)), 0.0, 1.0) + q = torch.clamp((v * (1.0 - s * f)), 0.0, 1.0) + t = torch.clamp((v * (1.0 - s * (1.0 - f))), 0.0, 1.0) + i = i % 6 + + mask = i.unsqueeze(dim=-3) == torch.arange(6, device=i.device).view(-1, 1, 1) + + a1 = torch.stack((v, q, p, p, t, v), dim=-3) + a2 = torch.stack((t, v, v, q, p, p), dim=-3) + a3 = torch.stack((p, p, t, v, v, q), dim=-3) + a4 = torch.stack((a1, a2, a3), dim=-4) + + return torch.einsum("...ijk, ...xijk -> ...xjk", mask.to(dtype=img.dtype), a4) + + +def _pad_symmetric(img: Tensor, padding: List[int]) -> Tensor: + # padding is left, right, top, bottom + + # crop if needed + if padding[0] < 0 or padding[1] < 0 or padding[2] < 0 or padding[3] < 0: + neg_min_padding = [-min(x, 0) for x in padding] + crop_left, crop_right, crop_top, crop_bottom = neg_min_padding + img = img[..., crop_top : img.shape[-2] - crop_bottom, crop_left : img.shape[-1] - crop_right] + padding = [max(x, 0) for x in padding] + + in_sizes = img.size() + + _x_indices = [i for i in range(in_sizes[-1])] # [0, 1, 2, 3, ...] + left_indices = [i for i in range(padding[0] - 1, -1, -1)] # e.g. [3, 2, 1, 0] + right_indices = [-(i + 1) for i in range(padding[1])] # e.g. 
[-1, -2, -3] + x_indices = torch.tensor(left_indices + _x_indices + right_indices, device=img.device) + + _y_indices = [i for i in range(in_sizes[-2])] + top_indices = [i for i in range(padding[2] - 1, -1, -1)] + bottom_indices = [-(i + 1) for i in range(padding[3])] + y_indices = torch.tensor(top_indices + _y_indices + bottom_indices, device=img.device) + + ndim = img.ndim + if ndim == 3: + return img[:, y_indices[:, None], x_indices[None, :]] + elif ndim == 4: + return img[:, :, y_indices[:, None], x_indices[None, :]] + else: + raise RuntimeError("Symmetric padding of N-D tensors are not supported yet") + + +def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "constant") -> Tensor: + _assert_image_tensor(img) + + if not isinstance(padding, (int, tuple, list)): + raise TypeError("Got inappropriate padding arg") + if not isinstance(fill, (int, float)): + raise TypeError("Got inappropriate fill arg") + if not isinstance(padding_mode, str): + raise TypeError("Got inappropriate padding_mode arg") + + if isinstance(padding, tuple): + padding = list(padding) + + if isinstance(padding, list) and len(padding) not in [1, 2, 4]: + raise ValueError(f"Padding must be an int or a 1, 2, or 4 element tuple, not a {len(padding)} element tuple") + + if padding_mode not in ["constant", "edge", "reflect", "symmetric"]: + raise ValueError("Padding mode should be either constant, edge, reflect or symmetric") + + if isinstance(padding, int): + if torch.jit.is_scripting(): + # This maybe unreachable + raise ValueError("padding can't be an int while torchscripting, set it as a list [value, ]") + pad_left = pad_right = pad_top = pad_bottom = padding + elif len(padding) == 1: + pad_left = pad_right = pad_top = pad_bottom = padding[0] + elif len(padding) == 2: + pad_left = pad_right = padding[0] + pad_top = pad_bottom = padding[1] + else: + pad_left = padding[0] + pad_top = padding[1] + pad_right = padding[2] + pad_bottom = padding[3] + + p = [pad_left, pad_right, pad_top, pad_bottom] + + if padding_mode == "edge": + # remap padding_mode str + padding_mode = "replicate" + elif padding_mode == "symmetric": + # route to another implementation + return _pad_symmetric(img, p) + + need_squeeze = False + if img.ndim < 4: + img = img.unsqueeze(dim=0) + need_squeeze = True + + out_dtype = img.dtype + need_cast = False + if (padding_mode != "constant") and img.dtype not in (torch.float32, torch.float64): + # Here we temporary cast input tensor to float + # until pytorch issue is resolved : + # https://github.com/pytorch/pytorch/issues/40763 + need_cast = True + img = img.to(torch.float32) + + img = torch_pad(img, p, mode=padding_mode, value=float(fill)) + + if need_squeeze: + img = img.squeeze(dim=0) + + if need_cast: + img = img.to(out_dtype) + + return img + + +def resize( + img: Tensor, + size: List[int], + interpolation: str = "bilinear", + max_size: Optional[int] = None, + antialias: Optional[bool] = None, +) -> Tensor: + _assert_image_tensor(img) + + if not isinstance(size, (int, tuple, list)): + raise TypeError("Got inappropriate size arg") + if not isinstance(interpolation, str): + raise TypeError("Got inappropriate interpolation arg") + + if interpolation not in ["nearest", "bilinear", "bicubic"]: + raise ValueError("This interpolation mode is unsupported with Tensor input") + + if isinstance(size, tuple): + size = list(size) + + if isinstance(size, list): + if len(size) not in [1, 2]: + raise ValueError( + f"Size must be an int or a 1 or 2 element tuple/list, not a {len(size)} element 
tuple/list" + ) + if max_size is not None and len(size) != 1: + raise ValueError( + "max_size should only be passed if size specifies the length of the smaller edge, " + "i.e. size should be an int or a sequence of length 1 in torchscript mode." + ) + + if antialias is None: + antialias = False + + if antialias and interpolation not in ["bilinear", "bicubic"]: + raise ValueError("Antialias option is supported for bilinear and bicubic interpolation modes only") + + w, h = get_image_size(img) + + if isinstance(size, int) or len(size) == 1: # specified size only for the smallest edge + short, long = (w, h) if w <= h else (h, w) + requested_new_short = size if isinstance(size, int) else size[0] + + if short == requested_new_short: + return img + + new_short, new_long = requested_new_short, int(requested_new_short * long / short) + + if max_size is not None: + if max_size <= requested_new_short: + raise ValueError( + f"max_size = {max_size} must be strictly greater than the requested " + f"size for the smaller edge size = {size}" + ) + if new_long > max_size: + new_short, new_long = int(max_size * new_short / new_long), max_size + + new_w, new_h = (new_short, new_long) if w <= h else (new_long, new_short) + + else: # specified both h and w + new_w, new_h = size[1], size[0] + + img, need_cast, need_squeeze, out_dtype = _cast_squeeze_in(img, [torch.float32, torch.float64]) + + # Define align_corners to avoid warnings + align_corners = False if interpolation in ["bilinear", "bicubic"] else None + + if antialias: + if interpolation == "bilinear": + img = torch.ops.torchvision._interpolate_bilinear2d_aa(img, [new_h, new_w], align_corners=False) + elif interpolation == "bicubic": + img = torch.ops.torchvision._interpolate_bicubic2d_aa(img, [new_h, new_w], align_corners=False) + else: + img = interpolate(img, size=[new_h, new_w], mode=interpolation, align_corners=align_corners) + + if interpolation == "bicubic" and out_dtype == torch.uint8: + img = img.clamp(min=0, max=255) + + img = _cast_squeeze_out(img, need_cast=need_cast, need_squeeze=need_squeeze, out_dtype=out_dtype) + + return img + + +def _assert_grid_transform_inputs( + img: Tensor, + matrix: Optional[List[float]], + interpolation: str, + fill: Optional[List[float]], + supported_interpolation_modes: List[str], + coeffs: Optional[List[float]] = None, +) -> None: + + if not (isinstance(img, torch.Tensor)): + raise TypeError("Input img should be Tensor") + + _assert_image_tensor(img) + + if matrix is not None and not isinstance(matrix, list): + raise TypeError("Argument matrix should be a list") + + if matrix is not None and len(matrix) != 6: + raise ValueError("Argument matrix should have 6 float values") + + if coeffs is not None and len(coeffs) != 8: + raise ValueError("Argument coeffs should have 8 float values") + + if fill is not None and not isinstance(fill, (int, float, tuple, list)): + warnings.warn("Argument fill should be either int, float, tuple or list") + + # Check fill + num_channels = get_image_num_channels(img) + if isinstance(fill, (tuple, list)) and (len(fill) > 1 and len(fill) != num_channels): + msg = ( + "The number of elements in 'fill' cannot broadcast to match the number of " + "channels of the image ({} != {})" + ) + raise ValueError(msg.format(len(fill), num_channels)) + + if interpolation not in supported_interpolation_modes: + raise ValueError(f"Interpolation mode '{interpolation}' is unsupported with Tensor input") + + +def _cast_squeeze_in(img: Tensor, req_dtypes: List[torch.dtype]) -> Tuple[Tensor, bool, bool, 
torch.dtype]: + need_squeeze = False + # make image NCHW + if img.ndim < 4: + img = img.unsqueeze(dim=0) + need_squeeze = True + + out_dtype = img.dtype + need_cast = False + if out_dtype not in req_dtypes: + need_cast = True + req_dtype = req_dtypes[0] + img = img.to(req_dtype) + return img, need_cast, need_squeeze, out_dtype + + +def _cast_squeeze_out(img: Tensor, need_cast: bool, need_squeeze: bool, out_dtype: torch.dtype) -> Tensor: + if need_squeeze: + img = img.squeeze(dim=0) + + if need_cast: + if out_dtype in (torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64): + # it is better to round before cast + img = torch.round(img) + img = img.to(out_dtype) + + return img + + +def _apply_grid_transform(img: Tensor, grid: Tensor, mode: str, fill: Optional[List[float]]) -> Tensor: + + img, need_cast, need_squeeze, out_dtype = _cast_squeeze_in( + img, + [ + grid.dtype, + ], + ) + + if img.shape[0] > 1: + # Apply same grid to a batch of images + grid = grid.expand(img.shape[0], grid.shape[1], grid.shape[2], grid.shape[3]) + + # Append a dummy mask for customized fill colors, should be faster than grid_sample() twice + if fill is not None: + dummy = torch.ones((img.shape[0], 1, img.shape[2], img.shape[3]), dtype=img.dtype, device=img.device) + img = torch.cat((img, dummy), dim=1) + + img = grid_sample(img, grid, mode=mode, padding_mode="zeros", align_corners=False) + + # Fill with required color + if fill is not None: + mask = img[:, -1:, :, :] # N * 1 * H * W + img = img[:, :-1, :, :] # N * C * H * W + mask = mask.expand_as(img) + len_fill = len(fill) if isinstance(fill, (tuple, list)) else 1 + fill_img = torch.tensor(fill, dtype=img.dtype, device=img.device).view(1, len_fill, 1, 1).expand_as(img) + if mode == "nearest": + mask = mask < 0.5 + img[mask] = fill_img[mask] + else: # 'bilinear' + img = img * mask + (1.0 - mask) * fill_img + + img = _cast_squeeze_out(img, need_cast, need_squeeze, out_dtype) + return img + + +def _gen_affine_grid( + theta: Tensor, + w: int, + h: int, + ow: int, + oh: int, +) -> Tensor: + # https://github.com/pytorch/pytorch/blob/74b65c32be68b15dc7c9e8bb62459efbfbde33d8/aten/src/ATen/native/ + # AffineGridGenerator.cpp#L18 + # Difference with AffineGridGenerator is that: + # 1) we normalize grid values after applying theta + # 2) we can normalize by other image size, such that it covers "extend" option like in PIL.Image.rotate + + d = 0.5 + base_grid = torch.empty(1, oh, ow, 3, dtype=theta.dtype, device=theta.device) + x_grid = torch.linspace(-ow * 0.5 + d, ow * 0.5 + d - 1, steps=ow, device=theta.device) + base_grid[..., 0].copy_(x_grid) + y_grid = torch.linspace(-oh * 0.5 + d, oh * 0.5 + d - 1, steps=oh, device=theta.device).unsqueeze_(-1) + base_grid[..., 1].copy_(y_grid) + base_grid[..., 2].fill_(1) + + rescaled_theta = theta.transpose(1, 2) / torch.tensor([0.5 * w, 0.5 * h], dtype=theta.dtype, device=theta.device) + output_grid = base_grid.view(1, oh * ow, 3).bmm(rescaled_theta) + return output_grid.view(1, oh, ow, 2) + + +def affine( + img: Tensor, matrix: List[float], interpolation: str = "nearest", fill: Optional[List[float]] = None +) -> Tensor: + _assert_grid_transform_inputs(img, matrix, interpolation, fill, ["nearest", "bilinear"]) + + dtype = img.dtype if torch.is_floating_point(img) else torch.float32 + theta = torch.tensor(matrix, dtype=dtype, device=img.device).reshape(1, 2, 3) + shape = img.shape + # grid will be generated on the same device as theta and img + grid = _gen_affine_grid(theta, w=shape[-1], h=shape[-2], ow=shape[-1], 
oh=shape[-2]) + return _apply_grid_transform(img, grid, interpolation, fill=fill) + + +def _compute_output_size(matrix: List[float], w: int, h: int) -> Tuple[int, int]: + + # Inspired of PIL implementation: + # https://github.com/python-pillow/Pillow/blob/11de3318867e4398057373ee9f12dcb33db7335c/src/PIL/Image.py#L2054 + + # pts are Top-Left, Top-Right, Bottom-Left, Bottom-Right points. + pts = torch.tensor( + [ + [-0.5 * w, -0.5 * h, 1.0], + [-0.5 * w, 0.5 * h, 1.0], + [0.5 * w, 0.5 * h, 1.0], + [0.5 * w, -0.5 * h, 1.0], + ] + ) + theta = torch.tensor(matrix, dtype=torch.float).reshape(1, 2, 3) + new_pts = pts.view(1, 4, 3).bmm(theta.transpose(1, 2)).view(4, 2) + min_vals, _ = new_pts.min(dim=0) + max_vals, _ = new_pts.max(dim=0) + + # Truncate precision to 1e-4 to avoid ceil of Xe-15 to 1.0 + tol = 1e-4 + cmax = torch.ceil((max_vals / tol).trunc_() * tol) + cmin = torch.floor((min_vals / tol).trunc_() * tol) + size = cmax - cmin + return int(size[0]), int(size[1]) + + +def rotate( + img: Tensor, + matrix: List[float], + interpolation: str = "nearest", + expand: bool = False, + fill: Optional[List[float]] = None, +) -> Tensor: + _assert_grid_transform_inputs(img, matrix, interpolation, fill, ["nearest", "bilinear"]) + w, h = img.shape[-1], img.shape[-2] + ow, oh = _compute_output_size(matrix, w, h) if expand else (w, h) + dtype = img.dtype if torch.is_floating_point(img) else torch.float32 + theta = torch.tensor(matrix, dtype=dtype, device=img.device).reshape(1, 2, 3) + # grid will be generated on the same device as theta and img + grid = _gen_affine_grid(theta, w=w, h=h, ow=ow, oh=oh) + + return _apply_grid_transform(img, grid, interpolation, fill=fill) + + +def _perspective_grid(coeffs: List[float], ow: int, oh: int, dtype: torch.dtype, device: torch.device) -> Tensor: + # https://github.com/python-pillow/Pillow/blob/4634eafe3c695a014267eefdce830b4a825beed7/ + # src/libImaging/Geometry.c#L394 + + # + # x_out = (coeffs[0] * x + coeffs[1] * y + coeffs[2]) / (coeffs[6] * x + coeffs[7] * y + 1) + # y_out = (coeffs[3] * x + coeffs[4] * y + coeffs[5]) / (coeffs[6] * x + coeffs[7] * y + 1) + # + theta1 = torch.tensor( + [[[coeffs[0], coeffs[1], coeffs[2]], [coeffs[3], coeffs[4], coeffs[5]]]], dtype=dtype, device=device + ) + theta2 = torch.tensor([[[coeffs[6], coeffs[7], 1.0], [coeffs[6], coeffs[7], 1.0]]], dtype=dtype, device=device) + + d = 0.5 + base_grid = torch.empty(1, oh, ow, 3, dtype=dtype, device=device) + x_grid = torch.linspace(d, ow * 1.0 + d - 1.0, steps=ow, device=device) + base_grid[..., 0].copy_(x_grid) + y_grid = torch.linspace(d, oh * 1.0 + d - 1.0, steps=oh, device=device).unsqueeze_(-1) + base_grid[..., 1].copy_(y_grid) + base_grid[..., 2].fill_(1) + + rescaled_theta1 = theta1.transpose(1, 2) / torch.tensor([0.5 * ow, 0.5 * oh], dtype=dtype, device=device) + output_grid1 = base_grid.view(1, oh * ow, 3).bmm(rescaled_theta1) + output_grid2 = base_grid.view(1, oh * ow, 3).bmm(theta2.transpose(1, 2)) + + output_grid = output_grid1 / output_grid2 - 1.0 + return output_grid.view(1, oh, ow, 2) + + +def perspective( + img: Tensor, perspective_coeffs: List[float], interpolation: str = "bilinear", fill: Optional[List[float]] = None +) -> Tensor: + if not (isinstance(img, torch.Tensor)): + raise TypeError("Input img should be Tensor.") + + _assert_image_tensor(img) + + _assert_grid_transform_inputs( + img, + matrix=None, + interpolation=interpolation, + fill=fill, + supported_interpolation_modes=["nearest", "bilinear"], + coeffs=perspective_coeffs, + ) + + ow, oh = img.shape[-1], 
img.shape[-2] + dtype = img.dtype if torch.is_floating_point(img) else torch.float32 + grid = _perspective_grid(perspective_coeffs, ow=ow, oh=oh, dtype=dtype, device=img.device) + return _apply_grid_transform(img, grid, interpolation, fill=fill) + + +def _get_gaussian_kernel1d(kernel_size: int, sigma: float) -> Tensor: + ksize_half = (kernel_size - 1) * 0.5 + + x = torch.linspace(-ksize_half, ksize_half, steps=kernel_size) + pdf = torch.exp(-0.5 * (x / sigma).pow(2)) + kernel1d = pdf / pdf.sum() + + return kernel1d + + +def _get_gaussian_kernel2d( + kernel_size: List[int], sigma: List[float], dtype: torch.dtype, device: torch.device +) -> Tensor: + kernel1d_x = _get_gaussian_kernel1d(kernel_size[0], sigma[0]).to(device, dtype=dtype) + kernel1d_y = _get_gaussian_kernel1d(kernel_size[1], sigma[1]).to(device, dtype=dtype) + kernel2d = torch.mm(kernel1d_y[:, None], kernel1d_x[None, :]) + return kernel2d + + +def gaussian_blur(img: Tensor, kernel_size: List[int], sigma: List[float]) -> Tensor: + if not (isinstance(img, torch.Tensor)): + raise TypeError(f"img should be Tensor. Got {type(img)}") + + _assert_image_tensor(img) + + dtype = img.dtype if torch.is_floating_point(img) else torch.float32 + kernel = _get_gaussian_kernel2d(kernel_size, sigma, dtype=dtype, device=img.device) + kernel = kernel.expand(img.shape[-3], 1, kernel.shape[0], kernel.shape[1]) + + img, need_cast, need_squeeze, out_dtype = _cast_squeeze_in( + img, + [ + kernel.dtype, + ], + ) + + # padding = (left, right, top, bottom) + padding = [kernel_size[0] // 2, kernel_size[0] // 2, kernel_size[1] // 2, kernel_size[1] // 2] + img = torch_pad(img, padding, mode="reflect") + img = conv2d(img, kernel, groups=img.shape[-3]) + + img = _cast_squeeze_out(img, need_cast, need_squeeze, out_dtype) + return img + + +def invert(img: Tensor) -> Tensor: + + _assert_image_tensor(img) + + if img.ndim < 3: + raise TypeError(f"Input image tensor should have at least 3 dimensions, but found {img.ndim}") + + _assert_channels(img, [1, 3]) + + bound = torch.tensor(1 if img.is_floating_point() else 255, dtype=img.dtype, device=img.device) + return bound - img + + +def posterize(img: Tensor, bits: int) -> Tensor: + + _assert_image_tensor(img) + + if img.ndim < 3: + raise TypeError(f"Input image tensor should have at least 3 dimensions, but found {img.ndim}") + if img.dtype != torch.uint8: + raise TypeError(f"Only torch.uint8 image tensors are supported, but found {img.dtype}") + + _assert_channels(img, [1, 3]) + mask = -int(2 ** (8 - bits)) # JIT-friendly for: ~(2 ** (8 - bits) - 1) + return img & mask + + +def solarize(img: Tensor, threshold: float) -> Tensor: + + _assert_image_tensor(img) + + if img.ndim < 3: + raise TypeError(f"Input image tensor should have at least 3 dimensions, but found {img.ndim}") + + _assert_channels(img, [1, 3]) + + inverted_img = invert(img) + return torch.where(img >= threshold, inverted_img, img) + + +def _blurred_degenerate_image(img: Tensor) -> Tensor: + dtype = img.dtype if torch.is_floating_point(img) else torch.float32 + + kernel = torch.ones((3, 3), dtype=dtype, device=img.device) + kernel[1, 1] = 5.0 + kernel /= kernel.sum() + kernel = kernel.expand(img.shape[-3], 1, kernel.shape[0], kernel.shape[1]) + + result_tmp, need_cast, need_squeeze, out_dtype = _cast_squeeze_in( + img, + [ + kernel.dtype, + ], + ) + result_tmp = conv2d(result_tmp, kernel, groups=result_tmp.shape[-3]) + result_tmp = _cast_squeeze_out(result_tmp, need_cast, need_squeeze, out_dtype) + + result = img.clone() + result[..., 1:-1, 1:-1] = 
result_tmp + + return result + + +def adjust_sharpness(img: Tensor, sharpness_factor: float) -> Tensor: + if sharpness_factor < 0: + raise ValueError(f"sharpness_factor ({sharpness_factor}) is not non-negative.") + + _assert_image_tensor(img) + + _assert_channels(img, [1, 3]) + + if img.size(-1) <= 2 or img.size(-2) <= 2: + return img + + return _blend(img, _blurred_degenerate_image(img), sharpness_factor) + + +def autocontrast(img: Tensor) -> Tensor: + + _assert_image_tensor(img) + + if img.ndim < 3: + raise TypeError(f"Input image tensor should have at least 3 dimensions, but found {img.ndim}") + + _assert_channels(img, [1, 3]) + + bound = 1.0 if img.is_floating_point() else 255.0 + dtype = img.dtype if torch.is_floating_point(img) else torch.float32 + + minimum = img.amin(dim=(-2, -1), keepdim=True).to(dtype) + maximum = img.amax(dim=(-2, -1), keepdim=True).to(dtype) + eq_idxs = torch.where(minimum == maximum)[0] + minimum[eq_idxs] = 0 + maximum[eq_idxs] = bound + scale = bound / (maximum - minimum) + + return ((img - minimum) * scale).clamp(0, bound).to(img.dtype) + + +def _scale_channel(img_chan: Tensor) -> Tensor: + # TODO: we should expect bincount to always be faster than histc, but this + # isn't always the case. Once + # https://github.com/pytorch/pytorch/issues/53194 is fixed, remove the if + # block and only use bincount. + if img_chan.is_cuda: + hist = torch.histc(img_chan.to(torch.float32), bins=256, min=0, max=255) + else: + hist = torch.bincount(img_chan.view(-1), minlength=256) + + nonzero_hist = hist[hist != 0] + step = torch.div(nonzero_hist[:-1].sum(), 255, rounding_mode="floor") + if step == 0: + return img_chan + + lut = torch.div(torch.cumsum(hist, 0) + torch.div(step, 2, rounding_mode="floor"), step, rounding_mode="floor") + lut = torch.nn.functional.pad(lut, [1, 0])[:-1].clamp(0, 255) + + return lut[img_chan.to(torch.int64)].to(torch.uint8) + + +def _equalize_single_image(img: Tensor) -> Tensor: + return torch.stack([_scale_channel(img[c]) for c in range(img.size(0))]) + + +def equalize(img: Tensor) -> Tensor: + + _assert_image_tensor(img) + + if not (3 <= img.ndim <= 4): + raise TypeError(f"Input image tensor should have 3 or 4 dimensions, but found {img.ndim}") + if img.dtype != torch.uint8: + raise TypeError(f"Only torch.uint8 image tensors are supported, but found {img.dtype}") + + _assert_channels(img, [1, 3]) + + if img.ndim == 3: + return _equalize_single_image(img) + + return torch.stack([_equalize_single_image(x) for x in img]) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor_origin.py new file mode 100644 index 0000000000..b81deed6d4 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor_origin.py @@ -0,0 +1,238 @@ +import torch +import torchvision.transforms.functional as F +from torch import Tensor +from torch.jit.annotations import Optional, List, BroadcastingList2, Tuple + + +def _is_tensor_a_torch_image(input): + return len(input.shape) == 3 + + +def vflip(img): + # type: (Tensor) -> Tensor + """Vertically flip the given the Image Tensor. + + Args: + img (Tensor): Image Tensor to be flipped in the form [C, H, W]. + + Returns: + Tensor: Vertically flipped image Tensor. 
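+
+    Example (an illustrative sketch, assuming ``torch`` is imported)::
+
+        >>> img = torch.arange(6.).reshape(1, 2, 3)  # a 1 x 2 x 3 (C x H x W) tensor
+        >>> vflip(img)[0, 0].tolist()  # the first row is now the former bottom row
+        [3.0, 4.0, 5.0]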
+ """ + if not _is_tensor_a_torch_image(img): + raise TypeError('tensor is not a torch image.') + + return img.flip(-2) + + +def hflip(img): + # type: (Tensor) -> Tensor + """Horizontally flip the given the Image Tensor. + + Args: + img (Tensor): Image Tensor to be flipped in the form [C, H, W]. + + Returns: + Tensor: Horizontally flipped image Tensor. + """ + if not _is_tensor_a_torch_image(img): + raise TypeError('tensor is not a torch image.') + + return img.flip(-1) + + +def crop(img, top, left, height, width): + # type: (Tensor, int, int, int, int) -> Tensor + """Crop the given Image Tensor. + + Args: + img (Tensor): Image to be cropped in the form [C, H, W]. (0,0) denotes the top left corner of the image. + top (int): Vertical component of the top left corner of the crop box. + left (int): Horizontal component of the top left corner of the crop box. + height (int): Height of the crop box. + width (int): Width of the crop box. + + Returns: + Tensor: Cropped image. + """ + if not _is_tensor_a_torch_image(img): + raise TypeError('tensor is not a torch image.') + + return img[..., top:top + height, left:left + width] + + +def rgb_to_grayscale(img): + # type: (Tensor) -> Tensor + """Convert the given RGB Image Tensor to Grayscale. + For RGB to Grayscale conversion, ITU-R 601-2 luma transform is performed which + is L = R * 0.2989 + G * 0.5870 + B * 0.1140 + + Args: + img (Tensor): Image to be converted to Grayscale in the form [C, H, W]. + + Returns: + Tensor: Grayscale image. + + """ + if img.shape[0] != 3: + raise TypeError('Input Image does not contain 3 Channels') + + return (0.2989 * img[0] + 0.5870 * img[1] + 0.1140 * img[2]).to(img.dtype) + + +def adjust_brightness(img, brightness_factor): + # type: (Tensor, float) -> Tensor + """Adjust brightness of an RGB image. + + Args: + img (Tensor): Image to be adjusted. + brightness_factor (float): How much to adjust the brightness. Can be + any non negative number. 0 gives a black image, 1 gives the + original image while 2 increases the brightness by a factor of 2. + + Returns: + Tensor: Brightness adjusted image. + """ + if not _is_tensor_a_torch_image(img): + raise TypeError('tensor is not a torch image.') + + return _blend(img, torch.zeros_like(img), brightness_factor) + + +def adjust_contrast(img, contrast_factor): + # type: (Tensor, float) -> Tensor + """Adjust contrast of an RGB image. + + Args: + img (Tensor): Image to be adjusted. + contrast_factor (float): How much to adjust the contrast. Can be any + non negative number. 0 gives a solid gray image, 1 gives the + original image while 2 increases the contrast by a factor of 2. + + Returns: + Tensor: Contrast adjusted image. + """ + if not _is_tensor_a_torch_image(img): + raise TypeError('tensor is not a torch image.') + + mean = torch.mean(rgb_to_grayscale(img).to(torch.float)) + + return _blend(img, mean, contrast_factor) + + +def adjust_saturation(img, saturation_factor): + # type: (Tensor, float) -> Tensor + """Adjust color saturation of an RGB image. + + Args: + img (Tensor): Image to be adjusted. + saturation_factor (float): How much to adjust the saturation. 0 will + give a black and white image, 1 will give the original image while + 2 will enhance the saturation by a factor of 2. + + Returns: + Tensor: Saturation adjusted image. 
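+
+    Example (an illustrative sketch, assuming ``torch`` is imported)::
+
+        >>> img = torch.rand(3, 8, 8)
+        >>> gray_like = adjust_saturation(img, 0.0)  # factor 0 blends fully towards grayscale
+        >>> bool(torch.allclose(gray_like[0], gray_like[1]))
+        True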
+ """ + if not _is_tensor_a_torch_image(img): + raise TypeError('tensor is not a torch image.') + + return _blend(img, rgb_to_grayscale(img), saturation_factor) + + +def center_crop(img, output_size): + # type: (Tensor, BroadcastingList2[int]) -> Tensor + """Crop the Image Tensor and resize it to desired size. + + Args: + img (Tensor): Image to be cropped. (0,0) denotes the top left corner of the image. + output_size (sequence or int): (height, width) of the crop box. If int, + it is used for both directions + + Returns: + Tensor: Cropped image. + """ + if not _is_tensor_a_torch_image(img): + raise TypeError('tensor is not a torch image.') + + _, image_width, image_height = img.size() + crop_height, crop_width = output_size + crop_top = int(round((image_height - crop_height) / 2.)) + crop_left = int(round((image_width - crop_width) / 2.)) + + return crop(img, crop_top, crop_left, crop_height, crop_width) + + +def five_crop(img, size): + # type: (Tensor, BroadcastingList2[int]) -> List[Tensor] + """Crop the given Image Tensor into four corners and the central crop. + .. Note:: + This transform returns a List of Tensors and there may be a + mismatch in the number of inputs and targets your ``Dataset`` returns. + + Args: + size (sequence or int): Desired output size of the crop. If size is an + int instead of sequence like (h, w), a square crop (size, size) is + made. + + Returns: + List: List (tl, tr, bl, br, center) + Corresponding top left, top right, bottom left, bottom right and center crop. + """ + if not _is_tensor_a_torch_image(img): + raise TypeError('tensor is not a torch image.') + + assert len(size) == 2, "Please provide only two dimensions (h, w) for size." + + _, image_width, image_height = img.size() + crop_height, crop_width = size + if crop_width > image_width or crop_height > image_height: + msg = "Requested crop size {} is bigger than input size {}" + raise ValueError(msg.format(size, (image_height, image_width))) + + tl = crop(img, 0, 0, crop_width, crop_height) + tr = crop(img, image_width - crop_width, 0, image_width, crop_height) + bl = crop(img, 0, image_height - crop_height, crop_width, image_height) + br = crop(img, image_width - crop_width, image_height - crop_height, image_width, image_height) + center = center_crop(img, (crop_height, crop_width)) + + return [tl, tr, bl, br, center] + + +def ten_crop(img, size, vertical_flip=False): + # type: (Tensor, BroadcastingList2[int], bool) -> List[Tensor] + """Crop the given Image Tensor into four corners and the central crop plus the + flipped version of these (horizontal flipping is used by default). + .. Note:: + This transform returns a List of images and there may be a + mismatch in the number of inputs and targets your ``Dataset`` returns. + + Args: + size (sequence or int): Desired output size of the crop. If size is an + int instead of sequence like (h, w), a square crop (size, size) is + made. + vertical_flip (bool): Use vertical flipping instead of horizontal + + Returns: + List: List (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip, br_flip, center_flip) + Corresponding top left, top right, bottom left, bottom right and center crop + and same for the flipped image's tensor. + """ + if not _is_tensor_a_torch_image(img): + raise TypeError('tensor is not a torch image.') + + assert len(size) == 2, "Please provide only two dimensions (h, w) for size." 
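+    # five_crop gives the four corner crops plus the center crop; the same is then applied to
+    # the flipped image (vertical or horizontal flip), so ten crops are returned in total.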
+ first_five = five_crop(img, size) + + if vertical_flip: + img = vflip(img) + else: + img = hflip(img) + + second_five = five_crop(img, size) + + return first_five + second_five + + +def _blend(img1, img2, ratio): + # type: (Tensor, Tensor, float) -> Tensor + bound = 1 if img1.dtype in [torch.half, torch.float32, torch.float64] else 255 + return (ratio * img1 + (1 - ratio) * img2).clamp(0, bound).to(img1.dtype) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms.py new file mode 100644 index 0000000000..a409ff3cbb --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms.py @@ -0,0 +1,2016 @@ +import math +import numbers +import random +import warnings +from collections.abc import Sequence +from typing import Tuple, List, Optional + +import torch +from torch import Tensor + +try: + import accimage +except ImportError: + accimage = None + +from . import functional as F +from .functional import InterpolationMode, _interpolation_modes_from_int + + +__all__ = [ + "Compose", + "ToTensor", + "PILToTensor", + "ConvertImageDtype", + "ToPILImage", + "Normalize", + "Resize", + "Scale", + "CenterCrop", + "Pad", + "Lambda", + "RandomApply", + "RandomChoice", + "RandomOrder", + "RandomCrop", + "RandomHorizontalFlip", + "RandomVerticalFlip", + "RandomResizedCrop", + "RandomSizedCrop", + "FiveCrop", + "TenCrop", + "LinearTransformation", + "ColorJitter", + "RandomRotation", + "RandomAffine", + "Grayscale", + "RandomGrayscale", + "RandomPerspective", + "RandomErasing", + "GaussianBlur", + "InterpolationMode", + "RandomInvert", + "RandomPosterize", + "RandomSolarize", + "RandomAdjustSharpness", + "RandomAutocontrast", + "RandomEqualize", +] + + +class Compose: + """Composes several transforms together. This transform does not support torchscript. + Please, see the note below. + + Args: + transforms (list of ``Transform`` objects): list of transforms to compose. + + Example: + >>> transforms.Compose([ + >>> transforms.CenterCrop(10), + >>> transforms.PILToTensor(), + >>> transforms.ConvertImageDtype(torch.float), + >>> ]) + + .. note:: + In order to script the transformations, please use ``torch.nn.Sequential`` as below. + + >>> transforms = torch.nn.Sequential( + >>> transforms.CenterCrop(10), + >>> transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + >>> ) + >>> scripted_transforms = torch.jit.script(transforms) + + Make sure to use only scriptable transformations, i.e. that work with ``torch.Tensor``, does not require + `lambda` functions or ``PIL.Image``. + + """ + + def __init__(self, transforms): + self.transforms = transforms + + def __call__(self, img): + for t in self.transforms: + img = t(img) + return img + + def __repr__(self): + format_string = self.__class__.__name__ + "(" + for t in self.transforms: + format_string += "\n" + format_string += f" {t}" + format_string += "\n)" + return format_string + + +class ToTensor: + """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor. This transform does not support torchscript. + + Converts a PIL Image or numpy.ndarray (H x W x C) in the range + [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0] + if the PIL Image belongs to one of the modes (L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK, 1) + or if the numpy.ndarray has dtype = np.uint8 + + In the other cases, tensors are returned without scaling. + + .. 
note:: + Because the input image is scaled to [0.0, 1.0], this transformation should not be used when + transforming target image masks. See the `references`_ for implementing the transforms for image masks. + + .. _references: https://github.com/pytorch/vision/tree/main/references/segmentation + """ + + def __call__(self, pic): + """ + Args: + pic (PIL Image or numpy.ndarray): Image to be converted to tensor. + + Returns: + Tensor: Converted image. + """ + return F.to_tensor(pic) + + def __repr__(self): + return self.__class__.__name__ + "()" + + +class PILToTensor: + """Convert a ``PIL Image`` to a tensor of the same type. This transform does not support torchscript. + + Converts a PIL Image (H x W x C) to a Tensor of shape (C x H x W). + """ + + def __call__(self, pic): + """ + .. note:: + + A deep copy of the underlying array is performed. + + Args: + pic (PIL Image): Image to be converted to tensor. + + Returns: + Tensor: Converted image. + """ + return F.pil_to_tensor(pic) + + def __repr__(self): + return self.__class__.__name__ + "()" + + +class ConvertImageDtype(torch.nn.Module): + """Convert a tensor image to the given ``dtype`` and scale the values accordingly + This function does not support PIL Image. + + Args: + dtype (torch.dtype): Desired data type of the output + + .. note:: + + When converting from a smaller to a larger integer ``dtype`` the maximum values are **not** mapped exactly. + If converted back and forth, this mismatch has no effect. + + Raises: + RuntimeError: When trying to cast :class:`torch.float32` to :class:`torch.int32` or :class:`torch.int64` as + well as for trying to cast :class:`torch.float64` to :class:`torch.int64`. These conversions might lead to + overflow errors since the floating point ``dtype`` cannot store consecutive integers over the whole range + of the integer ``dtype``. + """ + + def __init__(self, dtype: torch.dtype) -> None: + super().__init__() + self.dtype = dtype + + def forward(self, image): + return F.convert_image_dtype(image, self.dtype) + + +class ToPILImage: + """Convert a tensor or an ndarray to PIL Image. This transform does not support torchscript. + + Converts a torch.*Tensor of shape C x H x W or a numpy ndarray of shape + H x W x C to a PIL Image while preserving the value range. + + Args: + mode (`PIL.Image mode`_): color space and pixel depth of input data (optional). + If ``mode`` is ``None`` (default) there are some assumptions made about the input data: + - If the input has 4 channels, the ``mode`` is assumed to be ``RGBA``. + - If the input has 3 channels, the ``mode`` is assumed to be ``RGB``. + - If the input has 2 channels, the ``mode`` is assumed to be ``LA``. + - If the input has 1 channel, the ``mode`` is determined by the data type (i.e ``int``, ``float``, + ``short``). + + .. _PIL.Image mode: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#concept-modes + """ + + def __init__(self, mode=None): + self.mode = mode + + def __call__(self, pic): + """ + Args: + pic (Tensor or numpy.ndarray): Image to be converted to PIL Image. + + Returns: + PIL Image: Image converted to PIL Image. + + """ + return F.to_pil_image(pic, self.mode) + + def __repr__(self): + format_string = self.__class__.__name__ + "(" + if self.mode is not None: + format_string += f"mode={self.mode}" + format_string += ")" + return format_string + + +class Normalize(torch.nn.Module): + """Normalize a tensor image with mean and standard deviation. + This transform does not support PIL Image. 
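+    A minimal usage sketch (the mean/std values below are the commonly used ImageNet statistics):
+
+        >>> normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+        >>> out = normalize(torch.rand(3, 224, 224))  # per-channel (x - mean[c]) / std[c]
+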
+ Given mean: ``(mean[1],...,mean[n])`` and std: ``(std[1],..,std[n])`` for ``n`` + channels, this transform will normalize each channel of the input + ``torch.*Tensor`` i.e., + ``output[channel] = (input[channel] - mean[channel]) / std[channel]`` + + .. note:: + This transform acts out of place, i.e., it does not mutate the input tensor. + + Args: + mean (sequence): Sequence of means for each channel. + std (sequence): Sequence of standard deviations for each channel. + inplace(bool,optional): Bool to make this operation in-place. + + """ + + def __init__(self, mean, std, inplace=False): + super().__init__() + self.mean = mean + self.std = std + self.inplace = inplace + + def forward(self, tensor: Tensor) -> Tensor: + """ + Args: + tensor (Tensor): Tensor image to be normalized. + + Returns: + Tensor: Normalized Tensor image. + """ + return F.normalize(tensor, self.mean, self.std, self.inplace) + + def __repr__(self): + return self.__class__.__name__ + f"(mean={self.mean}, std={self.std})" + + +class Resize(torch.nn.Module): + """Resize the input image to the given size. + If the image is torch Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions + + .. warning:: + The output image might be different depending on its type: when downsampling, the interpolation of PIL images + and tensors is slightly different, because PIL applies antialiasing. This may lead to significant differences + in the performance of a network. Therefore, it is preferable to train and serve a model with the same input + types. See also below the ``antialias`` parameter, which can help making the output of PIL images and tensors + closer. + + Args: + size (sequence or int): Desired output size. If size is a sequence like + (h, w), output size will be matched to this. If size is an int, + smaller edge of the image will be matched to this number. + i.e, if height > width, then image will be rescaled to + (size * height / width, size). + + .. note:: + In torchscript mode size as single int is not supported, use a sequence of length 1: ``[size, ]``. + interpolation (InterpolationMode): Desired interpolation enum defined by + :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``. + If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` and + ``InterpolationMode.BICUBIC`` are supported. + For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable. + max_size (int, optional): The maximum allowed for the longer edge of + the resized image: if the longer edge of the image is greater + than ``max_size`` after being resized according to ``size``, then + the image is resized again so that the longer edge is equal to + ``max_size``. As a result, ``size`` might be overruled, i.e the + smaller edge may be shorter than ``size``. This is only supported + if ``size`` is an int (or a sequence of length 1 in torchscript + mode). + antialias (bool, optional): antialias flag. If ``img`` is PIL Image, the flag is ignored and anti-alias + is always used. If ``img`` is Tensor, the flag is False by default and can be set to True for + ``InterpolationMode.BILINEAR`` only mode. This can help making the output for PIL images and tensors + closer. + + .. warning:: + There is no autodiff support for ``antialias=True`` option with input ``img`` as Tensor. 
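+
+    Example (a minimal sketch; the sizes are illustrative):
+        >>> resize = Resize(256)                   # match the smaller edge to 256
+        >>> out = resize(torch.rand(3, 480, 640))  # -> tensor of shape [3, 256, 341]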
+ + """ + + def __init__(self, size, interpolation=InterpolationMode.BILINEAR, max_size=None, antialias=None): + super().__init__() + if not isinstance(size, (int, Sequence)): + raise TypeError(f"Size should be int or sequence. Got {type(size)}") + if isinstance(size, Sequence) and len(size) not in (1, 2): + raise ValueError("If size is a sequence, it should have 1 or 2 values") + self.size = size + self.max_size = max_size + + # Backward compatibility with integer value + if isinstance(interpolation, int): + warnings.warn( + "Argument interpolation should be of type InterpolationMode instead of int. " + "Please, use InterpolationMode enum." + ) + interpolation = _interpolation_modes_from_int(interpolation) + + self.interpolation = interpolation + self.antialias = antialias + + def forward(self, img): + """ + Args: + img (PIL Image or Tensor): Image to be scaled. + + Returns: + PIL Image or Tensor: Rescaled image. + """ + return F.resize(img, self.size, self.interpolation, self.max_size, self.antialias) + + def __repr__(self): + detail = f"(size={self.size}, interpolation={self.interpolation.value}, max_size={self.max_size}, antialias={self.antialias})" + return self.__class__.__name__ + detail + + +class Scale(Resize): + """ + Note: This transform is deprecated in favor of Resize. + """ + + def __init__(self, *args, **kwargs): + warnings.warn("The use of the transforms.Scale transform is deprecated, please use transforms.Resize instead.") + super().__init__(*args, **kwargs) + + +class CenterCrop(torch.nn.Module): + """Crops the given image at the center. + If the image is torch Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. + If image size is smaller than output size along any edge, image is padded with 0 and then center cropped. + + Args: + size (sequence or int): Desired output size of the crop. If size is an + int instead of sequence like (h, w), a square crop (size, size) is + made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]). + """ + + def __init__(self, size): + super().__init__() + self.size = _setup_size(size, error_msg="Please provide only two dimensions (h, w) for size.") + + def forward(self, img): + """ + Args: + img (PIL Image or Tensor): Image to be cropped. + + Returns: + PIL Image or Tensor: Cropped image. + """ + return F.center_crop(img, self.size) + + def __repr__(self): + return self.__class__.__name__ + f"(size={self.size})" + + +class Pad(torch.nn.Module): + """Pad the given image on all sides with the given "pad" value. + If the image is torch Tensor, it is expected + to have [..., H, W] shape, where ... means at most 2 leading dimensions for mode reflect and symmetric, + at most 3 leading dimensions for mode edge, + and an arbitrary number of leading dimensions for mode constant + + Args: + padding (int or sequence): Padding on each border. If a single int is provided this + is used to pad all borders. If sequence of length 2 is provided this is the padding + on left/right and top/bottom respectively. If a sequence of length 4 is provided + this is the padding for the left, top, right and bottom borders respectively. + + .. note:: + In torchscript mode padding as single int is not supported, use a sequence of + length 1: ``[padding, ]``. + fill (number or str or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of + length 3, it is used to fill R, G, B channels respectively. + This value is only used when the padding_mode is constant. 
+ Only number is supported for torch Tensor. + Only int or str or tuple value is supported for PIL Image. + padding_mode (str): Type of padding. Should be: constant, edge, reflect or symmetric. + Default is constant. + + - constant: pads with a constant value, this value is specified with fill + + - edge: pads with the last value at the edge of the image. + If input a 5D torch Tensor, the last 3 dimensions will be padded instead of the last 2 + + - reflect: pads with reflection of image without repeating the last value on the edge. + For example, padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode + will result in [3, 2, 1, 2, 3, 4, 3, 2] + + - symmetric: pads with reflection of image repeating the last value on the edge. + For example, padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode + will result in [2, 1, 1, 2, 3, 4, 4, 3] + """ + + def __init__(self, padding, fill=0, padding_mode="constant"): + super().__init__() + if not isinstance(padding, (numbers.Number, tuple, list)): + raise TypeError("Got inappropriate padding arg") + + if not isinstance(fill, (numbers.Number, str, tuple)): + raise TypeError("Got inappropriate fill arg") + + if padding_mode not in ["constant", "edge", "reflect", "symmetric"]: + raise ValueError("Padding mode should be either constant, edge, reflect or symmetric") + + if isinstance(padding, Sequence) and len(padding) not in [1, 2, 4]: + raise ValueError( + f"Padding must be an int or a 1, 2, or 4 element tuple, not a {len(padding)} element tuple" + ) + + self.padding = padding + self.fill = fill + self.padding_mode = padding_mode + + def forward(self, img): + """ + Args: + img (PIL Image or Tensor): Image to be padded. + + Returns: + PIL Image or Tensor: Padded image. + """ + return F.pad(img, self.padding, self.fill, self.padding_mode) + + def __repr__(self): + return self.__class__.__name__ + f"(padding={self.padding}, fill={self.fill}, padding_mode={self.padding_mode})" + + +class Lambda: + """Apply a user-defined lambda as a transform. This transform does not support torchscript. + + Args: + lambd (function): Lambda/function to be used for transform. + """ + + def __init__(self, lambd): + if not callable(lambd): + raise TypeError(f"Argument lambd should be callable, got {repr(type(lambd).__name__)}") + self.lambd = lambd + + def __call__(self, img): + return self.lambd(img) + + def __repr__(self): + return self.__class__.__name__ + "()" + + +class RandomTransforms: + """Base class for a list of transformations with randomness + + Args: + transforms (sequence): list of transformations + """ + + def __init__(self, transforms): + if not isinstance(transforms, Sequence): + raise TypeError("Argument transforms should be a sequence") + self.transforms = transforms + + def __call__(self, *args, **kwargs): + raise NotImplementedError() + + def __repr__(self): + format_string = self.__class__.__name__ + "(" + for t in self.transforms: + format_string += "\n" + format_string += f" {t}" + format_string += "\n)" + return format_string + + +class RandomApply(torch.nn.Module): + """Apply randomly a list of transformations with a given probability. + + .. note:: + In order to script the transformation, please use ``torch.nn.ModuleList`` as input instead of list/tuple of + transforms as shown below: + + >>> transforms = transforms.RandomApply(torch.nn.ModuleList([ + >>> transforms.ColorJitter(), + >>> ]), p=0.3) + >>> scripted_transforms = torch.jit.script(transforms) + + Make sure to use only scriptable transformations, i.e. 
that work with ``torch.Tensor``, does not require + `lambda` functions or ``PIL.Image``. + + Args: + transforms (sequence or torch.nn.Module): list of transformations + p (float): probability + """ + + def __init__(self, transforms, p=0.5): + super().__init__() + self.transforms = transforms + self.p = p + + def forward(self, img): + if self.p < torch.rand(1): + return img + for t in self.transforms: + img = t(img) + return img + + def __repr__(self): + format_string = self.__class__.__name__ + "(" + format_string += f"\n p={self.p}" + for t in self.transforms: + format_string += "\n" + format_string += f" {t}" + format_string += "\n)" + return format_string + + +class RandomOrder(RandomTransforms): + """Apply a list of transformations in a random order. This transform does not support torchscript.""" + + def __call__(self, img): + order = list(range(len(self.transforms))) + random.shuffle(order) + for i in order: + img = self.transforms[i](img) + return img + + +class RandomChoice(RandomTransforms): + """Apply single transformation randomly picked from a list. This transform does not support torchscript.""" + + def __init__(self, transforms, p=None): + super().__init__(transforms) + if p is not None and not isinstance(p, Sequence): + raise TypeError("Argument p should be a sequence") + self.p = p + + def __call__(self, *args): + t = random.choices(self.transforms, weights=self.p)[0] + return t(*args) + + def __repr__(self): + format_string = super().__repr__() + format_string += f"(p={self.p})" + return format_string + + +class RandomCrop(torch.nn.Module): + """Crop the given image at a random location. + If the image is torch Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions, + but if non-constant padding is used, the input is expected to have at most 2 leading dimensions + + Args: + size (sequence or int): Desired output size of the crop. If size is an + int instead of sequence like (h, w), a square crop (size, size) is + made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]). + padding (int or sequence, optional): Optional padding on each border + of the image. Default is None. If a single int is provided this + is used to pad all borders. If sequence of length 2 is provided this is the padding + on left/right and top/bottom respectively. If a sequence of length 4 is provided + this is the padding for the left, top, right and bottom borders respectively. + + .. note:: + In torchscript mode padding as single int is not supported, use a sequence of + length 1: ``[padding, ]``. + pad_if_needed (boolean): It will pad the image if smaller than the + desired size to avoid raising an exception. Since cropping is done + after padding, the padding seems to be done at a random offset. + fill (number or str or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of + length 3, it is used to fill R, G, B channels respectively. + This value is only used when the padding_mode is constant. + Only number is supported for torch Tensor. + Only int or str or tuple value is supported for PIL Image. + padding_mode (str): Type of padding. Should be: constant, edge, reflect or symmetric. + Default is constant. + + - constant: pads with a constant value, this value is specified with fill + + - edge: pads with the last value at the edge of the image. 
+ If input a 5D torch Tensor, the last 3 dimensions will be padded instead of the last 2 + + - reflect: pads with reflection of image without repeating the last value on the edge. + For example, padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode + will result in [3, 2, 1, 2, 3, 4, 3, 2] + + - symmetric: pads with reflection of image repeating the last value on the edge. + For example, padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode + will result in [2, 1, 1, 2, 3, 4, 4, 3] + """ + + @staticmethod + def get_params(img: Tensor, output_size: Tuple[int, int]) -> Tuple[int, int, int, int]: + """Get parameters for ``crop`` for a random crop. + + Args: + img (PIL Image or Tensor): Image to be cropped. + output_size (tuple): Expected output size of the crop. + + Returns: + tuple: params (i, j, h, w) to be passed to ``crop`` for random crop. + """ + w, h = F.get_image_size(img) + th, tw = output_size + + if h + 1 < th or w + 1 < tw: + raise ValueError(f"Required crop size {(th, tw)} is larger then input image size {(h, w)}") + + if w == tw and h == th: + return 0, 0, h, w + + i = torch.randint(0, h - th + 1, size=(1,)).item() + j = torch.randint(0, w - tw + 1, size=(1,)).item() + return i, j, th, tw + + def __init__(self, size, padding=None, pad_if_needed=False, fill=0, padding_mode="constant"): + super().__init__() + + self.size = tuple(_setup_size(size, error_msg="Please provide only two dimensions (h, w) for size.")) + + self.padding = padding + self.pad_if_needed = pad_if_needed + self.fill = fill + self.padding_mode = padding_mode + + def forward(self, img): + """ + Args: + img (PIL Image or Tensor): Image to be cropped. + + Returns: + PIL Image or Tensor: Cropped image. + """ + if self.padding is not None: + img = F.pad(img, self.padding, self.fill, self.padding_mode) + + width, height = F.get_image_size(img) + # pad the width if needed + if self.pad_if_needed and width < self.size[1]: + padding = [self.size[1] - width, 0] + img = F.pad(img, padding, self.fill, self.padding_mode) + # pad the height if needed + if self.pad_if_needed and height < self.size[0]: + padding = [0, self.size[0] - height] + img = F.pad(img, padding, self.fill, self.padding_mode) + + i, j, h, w = self.get_params(img, self.size) + + return F.crop(img, i, j, h, w) + + def __repr__(self): + return self.__class__.__name__ + f"(size={self.size}, padding={self.padding})" + + +class RandomHorizontalFlip(torch.nn.Module): + """Horizontally flip the given image randomly with a given probability. + If the image is torch Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading + dimensions + + Args: + p (float): probability of the image being flipped. Default value is 0.5 + """ + + def __init__(self, p=0.5): + super().__init__() + self.p = p + + def forward(self, img): + """ + Args: + img (PIL Image or Tensor): Image to be flipped. + + Returns: + PIL Image or Tensor: Randomly flipped image. + """ + if torch.rand(1) < self.p: + return F.hflip(img) + return img + + def __repr__(self): + return self.__class__.__name__ + f"(p={self.p})" + + +class RandomVerticalFlip(torch.nn.Module): + """Vertically flip the given image randomly with a given probability. + If the image is torch Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading + dimensions + + Args: + p (float): probability of the image being flipped. 
Default value is 0.5 + """ + + def __init__(self, p=0.5): + super().__init__() + self.p = p + + def forward(self, img): + """ + Args: + img (PIL Image or Tensor): Image to be flipped. + + Returns: + PIL Image or Tensor: Randomly flipped image. + """ + if torch.rand(1) < self.p: + return F.vflip(img) + return img + + def __repr__(self): + return self.__class__.__name__ + f"(p={self.p})" + + +class RandomPerspective(torch.nn.Module): + """Performs a random perspective transformation of the given image with a given probability. + If the image is torch Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. + + Args: + distortion_scale (float): argument to control the degree of distortion and ranges from 0 to 1. + Default is 0.5. + p (float): probability of the image being transformed. Default is 0.5. + interpolation (InterpolationMode): Desired interpolation enum defined by + :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``. + If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. + For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable. + fill (sequence or number): Pixel fill value for the area outside the transformed + image. Default is ``0``. If given a number, the value is used for all bands respectively. + """ + + def __init__(self, distortion_scale=0.5, p=0.5, interpolation=InterpolationMode.BILINEAR, fill=0): + super().__init__() + self.p = p + + # Backward compatibility with integer value + if isinstance(interpolation, int): + warnings.warn( + "Argument interpolation should be of type InterpolationMode instead of int. " + "Please, use InterpolationMode enum." + ) + interpolation = _interpolation_modes_from_int(interpolation) + + self.interpolation = interpolation + self.distortion_scale = distortion_scale + + if fill is None: + fill = 0 + elif not isinstance(fill, (Sequence, numbers.Number)): + raise TypeError("Fill should be either a sequence or a number.") + + self.fill = fill + + def forward(self, img): + """ + Args: + img (PIL Image or Tensor): Image to be Perspectively transformed. + + Returns: + PIL Image or Tensor: Randomly transformed image. + """ + + fill = self.fill + if isinstance(img, Tensor): + if isinstance(fill, (int, float)): + fill = [float(fill)] * F.get_image_num_channels(img) + else: + fill = [float(f) for f in fill] + + if torch.rand(1) < self.p: + width, height = F.get_image_size(img) + startpoints, endpoints = self.get_params(width, height, self.distortion_scale) + return F.perspective(img, startpoints, endpoints, self.interpolation, fill) + return img + + @staticmethod + def get_params(width: int, height: int, distortion_scale: float) -> Tuple[List[List[int]], List[List[int]]]: + """Get parameters for ``perspective`` for a random perspective transform. + + Args: + width (int): width of the image. + height (int): height of the image. + distortion_scale (float): argument to control the degree of distortion and ranges from 0 to 1. + + Returns: + List containing [top-left, top-right, bottom-right, bottom-left] of the original image, + List containing [top-left, top-right, bottom-right, bottom-left] of the transformed image. 
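+
+        For illustration, with ``width=320``, ``height=320`` and ``distortion_scale=0.5``,
+        each endpoint is sampled at most ``int(0.5 * (320 // 2)) = 80`` pixels away from its
+        image corner along each axis.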
+ """ + half_height = height // 2 + half_width = width // 2 + topleft = [ + int(torch.randint(0, int(distortion_scale * half_width) + 1, size=(1,)).item()), + int(torch.randint(0, int(distortion_scale * half_height) + 1, size=(1,)).item()), + ] + topright = [ + int(torch.randint(width - int(distortion_scale * half_width) - 1, width, size=(1,)).item()), + int(torch.randint(0, int(distortion_scale * half_height) + 1, size=(1,)).item()), + ] + botright = [ + int(torch.randint(width - int(distortion_scale * half_width) - 1, width, size=(1,)).item()), + int(torch.randint(height - int(distortion_scale * half_height) - 1, height, size=(1,)).item()), + ] + botleft = [ + int(torch.randint(0, int(distortion_scale * half_width) + 1, size=(1,)).item()), + int(torch.randint(height - int(distortion_scale * half_height) - 1, height, size=(1,)).item()), + ] + startpoints = [[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]] + endpoints = [topleft, topright, botright, botleft] + return startpoints, endpoints + + def __repr__(self): + return self.__class__.__name__ + f"(p={self.p})" + + +class RandomResizedCrop(torch.nn.Module): + """Crop a random portion of image and resize it to a given size. + + If the image is torch Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions + + A crop of the original image is made: the crop has a random area (H * W) + and a random aspect ratio. This crop is finally resized to the given + size. This is popularly used to train the Inception networks. + + Args: + size (int or sequence): expected output size of the crop, for each edge. If size is an + int instead of sequence like (h, w), a square output size ``(size, size)`` is + made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]). + + .. note:: + In torchscript mode size as single int is not supported, use a sequence of length 1: ``[size, ]``. + scale (tuple of float): Specifies the lower and upper bounds for the random area of the crop, + before resizing. The scale is defined with respect to the area of the original image. + ratio (tuple of float): lower and upper bounds for the random aspect ratio of the crop, before + resizing. + interpolation (InterpolationMode): Desired interpolation enum defined by + :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``. + If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` and + ``InterpolationMode.BICUBIC`` are supported. + For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable. + + """ + + def __init__(self, size, scale=(0.08, 1.0), ratio=(3.0 / 4.0, 4.0 / 3.0), interpolation=InterpolationMode.BILINEAR): + super().__init__() + self.size = _setup_size(size, error_msg="Please provide only two dimensions (h, w) for size.") + + if not isinstance(scale, Sequence): + raise TypeError("Scale should be a sequence") + if not isinstance(ratio, Sequence): + raise TypeError("Ratio should be a sequence") + if (scale[0] > scale[1]) or (ratio[0] > ratio[1]): + warnings.warn("Scale and ratio should be of kind (min, max)") + + # Backward compatibility with integer value + if isinstance(interpolation, int): + warnings.warn( + "Argument interpolation should be of type InterpolationMode instead of int. " + "Please, use InterpolationMode enum." 
+ ) + interpolation = _interpolation_modes_from_int(interpolation) + + self.interpolation = interpolation + self.scale = scale + self.ratio = ratio + + @staticmethod + def get_params(img: Tensor, scale: List[float], ratio: List[float]) -> Tuple[int, int, int, int]: + """Get parameters for ``crop`` for a random sized crop. + + Args: + img (PIL Image or Tensor): Input image. + scale (list): range of scale of the origin size cropped + ratio (list): range of aspect ratio of the origin aspect ratio cropped + + Returns: + tuple: params (i, j, h, w) to be passed to ``crop`` for a random + sized crop. + """ + width, height = F.get_image_size(img) + area = height * width + + log_ratio = torch.log(torch.tensor(ratio)) + for _ in range(10): + target_area = area * torch.empty(1).uniform_(scale[0], scale[1]).item() + aspect_ratio = torch.exp(torch.empty(1).uniform_(log_ratio[0], log_ratio[1])).item() + + w = int(round(math.sqrt(target_area * aspect_ratio))) + h = int(round(math.sqrt(target_area / aspect_ratio))) + + if 0 < w <= width and 0 < h <= height: + i = torch.randint(0, height - h + 1, size=(1,)).item() + j = torch.randint(0, width - w + 1, size=(1,)).item() + return i, j, h, w + + # Fallback to central crop + in_ratio = float(width) / float(height) + if in_ratio < min(ratio): + w = width + h = int(round(w / min(ratio))) + elif in_ratio > max(ratio): + h = height + w = int(round(h * max(ratio))) + else: # whole image + w = width + h = height + i = (height - h) // 2 + j = (width - w) // 2 + return i, j, h, w + + def forward(self, img): + """ + Args: + img (PIL Image or Tensor): Image to be cropped and resized. + + Returns: + PIL Image or Tensor: Randomly cropped and resized image. + """ + i, j, h, w = self.get_params(img, self.scale, self.ratio) + return F.resized_crop(img, i, j, h, w, self.size, self.interpolation) + + def __repr__(self): + interpolate_str = self.interpolation.value + format_string = self.__class__.__name__ + f"(size={self.size}" + format_string += f", scale={tuple(round(s, 4) for s in self.scale)}" + format_string += f", ratio={tuple(round(r, 4) for r in self.ratio)}" + format_string += f", interpolation={interpolate_str})" + return format_string + + +class RandomSizedCrop(RandomResizedCrop): + """ + Note: This transform is deprecated in favor of RandomResizedCrop. + """ + + def __init__(self, *args, **kwargs): + warnings.warn( + "The use of the transforms.RandomSizedCrop transform is deprecated, " + + "please use transforms.RandomResizedCrop instead." + ) + super().__init__(*args, **kwargs) + + +class FiveCrop(torch.nn.Module): + """Crop the given image into four corners and the central crop. + If the image is torch Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading + dimensions + + .. Note:: + This transform returns a tuple of images and there may be a mismatch in the number of + inputs and targets your Dataset returns. See below for an example of how to deal with + this. + + Args: + size (sequence or int): Desired output size of the crop. If size is an ``int`` + instead of sequence like (h, w), a square crop of size (size, size) is made. + If provided a sequence of length 1, it will be interpreted as (size[0], size[0]). 
+ + Example: + >>> transform = Compose([ + >>> FiveCrop(size), # this is a list of PIL Images + >>> Lambda(lambda crops: torch.stack([ToTensor()(crop) for crop in crops])) # returns a 4D tensor + >>> ]) + >>> #In your test loop you can do the following: + >>> input, target = batch # input is a 5d tensor, target is 2d + >>> bs, ncrops, c, h, w = input.size() + >>> result = model(input.view(-1, c, h, w)) # fuse batch size and ncrops + >>> result_avg = result.view(bs, ncrops, -1).mean(1) # avg over crops + """ + + def __init__(self, size): + super().__init__() + self.size = _setup_size(size, error_msg="Please provide only two dimensions (h, w) for size.") + + def forward(self, img): + """ + Args: + img (PIL Image or Tensor): Image to be cropped. + + Returns: + tuple of 5 images. Image can be PIL Image or Tensor + """ + return F.five_crop(img, self.size) + + def __repr__(self): + return self.__class__.__name__ + f"(size={self.size})" + + +class TenCrop(torch.nn.Module): + """Crop the given image into four corners and the central crop plus the flipped version of + these (horizontal flipping is used by default). + If the image is torch Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading + dimensions + + .. Note:: + This transform returns a tuple of images and there may be a mismatch in the number of + inputs and targets your Dataset returns. See below for an example of how to deal with + this. + + Args: + size (sequence or int): Desired output size of the crop. If size is an + int instead of sequence like (h, w), a square crop (size, size) is + made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]). + vertical_flip (bool): Use vertical flipping instead of horizontal + + Example: + >>> transform = Compose([ + >>> TenCrop(size), # this is a list of PIL Images + >>> Lambda(lambda crops: torch.stack([ToTensor()(crop) for crop in crops])) # returns a 4D tensor + >>> ]) + >>> #In your test loop you can do the following: + >>> input, target = batch # input is a 5d tensor, target is 2d + >>> bs, ncrops, c, h, w = input.size() + >>> result = model(input.view(-1, c, h, w)) # fuse batch size and ncrops + >>> result_avg = result.view(bs, ncrops, -1).mean(1) # avg over crops + """ + + def __init__(self, size, vertical_flip=False): + super().__init__() + self.size = _setup_size(size, error_msg="Please provide only two dimensions (h, w) for size.") + self.vertical_flip = vertical_flip + + def forward(self, img): + """ + Args: + img (PIL Image or Tensor): Image to be cropped. + + Returns: + tuple of 10 images. Image can be PIL Image or Tensor + """ + return F.ten_crop(img, self.size, self.vertical_flip) + + def __repr__(self): + return self.__class__.__name__ + f"(size={self.size}, vertical_flip={self.vertical_flip})" + + +class LinearTransformation(torch.nn.Module): + """Transform a tensor image with a square transformation matrix and a mean_vector computed + offline. + This transform does not support PIL Image. + Given transformation_matrix and mean_vector, will flatten the torch.*Tensor and + subtract mean_vector from it which is then followed by computing the dot + product with the transformation matrix and then reshaping the tensor to its + original shape. + + Applications: + whitening transformation: Suppose X is a column vector zero-centered data. + Then compute the data covariance matrix [D x D] with torch.mm(X.t(), X), + perform SVD on this matrix and pass it as transformation_matrix. 
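+
+    Example (an illustrative ZCA-whitening sketch for flattened 3 x 8 x 8 inputs; the variable
+    names and the small epsilon are not part of this transform):
+        >>> X = torch.randn(100, 3 * 8 * 8)                     # one flattened sample per row
+        >>> mean = X.mean(dim=0)
+        >>> Xc = X - mean
+        >>> cov = Xc.t().mm(Xc) / Xc.size(0)                    # [D x D] covariance, D = 192
+        >>> U, S, _ = torch.svd(cov)
+        >>> W = U.mm(torch.diag((S + 1e-5).rsqrt())).mm(U.t())  # whitening matrix
+        >>> whiten = LinearTransformation(W, mean)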
+ + Args: + transformation_matrix (Tensor): tensor [D x D], D = C x H x W + mean_vector (Tensor): tensor [D], D = C x H x W + """ + + def __init__(self, transformation_matrix, mean_vector): + super().__init__() + if transformation_matrix.size(0) != transformation_matrix.size(1): + raise ValueError( + "transformation_matrix should be square. Got " + f"{tuple(transformation_matrix.size())} rectangular matrix." + ) + + if mean_vector.size(0) != transformation_matrix.size(0): + raise ValueError( + f"mean_vector should have the same length {mean_vector.size(0)}" + f" as any one of the dimensions of the transformation_matrix [{tuple(transformation_matrix.size())}]" + ) + + if transformation_matrix.device != mean_vector.device: + raise ValueError( + f"Input tensors should be on the same device. Got {transformation_matrix.device} and {mean_vector.device}" + ) + + self.transformation_matrix = transformation_matrix + self.mean_vector = mean_vector + + def forward(self, tensor: Tensor) -> Tensor: + """ + Args: + tensor (Tensor): Tensor image to be whitened. + + Returns: + Tensor: Transformed image. + """ + shape = tensor.shape + n = shape[-3] * shape[-2] * shape[-1] + if n != self.transformation_matrix.shape[0]: + raise ValueError( + "Input tensor and transformation matrix have incompatible shape." + + f"[{shape[-3]} x {shape[-2]} x {shape[-1]}] != " + + f"{self.transformation_matrix.shape[0]}" + ) + + if tensor.device.type != self.mean_vector.device.type: + raise ValueError( + "Input tensor should be on the same device as transformation matrix and mean vector. " + f"Got {tensor.device} vs {self.mean_vector.device}" + ) + + flat_tensor = tensor.view(-1, n) - self.mean_vector + transformed_tensor = torch.mm(flat_tensor, self.transformation_matrix) + tensor = transformed_tensor.view(shape) + return tensor + + def __repr__(self): + format_string = self.__class__.__name__ + "(transformation_matrix=" + format_string += str(self.transformation_matrix.tolist()) + ")" + format_string += ", (mean_vector=" + str(self.mean_vector.tolist()) + ")" + return format_string + + +class ColorJitter(torch.nn.Module): + """Randomly change the brightness, contrast, saturation and hue of an image. + If the image is torch Tensor, it is expected + to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions. + If img is PIL Image, mode "1", "I", "F" and modes with transparency (alpha channel) are not supported. + + Args: + brightness (float or tuple of float (min, max)): How much to jitter brightness. + brightness_factor is chosen uniformly from [max(0, 1 - brightness), 1 + brightness] + or the given [min, max]. Should be non negative numbers. + contrast (float or tuple of float (min, max)): How much to jitter contrast. + contrast_factor is chosen uniformly from [max(0, 1 - contrast), 1 + contrast] + or the given [min, max]. Should be non negative numbers. + saturation (float or tuple of float (min, max)): How much to jitter saturation. + saturation_factor is chosen uniformly from [max(0, 1 - saturation), 1 + saturation] + or the given [min, max]. Should be non negative numbers. + hue (float or tuple of float (min, max)): How much to jitter hue. + hue_factor is chosen uniformly from [-hue, hue] or the given [min, max]. + Should have 0<= hue <= 0.5 or -0.5 <= min <= max <= 0.5. 
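+
+    Example (a minimal sketch; the jitter strengths are illustrative, not recommended defaults):
+        >>> jitter = ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1)
+        >>> jittered = jitter(img)  # img: PIL Image or a [..., 3, H, W] tensor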
+ """ + + def __init__(self, brightness=0, contrast=0, saturation=0, hue=0): + super().__init__() + self.brightness = self._check_input(brightness, "brightness") + self.contrast = self._check_input(contrast, "contrast") + self.saturation = self._check_input(saturation, "saturation") + self.hue = self._check_input(hue, "hue", center=0, bound=(-0.5, 0.5), clip_first_on_zero=False) + + @torch.jit.unused + def _check_input(self, value, name, center=1, bound=(0, float("inf")), clip_first_on_zero=True): + if isinstance(value, numbers.Number): + if value < 0: + raise ValueError(f"If {name} is a single number, it must be non negative.") + value = [center - float(value), center + float(value)] + if clip_first_on_zero: + value[0] = max(value[0], 0.0) + elif isinstance(value, (tuple, list)) and len(value) == 2: + if not bound[0] <= value[0] <= value[1] <= bound[1]: + raise ValueError(f"{name} values should be between {bound}") + else: + raise TypeError(f"{name} should be a single number or a list/tuple with length 2.") + + # if value is 0 or (1., 1.) for brightness/contrast/saturation + # or (0., 0.) for hue, do nothing + if value[0] == value[1] == center: + value = None + return value + + @staticmethod + def get_params( + brightness: Optional[List[float]], + contrast: Optional[List[float]], + saturation: Optional[List[float]], + hue: Optional[List[float]], + ) -> Tuple[Tensor, Optional[float], Optional[float], Optional[float], Optional[float]]: + """Get the parameters for the randomized transform to be applied on image. + + Args: + brightness (tuple of float (min, max), optional): The range from which the brightness_factor is chosen + uniformly. Pass None to turn off the transformation. + contrast (tuple of float (min, max), optional): The range from which the contrast_factor is chosen + uniformly. Pass None to turn off the transformation. + saturation (tuple of float (min, max), optional): The range from which the saturation_factor is chosen + uniformly. Pass None to turn off the transformation. + hue (tuple of float (min, max), optional): The range from which the hue_factor is chosen uniformly. + Pass None to turn off the transformation. + + Returns: + tuple: The parameters used to apply the randomized transform + along with their random order. + """ + fn_idx = torch.randperm(4) + + b = None if brightness is None else float(torch.empty(1).uniform_(brightness[0], brightness[1])) + c = None if contrast is None else float(torch.empty(1).uniform_(contrast[0], contrast[1])) + s = None if saturation is None else float(torch.empty(1).uniform_(saturation[0], saturation[1])) + h = None if hue is None else float(torch.empty(1).uniform_(hue[0], hue[1])) + + return fn_idx, b, c, s, h + + def forward(self, img): + """ + Args: + img (PIL Image or Tensor): Input image. + + Returns: + PIL Image or Tensor: Color jittered image. 
+ """ + fn_idx, brightness_factor, contrast_factor, saturation_factor, hue_factor = self.get_params( + self.brightness, self.contrast, self.saturation, self.hue + ) + + for fn_id in fn_idx: + if fn_id == 0 and brightness_factor is not None: + img = F.adjust_brightness(img, brightness_factor) + elif fn_id == 1 and contrast_factor is not None: + img = F.adjust_contrast(img, contrast_factor) + elif fn_id == 2 and saturation_factor is not None: + img = F.adjust_saturation(img, saturation_factor) + elif fn_id == 3 and hue_factor is not None: + img = F.adjust_hue(img, hue_factor) + + return img + + def __repr__(self): + format_string = self.__class__.__name__ + "(" + format_string += f"brightness={self.brightness}" + format_string += f", contrast={self.contrast}" + format_string += f", saturation={self.saturation}" + format_string += f", hue={self.hue})" + return format_string + + +class RandomRotation(torch.nn.Module): + """Rotate the image by angle. + If the image is torch Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. + + Args: + degrees (sequence or number): Range of degrees to select from. + If degrees is a number instead of sequence like (min, max), the range of degrees + will be (-degrees, +degrees). + interpolation (InterpolationMode): Desired interpolation enum defined by + :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``. + If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. + For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable. + expand (bool, optional): Optional expansion flag. + If true, expands the output to make it large enough to hold the entire rotated image. + If false or omitted, make the output image the same size as the input image. + Note that the expand flag assumes rotation around the center and no translation. + center (sequence, optional): Optional center of rotation, (x, y). Origin is the upper left corner. + Default is the center of the image. + fill (sequence or number): Pixel fill value for the area outside the rotated + image. Default is ``0``. If given a number, the value is used for all bands respectively. + resample (int, optional): deprecated argument and will be removed since v0.10.0. + Please use the ``interpolation`` parameter instead. + + .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters + + """ + + def __init__( + self, degrees, interpolation=InterpolationMode.NEAREST, expand=False, center=None, fill=0, resample=None + ): + super().__init__() + if resample is not None: + warnings.warn( + "Argument resample is deprecated and will be removed since v0.10.0. Please, use interpolation instead" + ) + interpolation = _interpolation_modes_from_int(resample) + + # Backward compatibility with integer value + if isinstance(interpolation, int): + warnings.warn( + "Argument interpolation should be of type InterpolationMode instead of int. " + "Please, use InterpolationMode enum." 
+ ) + interpolation = _interpolation_modes_from_int(interpolation) + + self.degrees = _setup_angle(degrees, name="degrees", req_sizes=(2,)) + + if center is not None: + _check_sequence_input(center, "center", req_sizes=(2,)) + + self.center = center + + self.resample = self.interpolation = interpolation + self.expand = expand + + if fill is None: + fill = 0 + elif not isinstance(fill, (Sequence, numbers.Number)): + raise TypeError("Fill should be either a sequence or a number.") + + self.fill = fill + + @staticmethod + def get_params(degrees: List[float]) -> float: + """Get parameters for ``rotate`` for a random rotation. + + Returns: + float: angle parameter to be passed to ``rotate`` for random rotation. + """ + angle = float(torch.empty(1).uniform_(float(degrees[0]), float(degrees[1])).item()) + return angle + + def forward(self, img): + """ + Args: + img (PIL Image or Tensor): Image to be rotated. + + Returns: + PIL Image or Tensor: Rotated image. + """ + fill = self.fill + if isinstance(img, Tensor): + if isinstance(fill, (int, float)): + fill = [float(fill)] * F.get_image_num_channels(img) + else: + fill = [float(f) for f in fill] + angle = self.get_params(self.degrees) + + return F.rotate(img, angle, self.resample, self.expand, self.center, fill) + + def __repr__(self): + interpolate_str = self.interpolation.value + format_string = self.__class__.__name__ + f"(degrees={self.degrees}" + format_string += f", interpolation={interpolate_str}" + format_string += f", expand={self.expand}" + if self.center is not None: + format_string += f", center={self.center}" + if self.fill is not None: + format_string += f", fill={self.fill}" + format_string += ")" + return format_string + + +class RandomAffine(torch.nn.Module): + """Random affine transformation of the image keeping center invariant. + If the image is torch Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. + + Args: + degrees (sequence or number): Range of degrees to select from. + If degrees is a number instead of sequence like (min, max), the range of degrees + will be (-degrees, +degrees). Set to 0 to deactivate rotations. + translate (tuple, optional): tuple of maximum absolute fraction for horizontal + and vertical translations. For example translate=(a, b), then horizontal shift + is randomly sampled in the range -img_width * a < dx < img_width * a and vertical shift is + randomly sampled in the range -img_height * b < dy < img_height * b. Will not translate by default. + scale (tuple, optional): scaling factor interval, e.g (a, b), then scale is + randomly sampled from the range a <= scale <= b. Will keep original scale by default. + shear (sequence or number, optional): Range of degrees to select from. + If shear is a number, a shear parallel to the x axis in the range (-shear, +shear) + will be applied. Else if shear is a sequence of 2 values a shear parallel to the x axis in the + range (shear[0], shear[1]) will be applied. Else if shear is a sequence of 4 values, + a x-axis shear in (shear[0], shear[1]) and y-axis shear in (shear[2], shear[3]) will be applied. + Will not apply shear by default. + interpolation (InterpolationMode): Desired interpolation enum defined by + :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``. + If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. + For backward compatibility integer values (e.g. ``PIL.Image.NEAREST``) are still acceptable. 
+ fill (sequence or number): Pixel fill value for the area outside the transformed + image. Default is ``0``. If given a number, the value is used for all bands respectively. + fillcolor (sequence or number, optional): deprecated argument and will be removed since v0.10.0. + Please use the ``fill`` parameter instead. + resample (int, optional): deprecated argument and will be removed since v0.10.0. + Please use the ``interpolation`` parameter instead. + + .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters + + """ + + def __init__( + self, + degrees, + translate=None, + scale=None, + shear=None, + interpolation=InterpolationMode.NEAREST, + fill=0, + fillcolor=None, + resample=None, + ): + super().__init__() + if resample is not None: + warnings.warn( + "Argument resample is deprecated and will be removed since v0.10.0. Please, use interpolation instead" + ) + interpolation = _interpolation_modes_from_int(resample) + + # Backward compatibility with integer value + if isinstance(interpolation, int): + warnings.warn( + "Argument interpolation should be of type InterpolationMode instead of int. " + "Please, use InterpolationMode enum." + ) + interpolation = _interpolation_modes_from_int(interpolation) + + if fillcolor is not None: + warnings.warn( + "Argument fillcolor is deprecated and will be removed since v0.10.0. Please, use fill instead" + ) + fill = fillcolor + + self.degrees = _setup_angle(degrees, name="degrees", req_sizes=(2,)) + + if translate is not None: + _check_sequence_input(translate, "translate", req_sizes=(2,)) + for t in translate: + if not (0.0 <= t <= 1.0): + raise ValueError("translation values should be between 0 and 1") + self.translate = translate + + if scale is not None: + _check_sequence_input(scale, "scale", req_sizes=(2,)) + for s in scale: + if s <= 0: + raise ValueError("scale values should be positive") + self.scale = scale + + if shear is not None: + self.shear = _setup_angle(shear, name="shear", req_sizes=(2, 4)) + else: + self.shear = shear + + self.resample = self.interpolation = interpolation + + if fill is None: + fill = 0 + elif not isinstance(fill, (Sequence, numbers.Number)): + raise TypeError("Fill should be either a sequence or a number.") + + self.fillcolor = self.fill = fill + + @staticmethod + def get_params( + degrees: List[float], + translate: Optional[List[float]], + scale_ranges: Optional[List[float]], + shears: Optional[List[float]], + img_size: List[int], + ) -> Tuple[float, Tuple[int, int], float, Tuple[float, float]]: + """Get parameters for affine transformation + + Returns: + params to be passed to the affine transformation + """ + angle = float(torch.empty(1).uniform_(float(degrees[0]), float(degrees[1])).item()) + if translate is not None: + max_dx = float(translate[0] * img_size[0]) + max_dy = float(translate[1] * img_size[1]) + tx = int(round(torch.empty(1).uniform_(-max_dx, max_dx).item())) + ty = int(round(torch.empty(1).uniform_(-max_dy, max_dy).item())) + translations = (tx, ty) + else: + translations = (0, 0) + + if scale_ranges is not None: + scale = float(torch.empty(1).uniform_(scale_ranges[0], scale_ranges[1]).item()) + else: + scale = 1.0 + + shear_x = shear_y = 0.0 + if shears is not None: + shear_x = float(torch.empty(1).uniform_(shears[0], shears[1]).item()) + if len(shears) == 4: + shear_y = float(torch.empty(1).uniform_(shears[2], shears[3]).item()) + + shear = (shear_x, shear_y) + + return angle, translations, scale, shear + + def forward(self, img): + """ + img (PIL Image or Tensor): 
Image to be transformed. + + Returns: + PIL Image or Tensor: Affine transformed image. + """ + fill = self.fill + if isinstance(img, Tensor): + if isinstance(fill, (int, float)): + fill = [float(fill)] * F.get_image_num_channels(img) + else: + fill = [float(f) for f in fill] + + img_size = F.get_image_size(img) + + ret = self.get_params(self.degrees, self.translate, self.scale, self.shear, img_size) + + return F.affine(img, *ret, interpolation=self.interpolation, fill=fill) + + def __repr__(self): + s = "{name}(degrees={degrees}" + if self.translate is not None: + s += ", translate={translate}" + if self.scale is not None: + s += ", scale={scale}" + if self.shear is not None: + s += ", shear={shear}" + if self.interpolation != InterpolationMode.NEAREST: + s += ", interpolation={interpolation}" + if self.fill != 0: + s += ", fill={fill}" + s += ")" + d = dict(self.__dict__) + d["interpolation"] = self.interpolation.value + return s.format(name=self.__class__.__name__, **d) + + +class Grayscale(torch.nn.Module): + """Convert image to grayscale. + If the image is torch Tensor, it is expected + to have [..., 3, H, W] shape, where ... means an arbitrary number of leading dimensions + + Args: + num_output_channels (int): (1 or 3) number of channels desired for output image + + Returns: + PIL Image: Grayscale version of the input. + + - If ``num_output_channels == 1`` : returned image is single channel + - If ``num_output_channels == 3`` : returned image is 3 channel with r == g == b + + """ + + def __init__(self, num_output_channels=1): + super().__init__() + self.num_output_channels = num_output_channels + + def forward(self, img): + """ + Args: + img (PIL Image or Tensor): Image to be converted to grayscale. + + Returns: + PIL Image or Tensor: Grayscaled image. + """ + return F.rgb_to_grayscale(img, num_output_channels=self.num_output_channels) + + def __repr__(self): + return self.__class__.__name__ + f"(num_output_channels={self.num_output_channels})" + + +class RandomGrayscale(torch.nn.Module): + """Randomly convert image to grayscale with a probability of p (default 0.1). + If the image is torch Tensor, it is expected + to have [..., 3, H, W] shape, where ... means an arbitrary number of leading dimensions + + Args: + p (float): probability that image should be converted to grayscale. + + Returns: + PIL Image or Tensor: Grayscale version of the input image with probability p and unchanged + with probability (1-p). + - If input image is 1 channel: grayscale version is 1 channel + - If input image is 3 channel: grayscale version is 3 channel with r == g == b + + """ + + def __init__(self, p=0.1): + super().__init__() + self.p = p + + def forward(self, img): + """ + Args: + img (PIL Image or Tensor): Image to be converted to grayscale. + + Returns: + PIL Image or Tensor: Randomly grayscaled image. + """ + num_output_channels = F.get_image_num_channels(img) + if torch.rand(1) < self.p: + return F.rgb_to_grayscale(img, num_output_channels=num_output_channels) + return img + + def __repr__(self): + return self.__class__.__name__ + f"(p={self.p})" + + +class RandomErasing(torch.nn.Module): + """Randomly selects a rectangle region in an torch Tensor image and erases its pixels. + This transform does not support PIL Image. + 'Random Erasing Data Augmentation' by Zhong et al. See https://arxiv.org/abs/1708.04896 + + Args: + p: probability that the random erasing operation will be performed. + scale: range of proportion of erased area against input image. 
+ ratio: range of aspect ratio of erased area. + value: erasing value. Default is 0. If a single int, it is used to + erase all pixels. If a tuple of length 3, it is used to erase + R, G, B channels respectively. + If a str of 'random', erasing each pixel with random values. + inplace: boolean to make this transform inplace. Default set to False. + + Returns: + Erased Image. + + Example: + >>> transform = transforms.Compose([ + >>> transforms.RandomHorizontalFlip(), + >>> transforms.PILToTensor(), + >>> transforms.ConvertImageDtype(torch.float), + >>> transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + >>> transforms.RandomErasing(), + >>> ]) + """ + + def __init__(self, p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=0, inplace=False): + super().__init__() + if not isinstance(value, (numbers.Number, str, tuple, list)): + raise TypeError("Argument value should be either a number or str or a sequence") + if isinstance(value, str) and value != "random": + raise ValueError("If value is str, it should be 'random'") + if not isinstance(scale, (tuple, list)): + raise TypeError("Scale should be a sequence") + if not isinstance(ratio, (tuple, list)): + raise TypeError("Ratio should be a sequence") + if (scale[0] > scale[1]) or (ratio[0] > ratio[1]): + warnings.warn("Scale and ratio should be of kind (min, max)") + if scale[0] < 0 or scale[1] > 1: + raise ValueError("Scale should be between 0 and 1") + if p < 0 or p > 1: + raise ValueError("Random erasing probability should be between 0 and 1") + + self.p = p + self.scale = scale + self.ratio = ratio + self.value = value + self.inplace = inplace + + @staticmethod + def get_params( + img: Tensor, scale: Tuple[float, float], ratio: Tuple[float, float], value: Optional[List[float]] = None + ) -> Tuple[int, int, int, int, Tensor]: + """Get parameters for ``erase`` for a random erasing. + + Args: + img (Tensor): Tensor image to be erased. + scale (sequence): range of proportion of erased area against input image. + ratio (sequence): range of aspect ratio of erased area. + value (list, optional): erasing value. If None, it is interpreted as "random" + (erasing each pixel with random values). If ``len(value)`` is 1, it is interpreted as a number, + i.e. ``value[0]``. + + Returns: + tuple: params (i, j, h, w, v) to be passed to ``erase`` for random erasing. + """ + img_c, img_h, img_w = img.shape[-3], img.shape[-2], img.shape[-1] + area = img_h * img_w + + log_ratio = torch.log(torch.tensor(ratio)) + for _ in range(10): + erase_area = area * torch.empty(1).uniform_(scale[0], scale[1]).item() + aspect_ratio = torch.exp(torch.empty(1).uniform_(log_ratio[0], log_ratio[1])).item() + + h = int(round(math.sqrt(erase_area * aspect_ratio))) + w = int(round(math.sqrt(erase_area / aspect_ratio))) + if not (h < img_h and w < img_w): + continue + + if value is None: + v = torch.empty([img_c, h, w], dtype=torch.float32).normal_() + else: + v = torch.tensor(value)[:, None, None] + + i = torch.randint(0, img_h - h + 1, size=(1,)).item() + j = torch.randint(0, img_w - w + 1, size=(1,)).item() + return i, j, h, w, v + + # Return original image + return 0, 0, img_h, img_w, img + + def forward(self, img): + """ + Args: + img (Tensor): Tensor image to be erased. + + Returns: + img (Tensor): Erased Tensor image. 
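+
+        When ``self.value`` is the string ``'random'``, ``value`` is passed to ``get_params``
+        as ``None`` and the erased patch is filled with samples from a standard normal
+        distribution.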
+ """ + if torch.rand(1) < self.p: + + # cast self.value to script acceptable type + if isinstance(self.value, (int, float)): + value = [ + self.value, + ] + elif isinstance(self.value, str): + value = None + elif isinstance(self.value, tuple): + value = list(self.value) + else: + value = self.value + + if value is not None and not (len(value) in (1, img.shape[-3])): + raise ValueError( + "If value is a sequence, it should have either a single value or " + f"{img.shape[-3]} (number of input channels)" + ) + + x, y, h, w, v = self.get_params(img, scale=self.scale, ratio=self.ratio, value=value) + return F.erase(img, x, y, h, w, v, self.inplace) + return img + + def __repr__(self): + s = f"(p={self.p}, " + s += f"scale={self.scale}, " + s += f"ratio={self.ratio}, " + s += f"value={self.value}, " + s += f"inplace={self.inplace})" + return self.__class__.__name__ + s + + +class GaussianBlur(torch.nn.Module): + """Blurs image with randomly chosen Gaussian blur. + If the image is torch Tensor, it is expected + to have [..., C, H, W] shape, where ... means an arbitrary number of leading dimensions. + + Args: + kernel_size (int or sequence): Size of the Gaussian kernel. + sigma (float or tuple of float (min, max)): Standard deviation to be used for + creating kernel to perform blurring. If float, sigma is fixed. If it is tuple + of float (min, max), sigma is chosen uniformly at random to lie in the + given range. + + Returns: + PIL Image or Tensor: Gaussian blurred version of the input image. + + """ + + def __init__(self, kernel_size, sigma=(0.1, 2.0)): + super().__init__() + self.kernel_size = _setup_size(kernel_size, "Kernel size should be a tuple/list of two integers") + for ks in self.kernel_size: + if ks <= 0 or ks % 2 == 0: + raise ValueError("Kernel size value should be an odd and positive number.") + + if isinstance(sigma, numbers.Number): + if sigma <= 0: + raise ValueError("If sigma is a single number, it must be positive.") + sigma = (sigma, sigma) + elif isinstance(sigma, Sequence) and len(sigma) == 2: + if not 0.0 < sigma[0] <= sigma[1]: + raise ValueError("sigma values should be positive and of the form (min, max).") + else: + raise ValueError("sigma should be a single number or a list/tuple with length 2.") + + self.sigma = sigma + + @staticmethod + def get_params(sigma_min: float, sigma_max: float) -> float: + """Choose sigma for random gaussian blurring. + + Args: + sigma_min (float): Minimum standard deviation that can be chosen for blurring kernel. + sigma_max (float): Maximum standard deviation that can be chosen for blurring kernel. + + Returns: + float: Standard deviation to be passed to calculate kernel for gaussian blurring. + """ + return torch.empty(1).uniform_(sigma_min, sigma_max).item() + + def forward(self, img: Tensor) -> Tensor: + """ + Args: + img (PIL Image or Tensor): image to be blurred. 
+
+        Returns:
+            PIL Image or Tensor: Gaussian blurred image
+        """
+        sigma = self.get_params(self.sigma[0], self.sigma[1])
+        return F.gaussian_blur(img, self.kernel_size, [sigma, sigma])
+
+    def __repr__(self):
+        s = f"(kernel_size={self.kernel_size}, "
+        s += f"sigma={self.sigma})"
+        return self.__class__.__name__ + s
+
+
+def _setup_size(size, error_msg):
+    if isinstance(size, numbers.Number):
+        return int(size), int(size)
+
+    if isinstance(size, Sequence) and len(size) == 1:
+        return size[0], size[0]
+
+    if len(size) != 2:
+        raise ValueError(error_msg)
+
+    return size
+
+
+def _check_sequence_input(x, name, req_sizes):
+    msg = req_sizes[0] if len(req_sizes) < 2 else " or ".join([str(s) for s in req_sizes])
+    if not isinstance(x, Sequence):
+        raise TypeError(f"{name} should be a sequence of length {msg}.")
+    if len(x) not in req_sizes:
+        raise ValueError(f"{name} should be a sequence of length {msg}.")
+
+
+def _setup_angle(x, name, req_sizes=(2,)):
+    if isinstance(x, numbers.Number):
+        if x < 0:
+            raise ValueError(f"If {name} is a single number, it must be positive.")
+        x = [-x, x]
+    else:
+        _check_sequence_input(x, name, req_sizes)
+
+    return [float(d) for d in x]
+
+
+class RandomInvert(torch.nn.Module):
+    """Inverts the colors of the given image randomly with a given probability.
+    If img is a Tensor, it is expected to be in [..., 1 or 3, H, W] format,
+    where ... means it can have an arbitrary number of leading dimensions.
+    If img is PIL Image, it is expected to be in mode "L" or "RGB".
+
+    Args:
+        p (float): probability of the image being color inverted. Default value is 0.5
+    """
+
+    def __init__(self, p=0.5):
+        super().__init__()
+        self.p = p
+
+    def forward(self, img):
+        """
+        Args:
+            img (PIL Image or Tensor): Image to be inverted.
+
+        Returns:
+            PIL Image or Tensor: Randomly color inverted image.
+        """
+        if torch.rand(1).item() < self.p:
+            return F.invert(img)
+        return img
+
+    def __repr__(self):
+        return self.__class__.__name__ + f"(p={self.p})"
+
+
+class RandomPosterize(torch.nn.Module):
+    """Posterize the image randomly with a given probability by reducing the
+    number of bits for each color channel. If the image is torch Tensor, it should be of type torch.uint8,
+    and it is expected to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
+    If img is PIL Image, it is expected to be in mode "L" or "RGB".
+
+    Args:
+        bits (int): number of bits to keep for each channel (0-8)
+        p (float): probability of the image being posterized. Default value is 0.5
+    """
+
+    def __init__(self, bits, p=0.5):
+        super().__init__()
+        self.bits = bits
+        self.p = p
+
+    def forward(self, img):
+        """
+        Args:
+            img (PIL Image or Tensor): Image to be posterized.
+
+        Returns:
+            PIL Image or Tensor: Randomly posterized image.
+        """
+        if torch.rand(1).item() < self.p:
+            return F.posterize(img, self.bits)
+        return img
+
+    def __repr__(self):
+        return self.__class__.__name__ + f"(bits={self.bits},p={self.p})"
+
+
+class RandomSolarize(torch.nn.Module):
+    """Solarize the image randomly with a given probability by inverting all pixel
+    values above a threshold. If img is a Tensor, it is expected to be in [..., 1 or 3, H, W] format,
+    where ... means it can have an arbitrary number of leading dimensions.
+    If img is PIL Image, it is expected to be in mode "L" or "RGB".
+
+    Args:
+        threshold (float): all pixels equal to or above this value are inverted.
+        p (float): probability of the image being solarized. Default value is 0.5
+    """
+
+    def __init__(self, threshold, p=0.5):
+        super().__init__()
+        self.threshold = threshold
+        self.p = p
+
+    def forward(self, img):
+        """
+        Args:
+            img (PIL Image or Tensor): Image to be solarized.
+
+        Returns:
+            PIL Image or Tensor: Randomly solarized image.
+        """
+        if torch.rand(1).item() < self.p:
+            return F.solarize(img, self.threshold)
+        return img
+
+    def __repr__(self):
+        return self.__class__.__name__ + f"(threshold={self.threshold},p={self.p})"
+
+
+class RandomAdjustSharpness(torch.nn.Module):
+    """Adjust the sharpness of the image randomly with a given probability. If the image is torch Tensor,
+    it is expected to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
+
+    Args:
+        sharpness_factor (float): How much to adjust the sharpness. Can be
+            any non-negative number. 0 gives a blurred image, 1 gives the
+            original image while 2 increases the sharpness by a factor of 2.
+        p (float): probability of the image being sharpened. Default value is 0.5
+    """
+
+    def __init__(self, sharpness_factor, p=0.5):
+        super().__init__()
+        self.sharpness_factor = sharpness_factor
+        self.p = p
+
+    def forward(self, img):
+        """
+        Args:
+            img (PIL Image or Tensor): Image to be sharpened.
+
+        Returns:
+            PIL Image or Tensor: Randomly sharpened image.
+        """
+        if torch.rand(1).item() < self.p:
+            return F.adjust_sharpness(img, self.sharpness_factor)
+        return img
+
+    def __repr__(self):
+        return self.__class__.__name__ + f"(sharpness_factor={self.sharpness_factor},p={self.p})"
+
+
+class RandomAutocontrast(torch.nn.Module):
+    """Autocontrast the pixels of the given image randomly with a given probability.
+    If the image is torch Tensor, it is expected
+    to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
+    If img is PIL Image, it is expected to be in mode "L" or "RGB".
+
+    Args:
+        p (float): probability of the image being autocontrasted. Default value is 0.5
+    """
+
+    def __init__(self, p=0.5):
+        super().__init__()
+        self.p = p
+
+    def forward(self, img):
+        """
+        Args:
+            img (PIL Image or Tensor): Image to be autocontrasted.
+
+        Returns:
+            PIL Image or Tensor: Randomly autocontrasted image.
+        """
+        if torch.rand(1).item() < self.p:
+            return F.autocontrast(img)
+        return img
+
+    def __repr__(self):
+        return self.__class__.__name__ + f"(p={self.p})"
+
+
+class RandomEqualize(torch.nn.Module):
+    """Equalize the histogram of the given image randomly with a given probability.
+    If the image is torch Tensor, it is expected
+    to have [..., 1 or 3, H, W] shape, where ... means an arbitrary number of leading dimensions.
+    If img is PIL Image, it is expected to be in mode "P", "L" or "RGB".
+
+    Args:
+        p (float): probability of the image being equalized. Default value is 0.5
+    """
+
+    def __init__(self, p=0.5):
+        super().__init__()
+        self.p = p
+
+    def forward(self, img):
+        """
+        Args:
+            img (PIL Image or Tensor): Image to be equalized.
+
+        Returns:
+            PIL Image or Tensor: Randomly equalized image.
+ """ + if torch.rand(1).item() < self.p: + return F.equalize(img) + return img + + def __repr__(self): + return self.__class__.__name__ + f"(p={self.p})" diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms_origin.py new file mode 100644 index 0000000000..10783c8e53 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms_origin.py @@ -0,0 +1,1297 @@ +import torch +import math +import random +from PIL import Image +try: + import accimage +except ImportError: + accimage = None +import numpy as np +import numbers +import types +from collections.abc import Sequence, Iterable +import warnings + +from . import functional as F + + +__all__ = ["Compose", "ToTensor", "ToPILImage", "Normalize", "Resize", "Scale", "CenterCrop", "Pad", + "Lambda", "RandomApply", "RandomChoice", "RandomOrder", "RandomCrop", "RandomHorizontalFlip", + "RandomVerticalFlip", "RandomResizedCrop", "RandomSizedCrop", "FiveCrop", "TenCrop", "LinearTransformation", + "ColorJitter", "RandomRotation", "RandomAffine", "Grayscale", "RandomGrayscale", + "RandomPerspective", "RandomErasing"] + +_pil_interpolation_to_str = { + Image.NEAREST: 'PIL.Image.NEAREST', + Image.BILINEAR: 'PIL.Image.BILINEAR', + Image.BICUBIC: 'PIL.Image.BICUBIC', + Image.LANCZOS: 'PIL.Image.LANCZOS', + Image.HAMMING: 'PIL.Image.HAMMING', + Image.BOX: 'PIL.Image.BOX', +} + + +def _get_image_size(img): + if F._is_pil_image(img): + return img.size + elif isinstance(img, torch.Tensor) and img.dim() > 2: + return img.shape[-2:][::-1] + else: + raise TypeError("Unexpected type {}".format(type(img))) + + +class Compose(object): + """Composes several transforms together. + + Args: + transforms (list of ``Transform`` objects): list of transforms to compose. + + Example: + >>> transforms.Compose([ + >>> transforms.CenterCrop(10), + >>> transforms.ToTensor(), + >>> ]) + """ + + def __init__(self, transforms): + self.transforms = transforms + + def __call__(self, img): + for t in self.transforms: + img = t(img) + return img + + def __repr__(self): + format_string = self.__class__.__name__ + '(' + for t in self.transforms: + format_string += '\n' + format_string += ' {0}'.format(t) + format_string += '\n)' + return format_string + + +class ToTensor(object): + """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor. + + Converts a PIL Image or numpy.ndarray (H x W x C) in the range + [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0] + if the PIL Image belongs to one of the modes (L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK, 1) + or if the numpy.ndarray has dtype = np.uint8 + + In the other cases, tensors are returned without scaling. + """ + + def __call__(self, pic): + """ + Args: + pic (PIL Image or numpy.ndarray): Image to be converted to tensor. + + Returns: + Tensor: Converted image. + """ + return F.to_tensor(pic) + + def __repr__(self): + return self.__class__.__name__ + '()' + + +class ToPILImage(object): + """Convert a tensor or an ndarray to PIL Image. + + Converts a torch.*Tensor of shape C x H x W or a numpy ndarray of shape + H x W x C to a PIL Image while preserving the value range. + + Args: + mode (`PIL.Image mode`_): color space and pixel depth of input data (optional). + If ``mode`` is ``None`` (default) there are some assumptions made about the input data: + - If the input has 4 channels, the ``mode`` is assumed to be ``RGBA``. 
+ - If the input has 3 channels, the ``mode`` is assumed to be ``RGB``. + - If the input has 2 channels, the ``mode`` is assumed to be ``LA``. + - If the input has 1 channel, the ``mode`` is determined by the data type (i.e ``int``, ``float``, + ``short``). + + .. _PIL.Image mode: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#concept-modes + """ + def __init__(self, mode=None): + self.mode = mode + + def __call__(self, pic): + """ + Args: + pic (Tensor or numpy.ndarray): Image to be converted to PIL Image. + + Returns: + PIL Image: Image converted to PIL Image. + + """ + return F.to_pil_image(pic, self.mode) + + def __repr__(self): + format_string = self.__class__.__name__ + '(' + if self.mode is not None: + format_string += 'mode={0}'.format(self.mode) + format_string += ')' + return format_string + + +class Normalize(object): + """Normalize a tensor image with mean and standard deviation. + Given mean: ``(M1,...,Mn)`` and std: ``(S1,..,Sn)`` for ``n`` channels, this transform + will normalize each channel of the input ``torch.*Tensor`` i.e. + ``output[channel] = (input[channel] - mean[channel]) / std[channel]`` + + .. note:: + This transform acts out of place, i.e., it does not mutate the input tensor. + + Args: + mean (sequence): Sequence of means for each channel. + std (sequence): Sequence of standard deviations for each channel. + inplace(bool,optional): Bool to make this operation in-place. + + """ + + def __init__(self, mean, std, inplace=False): + self.mean = mean + self.std = std + self.inplace = inplace + + def __call__(self, tensor): + """ + Args: + tensor (Tensor): Tensor image of size (C, H, W) to be normalized. + + Returns: + Tensor: Normalized Tensor image. + """ + return F.normalize(tensor, self.mean, self.std, self.inplace) + + def __repr__(self): + return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std) + + +class Resize(object): + """Resize the input PIL Image to the given size. + + Args: + size (sequence or int): Desired output size. If size is a sequence like + (h, w), output size will be matched to this. If size is an int, + smaller edge of the image will be matched to this number. + i.e, if height > width, then image will be rescaled to + (size * height / width, size) + interpolation (int, optional): Desired interpolation. Default is + ``PIL.Image.BILINEAR`` + """ + + def __init__(self, size, interpolation=Image.BILINEAR): + assert isinstance(size, int) or (isinstance(size, Iterable) and len(size) == 2) + self.size = size + self.interpolation = interpolation + + def __call__(self, img): + """ + Args: + img (PIL Image): Image to be scaled. + + Returns: + PIL Image: Rescaled image. + """ + return F.resize(img, self.size, self.interpolation) + + def __repr__(self): + interpolate_str = _pil_interpolation_to_str[self.interpolation] + return self.__class__.__name__ + '(size={0}, interpolation={1})'.format(self.size, interpolate_str) + + +class Scale(Resize): + """ + Note: This transform is deprecated in favor of Resize. + """ + def __init__(self, *args, **kwargs): + warnings.warn("The use of the transforms.Scale transform is deprecated, " + + "please use transforms.Resize instead.") + super(Scale, self).__init__(*args, **kwargs) + + +class CenterCrop(object): + """Crops the given PIL Image at the center. + + Args: + size (sequence or int): Desired output size of the crop. If size is an + int instead of sequence like (h, w), a square crop (size, size) is + made. 
+ """ + + def __init__(self, size): + if isinstance(size, numbers.Number): + self.size = (int(size), int(size)) + else: + self.size = size + + def __call__(self, img): + """ + Args: + img (PIL Image): Image to be cropped. + + Returns: + PIL Image: Cropped image. + """ + return F.center_crop(img, self.size) + + def __repr__(self): + return self.__class__.__name__ + '(size={0})'.format(self.size) + + +class Pad(object): + """Pad the given PIL Image on all sides with the given "pad" value. + + Args: + padding (int or tuple): Padding on each border. If a single int is provided this + is used to pad all borders. If tuple of length 2 is provided this is the padding + on left/right and top/bottom respectively. If a tuple of length 4 is provided + this is the padding for the left, top, right and bottom borders + respectively. + fill (int or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of + length 3, it is used to fill R, G, B channels respectively. + This value is only used when the padding_mode is constant + padding_mode (str): Type of padding. Should be: constant, edge, reflect or symmetric. + Default is constant. + + - constant: pads with a constant value, this value is specified with fill + + - edge: pads with the last value at the edge of the image + + - reflect: pads with reflection of image without repeating the last value on the edge + + For example, padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode + will result in [3, 2, 1, 2, 3, 4, 3, 2] + + - symmetric: pads with reflection of image repeating the last value on the edge + + For example, padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode + will result in [2, 1, 1, 2, 3, 4, 4, 3] + """ + + def __init__(self, padding, fill=0, padding_mode='constant'): + assert isinstance(padding, (numbers.Number, tuple)) + assert isinstance(fill, (numbers.Number, str, tuple)) + assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric'] + if isinstance(padding, Sequence) and len(padding) not in [2, 4]: + raise ValueError("Padding must be an int or a 2, or 4 element tuple, not a " + + "{} element tuple".format(len(padding))) + + self.padding = padding + self.fill = fill + self.padding_mode = padding_mode + + def __call__(self, img): + """ + Args: + img (PIL Image): Image to be padded. + + Returns: + PIL Image: Padded image. + """ + return F.pad(img, self.padding, self.fill, self.padding_mode) + + def __repr__(self): + return self.__class__.__name__ + '(padding={0}, fill={1}, padding_mode={2})'.\ + format(self.padding, self.fill, self.padding_mode) + + +class Lambda(object): + """Apply a user-defined lambda as a transform. + + Args: + lambd (function): Lambda/function to be used for transform. 
+ """ + + def __init__(self, lambd): + assert callable(lambd), repr(type(lambd).__name__) + " object is not callable" + self.lambd = lambd + + def __call__(self, img): + return self.lambd(img) + + def __repr__(self): + return self.__class__.__name__ + '()' + + +class RandomTransforms(object): + """Base class for a list of transformations with randomness + + Args: + transforms (list or tuple): list of transformations + """ + + def __init__(self, transforms): + assert isinstance(transforms, (list, tuple)) + self.transforms = transforms + + def __call__(self, *args, **kwargs): + raise NotImplementedError() + + def __repr__(self): + format_string = self.__class__.__name__ + '(' + for t in self.transforms: + format_string += '\n' + format_string += ' {0}'.format(t) + format_string += '\n)' + return format_string + + +class RandomApply(RandomTransforms): + """Apply randomly a list of transformations with a given probability + + Args: + transforms (list or tuple): list of transformations + p (float): probability + """ + + def __init__(self, transforms, p=0.5): + super(RandomApply, self).__init__(transforms) + self.p = p + + def __call__(self, img): + if self.p < random.random(): + return img + for t in self.transforms: + img = t(img) + return img + + def __repr__(self): + format_string = self.__class__.__name__ + '(' + format_string += '\n p={}'.format(self.p) + for t in self.transforms: + format_string += '\n' + format_string += ' {0}'.format(t) + format_string += '\n)' + return format_string + + +class RandomOrder(RandomTransforms): + """Apply a list of transformations in a random order + """ + def __call__(self, img): + order = list(range(len(self.transforms))) + random.shuffle(order) + for i in order: + img = self.transforms[i](img) + return img + + +class RandomChoice(RandomTransforms): + """Apply single transformation randomly picked from a list + """ + def __call__(self, img): + t = random.choice(self.transforms) + return t(img) + + +class RandomCrop(object): + """Crop the given PIL Image at a random location. + + Args: + size (sequence or int): Desired output size of the crop. If size is an + int instead of sequence like (h, w), a square crop (size, size) is + made. + padding (int or sequence, optional): Optional padding on each border + of the image. Default is None, i.e no padding. If a sequence of length + 4 is provided, it is used to pad left, top, right, bottom borders + respectively. If a sequence of length 2 is provided, it is used to + pad left/right, top/bottom borders, respectively. + pad_if_needed (boolean): It will pad the image if smaller than the + desired size to avoid raising an exception. Since cropping is done + after padding, the padding seems to be done at a random offset. + fill: Pixel fill value for constant fill. Default is 0. If a tuple of + length 3, it is used to fill R, G, B channels respectively. + This value is only used when the padding_mode is constant + padding_mode: Type of padding. Should be: constant, edge, reflect or symmetric. Default is constant. 
+ + - constant: pads with a constant value, this value is specified with fill + + - edge: pads with the last value on the edge of the image + + - reflect: pads with reflection of image (without repeating the last value on the edge) + + padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode + will result in [3, 2, 1, 2, 3, 4, 3, 2] + + - symmetric: pads with reflection of image (repeating the last value on the edge) + + padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode + will result in [2, 1, 1, 2, 3, 4, 4, 3] + + """ + + def __init__(self, size, padding=None, pad_if_needed=False, fill=0, padding_mode='constant'): + if isinstance(size, numbers.Number): + self.size = (int(size), int(size)) + else: + self.size = size + self.padding = padding + self.pad_if_needed = pad_if_needed + self.fill = fill + self.padding_mode = padding_mode + + @staticmethod + def get_params(img, output_size): + """Get parameters for ``crop`` for a random crop. + + Args: + img (PIL Image): Image to be cropped. + output_size (tuple): Expected output size of the crop. + + Returns: + tuple: params (i, j, h, w) to be passed to ``crop`` for random crop. + """ + w, h = _get_image_size(img) + th, tw = output_size + if w == tw and h == th: + return 0, 0, h, w + + i = random.randint(0, h - th) + j = random.randint(0, w - tw) + return i, j, th, tw + + def __call__(self, img): + """ + Args: + img (PIL Image): Image to be cropped. + + Returns: + PIL Image: Cropped image. + """ + if self.padding is not None: + img = F.pad(img, self.padding, self.fill, self.padding_mode) + + # pad the width if needed + if self.pad_if_needed and img.size[0] < self.size[1]: + img = F.pad(img, (self.size[1] - img.size[0], 0), self.fill, self.padding_mode) + # pad the height if needed + if self.pad_if_needed and img.size[1] < self.size[0]: + img = F.pad(img, (0, self.size[0] - img.size[1]), self.fill, self.padding_mode) + + i, j, h, w = self.get_params(img, self.size) + + return F.crop(img, i, j, h, w) + + def __repr__(self): + return self.__class__.__name__ + '(size={0}, padding={1})'.format(self.size, self.padding) + + +class RandomHorizontalFlip(object): + """Horizontally flip the given PIL Image randomly with a given probability. + + Args: + p (float): probability of the image being flipped. Default value is 0.5 + """ + + def __init__(self, p=0.5): + self.p = p + + def __call__(self, img): + """ + Args: + img (PIL Image): Image to be flipped. + + Returns: + PIL Image: Randomly flipped image. + """ + if random.random() < self.p: + return F.hflip(img) + return img + + def __repr__(self): + return self.__class__.__name__ + '(p={})'.format(self.p) + + +class RandomVerticalFlip(object): + """Vertically flip the given PIL Image randomly with a given probability. + + Args: + p (float): probability of the image being flipped. Default value is 0.5 + """ + + def __init__(self, p=0.5): + self.p = p + + def __call__(self, img): + """ + Args: + img (PIL Image): Image to be flipped. + + Returns: + PIL Image: Randomly flipped image. + """ + if random.random() < self.p: + return F.vflip(img) + return img + + def __repr__(self): + return self.__class__.__name__ + '(p={})'.format(self.p) + + +class RandomPerspective(object): + """Performs Perspective transformation of the given PIL Image randomly with a given probability. + + Args: + interpolation : Default- Image.BICUBIC + + p (float): probability of the image being perspectively transformed. 
Default value is 0.5 + + distortion_scale(float): it controls the degree of distortion and ranges from 0 to 1. Default value is 0.5. + + fill (3-tuple or int): RGB pixel fill value for area outside the rotated image. + If int, it is used for all channels respectively. Default value is 0. + """ + + def __init__(self, distortion_scale=0.5, p=0.5, interpolation=Image.BICUBIC, fill=0): + self.p = p + self.interpolation = interpolation + self.distortion_scale = distortion_scale + self.fill = fill + + def __call__(self, img): + """ + Args: + img (PIL Image): Image to be Perspectively transformed. + + Returns: + PIL Image: Random perspectivley transformed image. + """ + if not F._is_pil_image(img): + raise TypeError('img should be PIL Image. Got {}'.format(type(img))) + + if random.random() < self.p: + width, height = img.size + startpoints, endpoints = self.get_params(width, height, self.distortion_scale) + return F.perspective(img, startpoints, endpoints, self.interpolation, self.fill) + return img + + @staticmethod + def get_params(width, height, distortion_scale): + """Get parameters for ``perspective`` for a random perspective transform. + + Args: + width : width of the image. + height : height of the image. + + Returns: + List containing [top-left, top-right, bottom-right, bottom-left] of the original image, + List containing [top-left, top-right, bottom-right, bottom-left] of the transformed image. + """ + half_height = int(height / 2) + half_width = int(width / 2) + topleft = (random.randint(0, int(distortion_scale * half_width)), + random.randint(0, int(distortion_scale * half_height))) + topright = (random.randint(width - int(distortion_scale * half_width) - 1, width - 1), + random.randint(0, int(distortion_scale * half_height))) + botright = (random.randint(width - int(distortion_scale * half_width) - 1, width - 1), + random.randint(height - int(distortion_scale * half_height) - 1, height - 1)) + botleft = (random.randint(0, int(distortion_scale * half_width)), + random.randint(height - int(distortion_scale * half_height) - 1, height - 1)) + startpoints = [(0, 0), (width - 1, 0), (width - 1, height - 1), (0, height - 1)] + endpoints = [topleft, topright, botright, botleft] + return startpoints, endpoints + + def __repr__(self): + return self.__class__.__name__ + '(p={})'.format(self.p) + + +class RandomResizedCrop(object): + """Crop the given PIL Image to random size and aspect ratio. + + A crop of random size (default: of 0.08 to 1.0) of the original size and a random + aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio is made. This crop + is finally resized to given size. + This is popularly used to train the Inception networks. + + Args: + size: expected output size of each edge + scale: range of size of the origin size cropped + ratio: range of aspect ratio of the origin aspect ratio cropped + interpolation: Default: PIL.Image.BILINEAR + """ + + def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), interpolation=Image.BILINEAR): + if isinstance(size, (tuple, list)): + self.size = size + else: + self.size = (size, size) + if (scale[0] > scale[1]) or (ratio[0] > ratio[1]): + warnings.warn("range should be of kind (min, max)") + + self.interpolation = interpolation + self.scale = scale + self.ratio = ratio + + @staticmethod + def get_params(img, scale, ratio): + """Get parameters for ``crop`` for a random sized crop. + + Args: + img (PIL Image): Image to be cropped. 
+ scale (tuple): range of size of the origin size cropped + ratio (tuple): range of aspect ratio of the origin aspect ratio cropped + + Returns: + tuple: params (i, j, h, w) to be passed to ``crop`` for a random + sized crop. + """ + width, height = _get_image_size(img) + area = height * width + + for _ in range(10): + target_area = random.uniform(*scale) * area + log_ratio = (math.log(ratio[0]), math.log(ratio[1])) + aspect_ratio = math.exp(random.uniform(*log_ratio)) + + w = int(round(math.sqrt(target_area * aspect_ratio))) + h = int(round(math.sqrt(target_area / aspect_ratio))) + + if 0 < w <= width and 0 < h <= height: + i = random.randint(0, height - h) + j = random.randint(0, width - w) + return i, j, h, w + + # Fallback to central crop + in_ratio = float(width) / float(height) + if (in_ratio < min(ratio)): + w = width + h = int(round(w / min(ratio))) + elif (in_ratio > max(ratio)): + h = height + w = int(round(h * max(ratio))) + else: # whole image + w = width + h = height + i = (height - h) // 2 + j = (width - w) // 2 + return i, j, h, w + + def __call__(self, img): + """ + Args: + img (PIL Image): Image to be cropped and resized. + + Returns: + PIL Image: Randomly cropped and resized image. + """ + i, j, h, w = self.get_params(img, self.scale, self.ratio) + return F.resized_crop(img, i, j, h, w, self.size, self.interpolation) + + def __repr__(self): + interpolate_str = _pil_interpolation_to_str[self.interpolation] + format_string = self.__class__.__name__ + '(size={0}'.format(self.size) + format_string += ', scale={0}'.format(tuple(round(s, 4) for s in self.scale)) + format_string += ', ratio={0}'.format(tuple(round(r, 4) for r in self.ratio)) + format_string += ', interpolation={0})'.format(interpolate_str) + return format_string + + +class RandomSizedCrop(RandomResizedCrop): + """ + Note: This transform is deprecated in favor of RandomResizedCrop. + """ + def __init__(self, *args, **kwargs): + warnings.warn("The use of the transforms.RandomSizedCrop transform is deprecated, " + + "please use transforms.RandomResizedCrop instead.") + super(RandomSizedCrop, self).__init__(*args, **kwargs) + + +class FiveCrop(object): + """Crop the given PIL Image into four corners and the central crop + + .. Note:: + This transform returns a tuple of images and there may be a mismatch in the number of + inputs and targets your Dataset returns. See below for an example of how to deal with + this. + + Args: + size (sequence or int): Desired output size of the crop. If size is an ``int`` + instead of sequence like (h, w), a square crop of size (size, size) is made. + + Example: + >>> transform = Compose([ + >>> FiveCrop(size), # this is a list of PIL Images + >>> Lambda(lambda crops: torch.stack([ToTensor()(crop) for crop in crops])) # returns a 4D tensor + >>> ]) + >>> #In your test loop you can do the following: + >>> input, target = batch # input is a 5d tensor, target is 2d + >>> bs, ncrops, c, h, w = input.size() + >>> result = model(input.view(-1, c, h, w)) # fuse batch size and ncrops + >>> result_avg = result.view(bs, ncrops, -1).mean(1) # avg over crops + """ + + def __init__(self, size): + self.size = size + if isinstance(size, numbers.Number): + self.size = (int(size), int(size)) + else: + assert len(size) == 2, "Please provide only two dimensions (h, w) for size." 
+ self.size = size + + def __call__(self, img): + return F.five_crop(img, self.size) + + def __repr__(self): + return self.__class__.__name__ + '(size={0})'.format(self.size) + + +class TenCrop(object): + """Crop the given PIL Image into four corners and the central crop plus the flipped version of + these (horizontal flipping is used by default) + + .. Note:: + This transform returns a tuple of images and there may be a mismatch in the number of + inputs and targets your Dataset returns. See below for an example of how to deal with + this. + + Args: + size (sequence or int): Desired output size of the crop. If size is an + int instead of sequence like (h, w), a square crop (size, size) is + made. + vertical_flip (bool): Use vertical flipping instead of horizontal + + Example: + >>> transform = Compose([ + >>> TenCrop(size), # this is a list of PIL Images + >>> Lambda(lambda crops: torch.stack([ToTensor()(crop) for crop in crops])) # returns a 4D tensor + >>> ]) + >>> #In your test loop you can do the following: + >>> input, target = batch # input is a 5d tensor, target is 2d + >>> bs, ncrops, c, h, w = input.size() + >>> result = model(input.view(-1, c, h, w)) # fuse batch size and ncrops + >>> result_avg = result.view(bs, ncrops, -1).mean(1) # avg over crops + """ + + def __init__(self, size, vertical_flip=False): + self.size = size + if isinstance(size, numbers.Number): + self.size = (int(size), int(size)) + else: + assert len(size) == 2, "Please provide only two dimensions (h, w) for size." + self.size = size + self.vertical_flip = vertical_flip + + def __call__(self, img): + return F.ten_crop(img, self.size, self.vertical_flip) + + def __repr__(self): + return self.__class__.__name__ + '(size={0}, vertical_flip={1})'.format(self.size, self.vertical_flip) + + +class LinearTransformation(object): + """Transform a tensor image with a square transformation matrix and a mean_vector computed + offline. + Given transformation_matrix and mean_vector, will flatten the torch.*Tensor and + subtract mean_vector from it which is then followed by computing the dot + product with the transformation matrix and then reshaping the tensor to its + original shape. + + Applications: + whitening transformation: Suppose X is a column vector zero-centered data. + Then compute the data covariance matrix [D x D] with torch.mm(X.t(), X), + perform SVD on this matrix and pass it as transformation_matrix. + + Args: + transformation_matrix (Tensor): tensor [D x D], D = C x H x W + mean_vector (Tensor): tensor [D], D = C x H x W + """ + + def __init__(self, transformation_matrix, mean_vector): + if transformation_matrix.size(0) != transformation_matrix.size(1): + raise ValueError("transformation_matrix should be square. Got " + + "[{} x {}] rectangular matrix.".format(*transformation_matrix.size())) + + if mean_vector.size(0) != transformation_matrix.size(0): + raise ValueError("mean_vector should have the same length {}".format(mean_vector.size(0)) + + " as any one of the dimensions of the transformation_matrix [{} x {}]" + .format(transformation_matrix.size())) + + self.transformation_matrix = transformation_matrix + self.mean_vector = mean_vector + + def __call__(self, tensor): + """ + Args: + tensor (Tensor): Tensor image of size (C, H, W) to be whitened. + + Returns: + Tensor: Transformed image. + """ + if tensor.size(0) * tensor.size(1) * tensor.size(2) != self.transformation_matrix.size(0): + raise ValueError("tensor and transformation matrix have incompatible shape." 
+ + "[{} x {} x {}] != ".format(*tensor.size()) + + "{}".format(self.transformation_matrix.size(0))) + flat_tensor = tensor.view(1, -1) - self.mean_vector + transformed_tensor = torch.mm(flat_tensor, self.transformation_matrix) + tensor = transformed_tensor.view(tensor.size()) + return tensor + + def __repr__(self): + format_string = self.__class__.__name__ + '(transformation_matrix=' + format_string += (str(self.transformation_matrix.tolist()) + ')') + format_string += (", (mean_vector=" + str(self.mean_vector.tolist()) + ')') + return format_string + + +class ColorJitter(object): + """Randomly change the brightness, contrast and saturation of an image. + + Args: + brightness (float or tuple of float (min, max)): How much to jitter brightness. + brightness_factor is chosen uniformly from [max(0, 1 - brightness), 1 + brightness] + or the given [min, max]. Should be non negative numbers. + contrast (float or tuple of float (min, max)): How much to jitter contrast. + contrast_factor is chosen uniformly from [max(0, 1 - contrast), 1 + contrast] + or the given [min, max]. Should be non negative numbers. + saturation (float or tuple of float (min, max)): How much to jitter saturation. + saturation_factor is chosen uniformly from [max(0, 1 - saturation), 1 + saturation] + or the given [min, max]. Should be non negative numbers. + hue (float or tuple of float (min, max)): How much to jitter hue. + hue_factor is chosen uniformly from [-hue, hue] or the given [min, max]. + Should have 0<= hue <= 0.5 or -0.5 <= min <= max <= 0.5. + """ + def __init__(self, brightness=0, contrast=0, saturation=0, hue=0): + self.brightness = self._check_input(brightness, 'brightness') + self.contrast = self._check_input(contrast, 'contrast') + self.saturation = self._check_input(saturation, 'saturation') + self.hue = self._check_input(hue, 'hue', center=0, bound=(-0.5, 0.5), + clip_first_on_zero=False) + + def _check_input(self, value, name, center=1, bound=(0, float('inf')), clip_first_on_zero=True): + if isinstance(value, numbers.Number): + if value < 0: + raise ValueError("If {} is a single number, it must be non negative.".format(name)) + value = [center - value, center + value] + if clip_first_on_zero: + value[0] = max(value[0], 0) + elif isinstance(value, (tuple, list)) and len(value) == 2: + if not bound[0] <= value[0] <= value[1] <= bound[1]: + raise ValueError("{} values should be between {}".format(name, bound)) + else: + raise TypeError("{} should be a single number or a list/tuple with lenght 2.".format(name)) + + # if value is 0 or (1., 1.) for brightness/contrast/saturation + # or (0., 0.) for hue, do nothing + if value[0] == value[1] == center: + value = None + return value + + @staticmethod + def get_params(brightness, contrast, saturation, hue): + """Get a randomized transform to be applied on image. + + Arguments are same as that of __init__. + + Returns: + Transform which randomly adjusts brightness, contrast and + saturation in a random order. 
+ """ + transforms = [] + + if brightness is not None: + brightness_factor = random.uniform(brightness[0], brightness[1]) + transforms.append(Lambda(lambda img: F.adjust_brightness(img, brightness_factor))) + + if contrast is not None: + contrast_factor = random.uniform(contrast[0], contrast[1]) + transforms.append(Lambda(lambda img: F.adjust_contrast(img, contrast_factor))) + + if saturation is not None: + saturation_factor = random.uniform(saturation[0], saturation[1]) + transforms.append(Lambda(lambda img: F.adjust_saturation(img, saturation_factor))) + + if hue is not None: + hue_factor = random.uniform(hue[0], hue[1]) + transforms.append(Lambda(lambda img: F.adjust_hue(img, hue_factor))) + + random.shuffle(transforms) + transform = Compose(transforms) + + return transform + + def __call__(self, img): + """ + Args: + img (PIL Image): Input image. + + Returns: + PIL Image: Color jittered image. + """ + transform = self.get_params(self.brightness, self.contrast, + self.saturation, self.hue) + return transform(img) + + def __repr__(self): + format_string = self.__class__.__name__ + '(' + format_string += 'brightness={0}'.format(self.brightness) + format_string += ', contrast={0}'.format(self.contrast) + format_string += ', saturation={0}'.format(self.saturation) + format_string += ', hue={0})'.format(self.hue) + return format_string + + +class RandomRotation(object): + """Rotate the image by angle. + + Args: + degrees (sequence or float or int): Range of degrees to select from. + If degrees is a number instead of sequence like (min, max), the range of degrees + will be (-degrees, +degrees). + resample ({PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC}, optional): + An optional resampling filter. See `filters`_ for more information. + If omitted, or if the image has mode "1" or "P", it is set to PIL.Image.NEAREST. + expand (bool, optional): Optional expansion flag. + If true, expands the output to make it large enough to hold the entire rotated image. + If false or omitted, make the output image the same size as the input image. + Note that the expand flag assumes rotation around the center and no translation. + center (2-tuple, optional): Optional center of rotation. + Origin is the upper left corner. + Default is the center of the image. + fill (n-tuple or int or float): Pixel fill value for area outside the rotated + image. If int or float, the value is used for all bands respectively. + Defaults to 0 for all bands. This option is only available for ``pillow>=5.2.0``. + + .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters + + """ + + def __init__(self, degrees, resample=False, expand=False, center=None, fill=None): + if isinstance(degrees, numbers.Number): + if degrees < 0: + raise ValueError("If degrees is a single number, it must be positive.") + self.degrees = (-degrees, degrees) + else: + if len(degrees) != 2: + raise ValueError("If degrees is a sequence, it must be of len 2.") + self.degrees = degrees + + self.resample = resample + self.expand = expand + self.center = center + self.fill = fill + + @staticmethod + def get_params(degrees): + """Get parameters for ``rotate`` for a random rotation. + + Returns: + sequence: params to be passed to ``rotate`` for random rotation. + """ + angle = random.uniform(degrees[0], degrees[1]) + + return angle + + def __call__(self, img): + """ + Args: + img (PIL Image): Image to be rotated. + + Returns: + PIL Image: Rotated image. 
+ """ + + angle = self.get_params(self.degrees) + + return F.rotate(img, angle, self.resample, self.expand, self.center, self.fill) + + def __repr__(self): + format_string = self.__class__.__name__ + '(degrees={0}'.format(self.degrees) + format_string += ', resample={0}'.format(self.resample) + format_string += ', expand={0}'.format(self.expand) + if self.center is not None: + format_string += ', center={0}'.format(self.center) + format_string += ')' + return format_string + + +class RandomAffine(object): + """Random affine transformation of the image keeping center invariant + + Args: + degrees (sequence or float or int): Range of degrees to select from. + If degrees is a number instead of sequence like (min, max), the range of degrees + will be (-degrees, +degrees). Set to 0 to deactivate rotations. + translate (tuple, optional): tuple of maximum absolute fraction for horizontal + and vertical translations. For example translate=(a, b), then horizontal shift + is randomly sampled in the range -img_width * a < dx < img_width * a and vertical shift is + randomly sampled in the range -img_height * b < dy < img_height * b. Will not translate by default. + scale (tuple, optional): scaling factor interval, e.g (a, b), then scale is + randomly sampled from the range a <= scale <= b. Will keep original scale by default. + shear (sequence or float or int, optional): Range of degrees to select from. + If shear is a number, a shear parallel to the x axis in the range (-shear, +shear) + will be apllied. Else if shear is a tuple or list of 2 values a shear parallel to the x axis in the + range (shear[0], shear[1]) will be applied. Else if shear is a tuple or list of 4 values, + a x-axis shear in (shear[0], shear[1]) and y-axis shear in (shear[2], shear[3]) will be applied. + Will not apply shear by default + resample ({PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC}, optional): + An optional resampling filter. See `filters`_ for more information. + If omitted, or if the image has mode "1" or "P", it is set to PIL.Image.NEAREST. + fillcolor (tuple or int): Optional fill color (Tuple for RGB Image And int for grayscale) for the area + outside the transform in the output image.(Pillow>=5.0.0) + + .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters + + """ + + def __init__(self, degrees, translate=None, scale=None, shear=None, resample=False, fillcolor=0): + if isinstance(degrees, numbers.Number): + if degrees < 0: + raise ValueError("If degrees is a single number, it must be positive.") + self.degrees = (-degrees, degrees) + else: + assert isinstance(degrees, (tuple, list)) and len(degrees) == 2, \ + "degrees should be a list or tuple and it must be of length 2." + self.degrees = degrees + + if translate is not None: + assert isinstance(translate, (tuple, list)) and len(translate) == 2, \ + "translate should be a list or tuple and it must be of length 2." + for t in translate: + if not (0.0 <= t <= 1.0): + raise ValueError("translation values should be between 0 and 1") + self.translate = translate + + if scale is not None: + assert isinstance(scale, (tuple, list)) and len(scale) == 2, \ + "scale should be a list or tuple and it must be of length 2." 
+ for s in scale: + if s <= 0: + raise ValueError("scale values should be positive") + self.scale = scale + + if shear is not None: + if isinstance(shear, numbers.Number): + if shear < 0: + raise ValueError("If shear is a single number, it must be positive.") + self.shear = (-shear, shear) + else: + assert isinstance(shear, (tuple, list)) and \ + (len(shear) == 2 or len(shear) == 4), \ + "shear should be a list or tuple and it must be of length 2 or 4." + # X-Axis shear with [min, max] + if len(shear) == 2: + self.shear = [shear[0], shear[1], 0., 0.] + elif len(shear) == 4: + self.shear = [s for s in shear] + else: + self.shear = shear + + self.resample = resample + self.fillcolor = fillcolor + + @staticmethod + def get_params(degrees, translate, scale_ranges, shears, img_size): + """Get parameters for affine transformation + + Returns: + sequence: params to be passed to the affine transformation + """ + angle = random.uniform(degrees[0], degrees[1]) + if translate is not None: + max_dx = translate[0] * img_size[0] + max_dy = translate[1] * img_size[1] + translations = (np.round(random.uniform(-max_dx, max_dx)), + np.round(random.uniform(-max_dy, max_dy))) + else: + translations = (0, 0) + + if scale_ranges is not None: + scale = random.uniform(scale_ranges[0], scale_ranges[1]) + else: + scale = 1.0 + + if shears is not None: + if len(shears) == 2: + shear = [random.uniform(shears[0], shears[1]), 0.] + elif len(shears) == 4: + shear = [random.uniform(shears[0], shears[1]), + random.uniform(shears[2], shears[3])] + else: + shear = 0.0 + + return angle, translations, scale, shear + + def __call__(self, img): + """ + img (PIL Image): Image to be transformed. + + Returns: + PIL Image: Affine transformed image. + """ + ret = self.get_params(self.degrees, self.translate, self.scale, self.shear, img.size) + return F.affine(img, *ret, resample=self.resample, fillcolor=self.fillcolor) + + def __repr__(self): + s = '{name}(degrees={degrees}' + if self.translate is not None: + s += ', translate={translate}' + if self.scale is not None: + s += ', scale={scale}' + if self.shear is not None: + s += ', shear={shear}' + if self.resample > 0: + s += ', resample={resample}' + if self.fillcolor != 0: + s += ', fillcolor={fillcolor}' + s += ')' + d = dict(self.__dict__) + d['resample'] = _pil_interpolation_to_str[d['resample']] + return s.format(name=self.__class__.__name__, **d) + + +class Grayscale(object): + """Convert image to grayscale. + + Args: + num_output_channels (int): (1 or 3) number of channels desired for output image + + Returns: + PIL Image: Grayscale version of the input. + - If ``num_output_channels == 1`` : returned image is single channel + - If ``num_output_channels == 3`` : returned image is 3 channel with r == g == b + + """ + + def __init__(self, num_output_channels=1): + self.num_output_channels = num_output_channels + + def __call__(self, img): + """ + Args: + img (PIL Image): Image to be converted to grayscale. + + Returns: + PIL Image: Randomly grayscaled image. + """ + return F.to_grayscale(img, num_output_channels=self.num_output_channels) + + def __repr__(self): + return self.__class__.__name__ + '(num_output_channels={0})'.format(self.num_output_channels) + + +class RandomGrayscale(object): + """Randomly convert image to grayscale with a probability of p (default 0.1). + + Args: + p (float): probability that image should be converted to grayscale. + + Returns: + PIL Image: Grayscale version of the input image with probability p and unchanged + with probability (1-p). 
+ - If input image is 1 channel: grayscale version is 1 channel + - If input image is 3 channel: grayscale version is 3 channel with r == g == b + + """ + + def __init__(self, p=0.1): + self.p = p + + def __call__(self, img): + """ + Args: + img (PIL Image): Image to be converted to grayscale. + + Returns: + PIL Image: Randomly grayscaled image. + """ + num_output_channels = 1 if img.mode == 'L' else 3 + if random.random() < self.p: + return F.to_grayscale(img, num_output_channels=num_output_channels) + return img + + def __repr__(self): + return self.__class__.__name__ + '(p={0})'.format(self.p) + + +class RandomErasing(object): + """ Randomly selects a rectangle region in an image and erases its pixels. + 'Random Erasing Data Augmentation' by Zhong et al. See https://arxiv.org/pdf/1708.04896.pdf + + Args: + p: probability that the random erasing operation will be performed. + scale: range of proportion of erased area against input image. + ratio: range of aspect ratio of erased area. + value: erasing value. Default is 0. If a single int, it is used to + erase all pixels. If a tuple of length 3, it is used to erase + R, G, B channels respectively. + If a str of 'random', erasing each pixel with random values. + inplace: boolean to make this transform inplace. Default set to False. + + Returns: + Erased Image. + + # Examples: + >>> transform = transforms.Compose([ + >>> transforms.RandomHorizontalFlip(), + >>> transforms.ToTensor(), + >>> transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + >>> transforms.RandomErasing(), + >>> ]) + """ + + def __init__(self, p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=0, inplace=False): + assert isinstance(value, (numbers.Number, str, tuple, list)) + if (scale[0] > scale[1]) or (ratio[0] > ratio[1]): + warnings.warn("range should be of kind (min, max)") + if scale[0] < 0 or scale[1] > 1: + raise ValueError("range of scale should be between 0 and 1") + if p < 0 or p > 1: + raise ValueError("range of random erasing probability should be between 0 and 1") + + self.p = p + self.scale = scale + self.ratio = ratio + self.value = value + self.inplace = inplace + + @staticmethod + def get_params(img, scale, ratio, value=0): + """Get parameters for ``erase`` for a random erasing. + + Args: + img (Tensor): Tensor image of size (C, H, W) to be erased. + scale: range of proportion of erased area against input image. + ratio: range of aspect ratio of erased area. + + Returns: + tuple: params (i, j, h, w, v) to be passed to ``erase`` for random erasing. + """ + img_c, img_h, img_w = img.shape + area = img_h * img_w + + for _ in range(10): + erase_area = random.uniform(scale[0], scale[1]) * area + aspect_ratio = random.uniform(ratio[0], ratio[1]) + + h = int(round(math.sqrt(erase_area * aspect_ratio))) + w = int(round(math.sqrt(erase_area / aspect_ratio))) + + if h < img_h and w < img_w: + i = random.randint(0, img_h - h) + j = random.randint(0, img_w - w) + if isinstance(value, numbers.Number): + v = value + elif isinstance(value, torch._six.string_classes): + v = torch.empty([img_c, h, w], dtype=torch.float32).normal_() + elif isinstance(value, (list, tuple)): + v = torch.tensor(value, dtype=torch.float32).view(-1, 1, 1).expand(-1, h, w) + return i, j, h, w, v + + # Return original image + return 0, 0, img_h, img_w, img + + def __call__(self, img): + """ + Args: + img (Tensor): Tensor image of size (C, H, W) to be erased. + + Returns: + img (Tensor): Erased Tensor image. 
+ """ + if random.uniform(0, 1) < self.p: + x, y, h, w, v = self.get_params(img, scale=self.scale, ratio=self.ratio, value=self.value) + return F.erase(img, x, y, h, w, v, self.inplace) + return img diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils.py new file mode 100644 index 0000000000..399dc3fcc5 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils.py @@ -0,0 +1,309 @@ +import math +import pathlib +import warnings +from typing import Union, Optional, List, Tuple, BinaryIO + +import numpy as np +import torch +from PIL import Image, ImageDraw, ImageFont, ImageColor + +__all__ = ["make_grid", "save_image", "draw_bounding_boxes", "draw_segmentation_masks"] + + +@torch.no_grad() +def make_grid( + tensor: Union[torch.Tensor, List[torch.Tensor]], + nrow: int = 8, + padding: int = 2, + normalize: bool = False, + value_range: Optional[Tuple[int, int]] = None, + scale_each: bool = False, + pad_value: int = 0, + **kwargs, +) -> torch.Tensor: + """ + Make a grid of images. + + Args: + tensor (Tensor or list): 4D mini-batch Tensor of shape (B x C x H x W) + or a list of images all of the same size. + nrow (int, optional): Number of images displayed in each row of the grid. + The final grid size is ``(B / nrow, nrow)``. Default: ``8``. + padding (int, optional): amount of padding. Default: ``2``. + normalize (bool, optional): If True, shift the image to the range (0, 1), + by the min and max values specified by ``value_range``. Default: ``False``. + value_range (tuple, optional): tuple (min, max) where min and max are numbers, + then these numbers are used to normalize the image. By default, min and max + are computed from the tensor. + scale_each (bool, optional): If ``True``, scale each image in the batch of + images separately rather than the (min, max) over all images. Default: ``False``. + pad_value (float, optional): Value for the padded pixels. Default: ``0``. + + Returns: + grid (Tensor): the tensor containing grid of images. + """ + if not (torch.is_tensor(tensor) or (isinstance(tensor, list) and all(torch.is_tensor(t) for t in tensor))): + raise TypeError(f"tensor or list of tensors expected, got {type(tensor)}") + + if "range" in kwargs.keys(): + warning = "range will be deprecated, please use value_range instead." + warnings.warn(warning) + value_range = kwargs["range"] + + # if list of tensors, convert to a 4D mini-batch Tensor + if isinstance(tensor, list): + tensor = torch.stack(tensor, dim=0) + + if tensor.dim() == 2: # single image H x W + tensor = tensor.unsqueeze(0) + if tensor.dim() == 3: # single image + if tensor.size(0) == 1: # if single-channel, convert to 3-channel + tensor = torch.cat((tensor, tensor, tensor), 0) + tensor = tensor.unsqueeze(0) + + if tensor.dim() == 4 and tensor.size(1) == 1: # single-channel images + tensor = torch.cat((tensor, tensor, tensor), 1) + + if normalize is True: + tensor = tensor.clone() # avoid modifying tensor in-place + if value_range is not None: + assert isinstance( + value_range, tuple + ), "value_range has to be a tuple (min, max) if specified. 
min and max are numbers" + + def norm_ip(img, low, high): + img.clamp_(min=low, max=high) + img.sub_(low).div_(max(high - low, 1e-5)) + + def norm_range(t, value_range): + if value_range is not None: + norm_ip(t, value_range[0], value_range[1]) + else: + norm_ip(t, float(t.min()), float(t.max())) + + if scale_each is True: + for t in tensor: # loop over mini-batch dimension + norm_range(t, value_range) + else: + norm_range(tensor, value_range) + + if tensor.size(0) == 1: + return tensor.squeeze(0) + + # make the mini-batch of images into a grid + nmaps = tensor.size(0) + xmaps = min(nrow, nmaps) + ymaps = int(math.ceil(float(nmaps) / xmaps)) + height, width = int(tensor.size(2) + padding), int(tensor.size(3) + padding) + num_channels = tensor.size(1) + grid = tensor.new_full((num_channels, height * ymaps + padding, width * xmaps + padding), pad_value) + k = 0 + for y in range(ymaps): + for x in range(xmaps): + if k >= nmaps: + break + # Tensor.copy_() is a valid method but seems to be missing from the stubs + # https://pytorch.org/docs/stable/tensors.html#torch.Tensor.copy_ + grid.narrow(1, y * height + padding, height - padding).narrow( # type: ignore[attr-defined] + 2, x * width + padding, width - padding + ).copy_(tensor[k]) + k = k + 1 + return grid + + +@torch.no_grad() +def save_image( + tensor: Union[torch.Tensor, List[torch.Tensor]], + fp: Union[str, pathlib.Path, BinaryIO], + format: Optional[str] = None, + **kwargs, +) -> None: + """ + Save a given Tensor into an image file. + + Args: + tensor (Tensor or list): Image to be saved. If given a mini-batch tensor, + saves the tensor as a grid of images by calling ``make_grid``. + fp (string or file object): A filename or a file object + format(Optional): If omitted, the format to use is determined from the filename extension. + If a file object was used instead of a filename, this parameter should always be used. + **kwargs: Other arguments are documented in ``make_grid``. + """ + + grid = make_grid(tensor, **kwargs) + # Add 0.5 after unnormalizing to [0, 255] to round to nearest integer + ndarr = grid.mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).to("cpu", torch.uint8).numpy() + im = Image.fromarray(ndarr) + im.save(fp, format=format) + + +@torch.no_grad() +def draw_bounding_boxes( + image: torch.Tensor, + boxes: torch.Tensor, + labels: Optional[List[str]] = None, + colors: Optional[Union[List[Union[str, Tuple[int, int, int]]], str, Tuple[int, int, int]]] = None, + fill: Optional[bool] = False, + width: int = 1, + font: Optional[str] = None, + font_size: int = 10, +) -> torch.Tensor: + + """ + Draws bounding boxes on given image. + The values of the input image should be uint8 between 0 and 255. + If fill is True, Resulting Tensor should be saved as PNG image. + + Args: + image (Tensor): Tensor of shape (C x H x W) and dtype uint8. + boxes (Tensor): Tensor of size (N, 4) containing bounding boxes in (xmin, ymin, xmax, ymax) format. Note that + the boxes are absolute coordinates with respect to the image. In other words: `0 <= xmin < xmax < W` and + `0 <= ymin < ymax < H`. + labels (List[str]): List containing the labels of bounding boxes. + colors (color or list of colors, optional): List containing the colors + of the boxes or single color for all boxes. The color can be represented as + PIL strings e.g. "red" or "#FF00FF", or as RGB tuples e.g. ``(240, 10, 157)``. + fill (bool): If `True` fills the bounding box with specified color. + width (int): Width of bounding box. + font (str): A filename containing a TrueType font. 
If the file is not found in this filename, the loader may + also search in other directories, such as the `fonts/` directory on Windows or `/Library/Fonts/`, + `/System/Library/Fonts/` and `~/Library/Fonts/` on macOS. + font_size (int): The requested font size in points. + + Returns: + img (Tensor[C, H, W]): Image Tensor of dtype uint8 with bounding boxes plotted. + """ + + if not isinstance(image, torch.Tensor): + raise TypeError(f"Tensor expected, got {type(image)}") + elif image.dtype != torch.uint8: + raise ValueError(f"Tensor uint8 expected, got {image.dtype}") + elif image.dim() != 3: + raise ValueError("Pass individual images, not batches") + elif image.size(0) not in {1, 3}: + raise ValueError("Only grayscale and RGB images are supported") + + if image.size(0) == 1: + image = torch.tile(image, (3, 1, 1)) + + ndarr = image.permute(1, 2, 0).numpy() + img_to_draw = Image.fromarray(ndarr) + + img_boxes = boxes.to(torch.int64).tolist() + + if fill: + draw = ImageDraw.Draw(img_to_draw, "RGBA") + + else: + draw = ImageDraw.Draw(img_to_draw) + + txt_font = ImageFont.load_default() if font is None else ImageFont.truetype(font=font, size=font_size) + + for i, bbox in enumerate(img_boxes): + if colors is None: + color = None + elif isinstance(colors, list): + color = colors[i] + else: + color = colors + + if fill: + if color is None: + fill_color = (255, 255, 255, 100) + elif isinstance(color, str): + # This will automatically raise Error if rgb cannot be parsed. + fill_color = ImageColor.getrgb(color) + (100,) + elif isinstance(color, tuple): + fill_color = color + (100,) + draw.rectangle(bbox, width=width, outline=color, fill=fill_color) + else: + draw.rectangle(bbox, width=width, outline=color) + + if labels is not None: + margin = width + 1 + draw.text((bbox[0] + margin, bbox[1] + margin), labels[i], fill=color, font=txt_font) + + return torch.from_numpy(np.array(img_to_draw)).permute(2, 0, 1).to(dtype=torch.uint8) + + +@torch.no_grad() +def draw_segmentation_masks( + image: torch.Tensor, + masks: torch.Tensor, + alpha: float = 0.8, + colors: Optional[Union[List[Union[str, Tuple[int, int, int]]], str, Tuple[int, int, int]]] = None, +) -> torch.Tensor: + + """ + Draws segmentation masks on given RGB image. + The values of the input image should be uint8 between 0 and 255. + + Args: + image (Tensor): Tensor of shape (3, H, W) and dtype uint8. + masks (Tensor): Tensor of shape (num_masks, H, W) or (H, W) and dtype bool. + alpha (float): Float number between 0 and 1 denoting the transparency of the masks. + 0 means full transparency, 1 means no transparency. + colors (color or list of colors, optional): List containing the colors + of the masks or single color for all masks. The color can be represented as + PIL strings e.g. "red" or "#FF00FF", or as RGB tuples e.g. ``(240, 10, 157)``. + By default, random colors are generated for each mask. + + Returns: + img (Tensor[C, H, W]): Image Tensor, with segmentation masks drawn on top. + """ + + if not isinstance(image, torch.Tensor): + raise TypeError(f"The image must be a tensor, got {type(image)}") + elif image.dtype != torch.uint8: + raise ValueError(f"The image dtype must be uint8, got {image.dtype}") + elif image.dim() != 3: + raise ValueError("Pass individual images, not batches") + elif image.size()[0] != 3: + raise ValueError("Pass an RGB image. 
Other Image formats are not supported") + if masks.ndim == 2: + masks = masks[None, :, :] + if masks.ndim != 3: + raise ValueError("masks must be of shape (H, W) or (batch_size, H, W)") + if masks.dtype != torch.bool: + raise ValueError(f"The masks must be of dtype bool. Got {masks.dtype}") + if masks.shape[-2:] != image.shape[-2:]: + raise ValueError("The image and the masks must have the same height and width") + + num_masks = masks.size()[0] + if colors is not None and num_masks > len(colors): + raise ValueError(f"There are more masks ({num_masks}) than colors ({len(colors)})") + + if colors is None: + colors = _generate_color_palette(num_masks) + + if not isinstance(colors, list): + colors = [colors] + if not isinstance(colors[0], (tuple, str)): + raise ValueError("colors must be a tuple or a string, or a list thereof") + if isinstance(colors[0], tuple) and len(colors[0]) != 3: + raise ValueError("It seems that you passed a tuple of colors instead of a list of colors") + + out_dtype = torch.uint8 + + colors_ = [] + for color in colors: + if isinstance(color, str): + color = ImageColor.getrgb(color) + colors_.append(torch.tensor(color, dtype=out_dtype)) + + img_to_draw = image.detach().clone() + # TODO: There might be a way to vectorize this + for mask, color in zip(masks, colors_): + img_to_draw[:, mask] = color[:, None] + + out = image * (1 - alpha) + img_to_draw * alpha + return out.to(out_dtype) + + +def _generate_color_palette(num_masks: int): + palette = torch.tensor([2 ** 25 - 1, 2 ** 15 - 1, 2 ** 21 - 1]) + return [tuple((i * palette) % 255) for i in range(num_masks)] + + +def _log_api_usage_once(obj: object) -> None: + torch._C._log_api_usage_once(f"{obj.__module__}.{obj.__class__.__name__}") diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils_origin.py new file mode 100644 index 0000000000..1a773b3fd2 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils_origin.py @@ -0,0 +1,109 @@ +import torch +import math +irange = range + + +def make_grid(tensor, nrow=8, padding=2, + normalize=False, range=None, scale_each=False, pad_value=0): + """Make a grid of images. + + Args: + tensor (Tensor or list): 4D mini-batch Tensor of shape (B x C x H x W) + or a list of images all of the same size. + nrow (int, optional): Number of images displayed in each row of the grid. + The final grid size is ``(B / nrow, nrow)``. Default: ``8``. + padding (int, optional): amount of padding. Default: ``2``. + normalize (bool, optional): If True, shift the image to the range (0, 1), + by the min and max values specified by :attr:`range`. Default: ``False``. + range (tuple, optional): tuple (min, max) where min and max are numbers, + then these numbers are used to normalize the image. By default, min and max + are computed from the tensor. + scale_each (bool, optional): If ``True``, scale each image in the batch of + images separately rather than the (min, max) over all images. Default: ``False``. + pad_value (float, optional): Value for the padded pixels. Default: ``0``. 
+ + Example: + See this notebook `here `_ + + """ + if not (torch.is_tensor(tensor) or + (isinstance(tensor, list) and all(torch.is_tensor(t) for t in tensor))): + raise TypeError('tensor or list of tensors expected, got {}'.format(type(tensor))) + + # if list of tensors, convert to a 4D mini-batch Tensor + if isinstance(tensor, list): + tensor = torch.stack(tensor, dim=0) + + if tensor.dim() == 2: # single image H x W + tensor = tensor.unsqueeze(0) + if tensor.dim() == 3: # single image + if tensor.size(0) == 1: # if single-channel, convert to 3-channel + tensor = torch.cat((tensor, tensor, tensor), 0) + tensor = tensor.unsqueeze(0) + + if tensor.dim() == 4 and tensor.size(1) == 1: # single-channel images + tensor = torch.cat((tensor, tensor, tensor), 1) + + if normalize is True: + tensor = tensor.clone() # avoid modifying tensor in-place + if range is not None: + assert isinstance(range, tuple), \ + "range has to be a tuple (min, max) if specified. min and max are numbers" + + def norm_ip(img, min, max): + img.clamp_(min=min, max=max) + img.add_(-min).div_(max - min + 1e-5) + + def norm_range(t, range): + if range is not None: + norm_ip(t, range[0], range[1]) + else: + norm_ip(t, float(t.min()), float(t.max())) + + if scale_each is True: + for t in tensor: # loop over mini-batch dimension + norm_range(t, range) + else: + norm_range(tensor, range) + + if tensor.size(0) == 1: + return tensor.squeeze(0) + + # make the mini-batch of images into a grid + nmaps = tensor.size(0) + xmaps = min(nrow, nmaps) + ymaps = int(math.ceil(float(nmaps) / xmaps)) + height, width = int(tensor.size(2) + padding), int(tensor.size(3) + padding) + num_channels = tensor.size(1) + grid = tensor.new_full((num_channels, height * ymaps + padding, width * xmaps + padding), pad_value) + k = 0 + for y in irange(ymaps): + for x in irange(xmaps): + if k >= nmaps: + break + grid.narrow(1, y * height + padding, height - padding)\ + .narrow(2, x * width + padding, width - padding)\ + .copy_(tensor[k]) + k = k + 1 + return grid + + +def save_image(tensor, fp, nrow=8, padding=2, + normalize=False, range=None, scale_each=False, pad_value=0, format=None): + """Save a given Tensor into an image file. + + Args: + tensor (Tensor or list): Image to be saved. If given a mini-batch tensor, + saves the tensor as a grid of images by calling ``make_grid``. + fp (string or file object): A filename or a file object + format(Optional): If omitted, the format to use is determined from the filename extension. + If a file object was used instead of a filename, this parameter should always be used. + **kwargs: Other arguments are documented in ``make_grid``. 
+ """ + from PIL import Image + grid = make_grid(tensor, nrow=nrow, padding=padding, pad_value=pad_value, + normalize=normalize, range=range, scale_each=scale_each) + # Add 0.5 after unnormalizing to [0, 255] to round to nearest integer + ndarr = grid.mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).to('cpu', torch.uint8).numpy() + im = Image.fromarray(ndarr) + im.save(fp, format=format) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/version.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/version.py new file mode 100644 index 0000000000..146fc171ca --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/version.py @@ -0,0 +1,5 @@ +__version__ = '0.6.0' +git_version = '82fd1c85d7e42d93255ed01f763ca40d58f288e3' +from torchvision.extension import _check_cuda_version +if _check_cuda_version() > 0: + cuda = _check_cuda_version() diff --git a/PyTorch/contrib/cv/classification/SSDLite320/train.py b/PyTorch/contrib/cv/classification/SSDLite320/train.py new file mode 100644 index 0000000000..ba2de2a72d --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/train.py @@ -0,0 +1,281 @@ +r"""PyTorch Detection Training. + +To run in a multi-gpu environment, use the distributed launcher:: + +python -m torch.distributed.launch --nproc_per_node=8 train.py --world-size 8 --dataset coco --model ssdlite320_mobilenet_v3_large --epochs 660 --aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr --lr 0.15 --batch-size 512 --weight-decay 0.00004 --data-augmentation ssdlite + +python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py --dataset coco --model ssdlite320_mobilenet_v3_large --epochs 660 --aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr --lr 0.15 --batch-size 24 --weight-decay 0.00004 --data-augmentation ssdlite + + python -m torch.distributed.launch --nproc_per_node=$NGPU --use_env \ + train.py ... --world-size $NGPU + +The default hyperparameters are tuned for training on 8 gpus and 2 images per gpu. + --lr 0.02 --batch-size 2 --world-size 8 +If you use different number of gpus, the learning rate should be changed to 0.02/8*$NGPU. + +On top of that, for training Faster/Mask R-CNN, the default hyperparameters are + --epochs 26 --lr-steps 16 22 --aspect-ratio-group-factor 3 + +Also, if you train Keypoint R-CNN, the default hyperparameters are + --epochs 46 --lr-steps 36 43 --aspect-ratio-group-factor 3 +Because the number of images is smaller in the person keypoint subset of COCO, +the number of epochs should be adapted so that we have the same number of iterations. 
+ +跑通1(py37): +CUDA_VISIBLE_DEVICES=0,1,2,3,4,5 python train.py --dataset coco --model ssdlite320_mobilenet_v3_large --epochs 660 --aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr --lr 0.15 --batch-size 24 --weight-decay 0.00004 --data-augmentation ssdlite +Not using distributed mode (单卡跑通的) + +CUDA_VISIBLE_DEVICES=0,1,2,3,4,5 python train.py --test-only (单卡跑通) + + +torchrun --nproc_per_node=8 train.py --dataset coco --model ssdlite320_mobilenet_v3_large --epochs 660 --aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr --lr 0.15 --batch-size 24\ + --weight-decay 0.00004 --data-augmentation ssdlite + +""" +import datetime +import os +import sys +import time + +import presets +import torch +import torch.utils.data +import torchvision +import torchvision.models.detection +import torchvision.models.detection.mask_rcnn +import torchvision.models.detection +import utils +from coco_utils import get_coco, get_coco_kp +from engine import train_one_epoch, evaluate +from group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups + +from torch.utils.data import dataloader +from apex import amp + +os.environ['MASTER_ADDR'] = '127.0.0.1' +os.environ['MASTER_PORT'] = '29688' + +# for servers to immediately record the logs +def flush_print(func): + def new_print(*args, **kwargs): + func(*args, **kwargs) + sys.stdout.flush() + return new_print +print = flush_print(print) + +def get_dataset(name, image_set, transform, data_path): + paths = {"coco": (data_path, get_coco, 91), "coco_kp": (data_path, get_coco_kp, 2)} + p, ds_fn, num_classes = paths[name] + + ds = ds_fn(p, image_set=image_set, transforms=transform) + return ds, num_classes + +def get_transform(train, data_augmentation): + return presets.DetectionPresetTrain(data_augmentation) if train else presets.DetectionPresetEval() + + +def get_args_parser(add_help=True): + import argparse + + parser = argparse.ArgumentParser(description="PyTorch Detection Training", add_help=add_help) + + parser.add_argument("--data-path", default="", type=str, help="dataset path")#"/datasets01/COCO/022719/" + parser.add_argument("--dataset", default="coco", type=str, help="dataset name") + parser.add_argument("--model", default="ssdlite320_mobilenet_v3_large", type=str, help="model name")#"maskrcnn_resnet50_fpn" + parser.add_argument("--device", default="npu", type=str, help="device (Use cuda or cpu Default: cuda)") + parser.add_argument( + "-b", "--batch-size", default=128, type=int, help="images per gpu, the total batch size is $NGPU x batch_size" + ) + parser.add_argument("--epochs", default=600, type=int, metavar="N", help="number of total epochs to run") + parser.add_argument( + "-j", "--workers", default=4, type=int, metavar="N", help="number of data loading workers (default: 4)" #4 + ) + parser.add_argument( + "--lr", + default=0.15, + type=float, + help="initial learning rate, 0.02 is the default value for training on 8 gpus and 2 images_per_gpu", + ) + parser.add_argument("--momentum", default=0.9, type=float, metavar="M", help="momentum") + parser.add_argument( + "--wd", + "--weight-decay", + default=0.00004, #1e-4, + type=float, + metavar="W", + help="weight decay (default: 1e-4)", + dest="weight_decay", + ) + parser.add_argument( + "--lr-scheduler", default="cosineannealinglr", type=str, help="name of lr scheduler (default: multisteplr)" #multisteplr + ) + parser.add_argument( + "--lr-step-size", default=8, type=int, help="decrease lr every step-size epochs (multisteplr scheduler only)" + ) + parser.add_argument( + 
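+        # epoch milestones for the MultiStepLR schedule; only consulted when
+        # --lr-scheduler=multisteplr, not with the cosineannealinglr default
+        # used by this SSDLite recipe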
"--lr-steps", + default=[16, 22], + nargs="+", + type=int, + help="decrease lr every step-size epochs (multisteplr scheduler only)", + ) + parser.add_argument( + "--lr-gamma", default=0.1, type=float, help="decrease lr by a factor of lr-gamma (multisteplr scheduler only)" + ) + parser.add_argument("--print-freq", default=1, type=int, help="print frequency") + parser.add_argument("--output-dir", default="./multigpu", type=str, help="path to save outputs") + # parser.add_argument("--resume", default="./ssdlite320_mobilenet_v3_large_coco-a79551df.pth", + # type=str, help="path of checkpoint")#model_128.pth + parser.add_argument("--resume", default="./multigpu/model_599.pth", type=str, help="path of checkpoint") + + parser.add_argument("--start_epoch", default=0, type=int, help="start epoch") + parser.add_argument("--aspect-ratio-group-factor", default=3, type=int) + parser.add_argument("--rpn-score-thresh", default=None, type=float, help="rpn score threshold for faster-rcnn") + parser.add_argument( + "--trainable-backbone-layers", default=None, type=int, help="number of trainable layers of backbone" + ) + parser.add_argument( + "--data-augmentation", default="ssdlite", type=str, help="data augmentation policy (default: hflip)" #hflip + ) + parser.add_argument( + "--sync-bn", + dest="sync_bn", + help="Use sync batch norm", + action="store_true", + ) + parser.add_argument( + "--test-only", + dest="test_only", + help="Only test the model", + action="store_true", + ) + parser.add_argument( + "--pretrained", + dest="pretrained", + help="Use pre-trained models from the modelzoo", + action="store_true", + ) + + # distributed training parameters + parser.add_argument("--world-size", default=20, type=int, help="number of distributed processes") + parser.add_argument("--dist-url", default="env://", type=str, help="url used to set up distributed training") + + # Mixed precision training parameters + parser.add_argument("--amp", action="store_true", help="Use torch.cuda.amp for mixed precision training") + + return parser + + +def main(args): + if args.output_dir: + utils.mkdir(args.output_dir) + + utils.init_distributed_mode(args) + print(args) + + device = torch.device(args.device) + + # Data loading code + print("Loading data") + + dataset, num_classes = get_dataset( + args.dataset, "train", get_transform(True, args.data_augmentation), args.data_path + ) + dataset_test, _ = get_dataset(args.dataset, "val", get_transform(False, args.data_augmentation), args.data_path) + + print("Creating data loaders") + if args.distributed: + train_sampler = torch.utils.data.distributed.DistributedSampler(dataset) + test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test) + else: + train_sampler = torch.utils.data.RandomSampler(dataset) + test_sampler = torch.utils.data.SequentialSampler(dataset_test) + + if args.aspect_ratio_group_factor >= 0: + group_ids = create_aspect_ratio_groups(dataset, k=args.aspect_ratio_group_factor) + train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size) + else: + train_batch_sampler = torch.utils.data.BatchSampler(train_sampler, args.batch_size, drop_last=True) + + data_loader = torch.utils.data.DataLoader( + dataset, batch_sampler=train_batch_sampler, num_workers=args.workers, collate_fn=utils.collate_fn + ) + + data_loader_test = torch.utils.data.DataLoader( + dataset_test, batch_size=1, sampler=test_sampler, num_workers=args.workers, collate_fn=utils.collate_fn + ) + + print("Creating model") + kwargs = {"trainable_backbone_layers": 
args.trainable_backbone_layers} + + model = torchvision.models.detection.ssdlite320_mobilenet_v3_large(pretrained=False) + + model.to(device) + + params = [p for p in model.parameters() if p.requires_grad] + optimizer = torch.optim.SGD(params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) + + model, optimizer = amp.initialize(model, optimizer, opt_level="O1", loss_scale=128.0, combine_grad=True) + + if args.distributed and args.sync_bn: + model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) + + model_without_ddp = model + if args.distributed: + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) + model_without_ddp = model.module + + args.lr_scheduler = args.lr_scheduler.lower() + if args.lr_scheduler == "multisteplr": + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma) + elif args.lr_scheduler == "cosineannealinglr": + lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.epochs) + else: + raise RuntimeError( + f"Invalid lr scheduler '{args.lr_scheduler}'. Only MultiStepLR and CosineAnnealingLR are supported." + ) + + if args.resume: + print(args.resume) + checkpoint = torch.load(args.resume, map_location="cpu") + model_without_ddp.load_state_dict(checkpoint["model"]) + optimizer.load_state_dict(checkpoint["optimizer"]) + lr_scheduler.load_state_dict(checkpoint["lr_scheduler"]) + args.start_epoch = checkpoint["epoch"] + 1 + amp.load_state_dict(checkpoint['amp']) + + if args.test_only: + evaluate(model_without_ddp, data_loader_test, device=device) + return + + print("Start training",'args.distributed:',args.distributed) + start_time = time.time() + for epoch in range(0, 1): + if args.distributed: + train_sampler.set_epoch(epoch) + + train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq) + lr_scheduler.step() + if args.output_dir: + checkpoint = { + "model": model_without_ddp.state_dict(), + "optimizer": optimizer.state_dict(), + "lr_scheduler": lr_scheduler.state_dict(), + "args": args, + "epoch": epoch, + 'amp': amp.state_dict() + } + utils.save_on_master(checkpoint, os.path.join(args.output_dir, f"model_{epoch}.pth")) + utils.save_on_master(checkpoint, os.path.join(args.output_dir, "checkpoint.pth")) + + # evaluate after every epoch + evaluate(model, data_loader_test, device=device) + + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + print(f"Training time {total_time_str}") + + +if __name__ == "__main__": + args = get_args_parser().parse_args() + main(args) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/transform_ssd.py b/PyTorch/contrib/cv/classification/SSDLite320/transform_ssd.py new file mode 100644 index 0000000000..f0683a1a37 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/transform_ssd.py @@ -0,0 +1,302 @@ +import math +from typing import List, Tuple, Dict, Optional + +import torch +import torchvision +import pdb +import numpy as np +from torch import nn, Tensor +from image_list import ImageList + + +@torch.jit.unused +def _get_shape_onnx(image: Tensor) -> Tensor: + from torch.onnx import operators + + return operators.shape_as_tensor(image)[-2:] + + +@torch.jit.unused +def _fake_cast_onnx(v: Tensor) -> float: + # ONNX requires a tensor but here we fake its type for JIT. 
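+    # During ONNX export the scale must remain a Tensor so that it is captured
+    # in the traced graph; the float annotation only satisfies TorchScript.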
+ return v + + +def _resize_image_and_masks( + image: Tensor, + self_min_size: float, + self_max_size: float, + target: Optional[Dict[str, Tensor]] = None, + fixed_size: Optional[Tuple[int, int]] = None, +) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + if torchvision._is_tracing(): + im_shape = _get_shape_onnx(image) + else: + im_shape = torch.tensor(image.shape[-2:]) + + size: Optional[List[int]] = None + scale_factor: Optional[float] = None + recompute_scale_factor: Optional[bool] = None + if fixed_size is not None: + size = [fixed_size[1], fixed_size[0]] + else: + min_size = torch.min(im_shape).to(dtype=torch.float32) + max_size = torch.max(im_shape).to(dtype=torch.float32) + scale = torch.min(self_min_size / min_size, self_max_size / max_size) + + if torchvision._is_tracing(): + scale_factor = _fake_cast_onnx(scale) + else: + scale_factor = scale.item() + recompute_scale_factor = True + + image = torch.nn.functional.interpolate( + image[None], + size=size, + scale_factor=scale_factor, + mode="bilinear", + recompute_scale_factor=recompute_scale_factor, + align_corners=False, + )[0] + + if target is None: + return image, target + + if "masks" in target: + mask = target["masks"] + mask = torch.nn.functional.interpolate( + mask[:, None].float(), size=size, scale_factor=scale_factor, recompute_scale_factor=recompute_scale_factor + )[:, 0].byte() + target["masks"] = mask + return image, target + + +class GeneralizedRCNNTransform(nn.Module): + """ + Performs input / target transformation before feeding the data to a GeneralizedRCNN + model. + + The transformations it perform are: + - input normalization (mean subtraction and std division) + - input / target resizing to match min_size / max_size + + It returns a ImageList for the inputs, and a List[Dict[Tensor]] for the targets + """ + + def __init__( + self, + device: 'cuda', + min_size: int, + max_size: int, + image_mean: List[float], + image_std: List[float], + size_divisible: int = 32, + fixed_size: Optional[Tuple[int, int]] = None, + ): + super().__init__() + if not isinstance(min_size, (list, tuple)): + min_size = (min_size,) + self.min_size = min_size + self.max_size = max_size + self.image_mean = image_mean + self.image_std = image_std + self.size_divisible = size_divisible + self.fixed_size = fixed_size + self.device = device + + def forward( + self, images: List[Tensor], targets: Optional[List[Dict[str, Tensor]]] = None + ) -> Tuple[ImageList, Optional[List[Dict[str, Tensor]]]]: + images = [img for img in images] + if targets is not None: + # make a copy of targets to avoid modifying it in-place + # once torchscript supports dict comprehension + # this can be simplified as follows + # targets = [{k: v for k,v in t.items()} for t in targets] + targets_copy: List[Dict[str, Tensor]] = [] + for t in targets: + data: Dict[str, Tensor] = {} + for k, v in t.items(): + data[k] = v + targets_copy.append(data) + targets = targets_copy + for i in range(len(images)): + image = images[i] + target_index = targets[i] if targets is not None else None + + if image.dim() != 3: + raise ValueError(f"images is expected to be a list of 3d tensors of shape [C, H, W], got {image.shape}") + image = self.normalize(image) + image, target_index = self.resize(image, target_index) + + image = image.to(self.device) + images[i] = image + if targets is not None and target_index is not None: + targets[i] = target_index + image_sizes = [img.shape[-2:] for img in images] + images = self.batch_images(images, size_divisible=self.size_divisible) + image_sizes_list: 
List[Tuple[int, int]] = [] + for image_size in image_sizes: + assert len(image_size) == 2 + image_sizes_list.append((image_size[0], image_size[1])) + + image_list = ImageList(images, image_sizes_list) + return image_list, targets + + def normalize(self, image: Tensor) -> Tensor: + if not image.is_floating_point(): + raise TypeError( + f"Expected input images to be of floating type (in range [0, 1]), " + f"but found type {image.dtype} instead" + ) + dtype, device = image.dtype, image.device + mean = torch.as_tensor(self.image_mean, dtype=dtype, device=device) + std = torch.as_tensor(self.image_std, dtype=dtype, device=device) + return (image - mean[:, None, None]) / std[:, None, None] + + def torch_choice(self, k: List[int]) -> int: + """ + Implements `random.choice` via torch ops so it can be compiled with + TorchScript. Remove if https://github.com/pytorch/pytorch/issues/25803 + is fixed. + """ + index = int(torch.empty(1).uniform_(0.0, float(len(k))).item()) + return k[index] + + def resize( + self, + image: Tensor, + target: Optional[Dict[str, Tensor]] = None, + ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + h, w = image.shape[-2:] + if self.training: + size = float(self.torch_choice(self.min_size)) + else: + # FIXME assume for now that testing uses the largest scale + size = float(self.min_size[-1]) + image, target = _resize_image_and_masks(image, size, float(self.max_size), target, self.fixed_size) + + if target is None: + return image, target + + # 填充值 + # 固定ground_box数量 + max_boxes = 20 + classes = 0 + target = fix_target(target, max_boxes, classes) + + bbox = target["boxes"] + bbox = resize_boxes(bbox, (h, w), image.shape[-2:]) + target["boxes"] = bbox + + if "keypoints" in target: + keypoints = target["keypoints"] + keypoints = resize_keypoints(keypoints, (h, w), image.shape[-2:]) + target["keypoints"] = keypoints + return image, target + + # _onnx_batch_images() is an implementation of + # batch_images() that is supported by ONNX tracing. 
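+    # Both implementations zero-pad every image up to the largest H and W in the
+    # batch, rounded up to a multiple of size_divisible. Illustrative example:
+    # images of shape (3, 300, 400) and (3, 320, 320) with size_divisible=32 are
+    # batched into a single tensor of shape (2, 3, 320, 416).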
+ @torch.jit.unused + def _onnx_batch_images(self, images: List[Tensor], size_divisible: int = 32) -> Tensor: + max_size = [] + for i in range(images[0].dim()): + max_size_i = torch.max(torch.stack([img.shape[i] for img in images]).to(torch.float32)).to(torch.int64) + max_size.append(max_size_i) + stride = size_divisible + max_size[1] = (torch.ceil((max_size[1].to(torch.float32)) / stride) * stride).to(torch.int64) + max_size[2] = (torch.ceil((max_size[2].to(torch.float32)) / stride) * stride).to(torch.int64) + max_size = tuple(max_size) + + # work around for + # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) + # which is not yet supported in onnx + padded_imgs = [] + for img in images: + padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))] + padded_img = torch.nn.functional.pad(img, (0, padding[2], 0, padding[1], 0, padding[0])) + padded_imgs.append(padded_img) + + return torch.stack(padded_imgs) + + def max_by_axis(self, the_list: List[List[int]]) -> List[int]: + maxes = the_list[0] + for sublist in the_list[1:]: + for index, item in enumerate(sublist): + maxes[index] = max(maxes[index], item) + return maxes + + def batch_images(self, images: List[Tensor], size_divisible: int = 32) -> Tensor: + if torchvision._is_tracing(): + # batch_images() does not export well to ONNX + # call _onnx_batch_images() instead + return self._onnx_batch_images(images, size_divisible) + + max_size = self.max_by_axis([list(img.shape) for img in images]) + stride = float(size_divisible) + max_size = list(max_size) + max_size[1] = int(math.ceil(float(max_size[1]) / stride) * stride) + max_size[2] = int(math.ceil(float(max_size[2]) / stride) * stride) + + batch_shape = [len(images)] + max_size + batched_imgs = images[0].new_full(batch_shape, 0) + for i in range(batched_imgs.shape[0]): + img = images[i] + batched_imgs[i, : img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) + + return batched_imgs + +def resize_boxes(boxes: Tensor, original_size: List[int], new_size: List[int]) -> Tensor: + ratios = [ + torch.tensor(s, dtype=torch.float32, device=boxes.device) + / torch.tensor(s_orig, dtype=torch.float32, device=boxes.device) + for s, s_orig in zip(new_size, original_size) + ] + ratio_height, ratio_width = ratios + xmin, ymin, xmax, ymax = boxes.unbind(1) + + xmin = xmin * ratio_width + xmax = xmax * ratio_width + ymin = ymin * ratio_height + ymax = ymax * ratio_height + return torch.stack((xmin, ymin, xmax, ymax), dim=1) + +def fix_target(target, max_boxes, classes): + target_pad = [] + boxes_num = target['boxes'].shape[0] + if boxes_num < max_boxes: + diff_num = max_boxes - boxes_num + # box对齐 + np_boxes = target['boxes'].numpy() + np_boxes = np.concatenate((np_boxes, np.zeros([diff_num, 4])), axis=0) + target['boxes'] = torch.as_tensor(np_boxes,dtype=torch.float) + # label对齐 + padding_label = torch.zeros(diff_num) + classes + np_padding_label = padding_label.long().numpy() + np_labels = target['labels'].numpy() + np_labels = np.concatenate((np_labels, np_padding_label), axis=0) + target['labels'] = torch.from_numpy(np_labels) + # mask对齐 + padding_mask = target['masks'][0].unsqueeze(0) + np_padding_mask = padding_mask.numpy() + np_masks = target['masks'].numpy() + np_masks = np.concatenate((np_masks, np.tile(np_padding_mask, (diff_num, 1, 1))), axis=0) + target['masks'] = torch.from_numpy(np_masks) + # area对齐 + np_area = target['area'].numpy() + np_area = np.concatenate((np_area, np.zeros(diff_num)), axis=0) + target['area'] = torch.as_tensor(np_area, dtype=torch.float) + 
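+        # 'iscrowd' is padded with zeros below, mirroring the padding of boxes,
+        # labels, masks and area above, so that every target carries exactly
+        # max_boxes entries; padded labels are filled with `classes` (0 here).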
# iscrowd对齐 + padding_iscrowd = torch.zeros(diff_num).long() + np_padding_iscrowd = padding_iscrowd.numpy() + np_iscrowd = target['iscrowd'].numpy() + np_iscrowd = np.concatenate((np_iscrowd, np_padding_iscrowd), axis=0) + target['iscrowd'] = torch.from_numpy(np_iscrowd) + else: + select_idx = torch.randperm(boxes_num)[:max_boxes] + target['boxes'] = target['boxes'][select_idx] + target['labels'] = target['labels'][select_idx] + target['masks'] = target['masks'][select_idx] + target['area'] = target['area'][select_idx] + target['iscrowd'] = target['iscrowd'][select_idx] + return target diff --git a/PyTorch/contrib/cv/classification/SSDLite320/transforms.py b/PyTorch/contrib/cv/classification/SSDLite320/transforms.py new file mode 100644 index 0000000000..9415efb893 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/transforms.py @@ -0,0 +1,286 @@ +from typing import List, Tuple, Dict, Optional + +import torch +import torchvision +from torch import nn, Tensor +from torchvision.transforms import functional as F +from torchvision.transforms import transforms as T +# from transforms import transforms as T + + + +def _flip_coco_person_keypoints(kps, width): + flip_inds = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] + flipped_data = kps[:, flip_inds] + flipped_data[..., 0] = width - flipped_data[..., 0] + # Maintain COCO convention that if visibility == 0, then x, y = 0 + inds = flipped_data[..., 2] == 0 + flipped_data[inds] = 0 + return flipped_data + + +class Compose: + def __init__(self, transforms): + self.transforms = transforms + + def __call__(self, image, target): + for t in self.transforms: + image, target = t(image, target) + + return image, target + + +class RandomHorizontalFlip(T.RandomHorizontalFlip): + def forward( + self, image: Tensor, target: Optional[Dict[str, Tensor]] = None + ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + if torch.rand(1) < self.p: + image = F.hflip(image) + if target is not None: + width, _ = F.get_image_size(image) + target["boxes"][:, [0, 2]] = width - target["boxes"][:, [2, 0]] + if "masks" in target: + target["masks"] = target["masks"].flip(-1) + if "keypoints" in target: + keypoints = target["keypoints"] + keypoints = _flip_coco_person_keypoints(keypoints, width) + target["keypoints"] = keypoints + return image, target + + +class ToTensor(nn.Module): + def forward( + self, image: Tensor, target: Optional[Dict[str, Tensor]] = None + ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + image = F.pil_to_tensor(image) + image = F.convert_image_dtype(image) + return image, target + + +class PILToTensor(nn.Module): + def forward( + self, image: Tensor, target: Optional[Dict[str, Tensor]] = None + ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + image = F.pil_to_tensor(image) + return image, target + + +class ConvertImageDtype(nn.Module): + def __init__(self, dtype: torch.dtype) -> None: + super().__init__() + self.dtype = dtype + + def forward( + self, image: Tensor, target: Optional[Dict[str, Tensor]] = None + ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + image = F.convert_image_dtype(image, self.dtype) + return image, target + + +class RandomIoUCrop(nn.Module): + def __init__( + self, + min_scale: float = 0.3, + max_scale: float = 1.0, + min_aspect_ratio: float = 0.5, + max_aspect_ratio: float = 2.0, + sampler_options: Optional[List[float]] = None, + trials: int = 40, + ): + super().__init__() + # Configuration similar to https://github.com/weiliu89/caffe/blob/ssd/examples/ssd/ssd_coco.py#L89-L174 + self.min_scale = 
min_scale + self.max_scale = max_scale + self.min_aspect_ratio = min_aspect_ratio + self.max_aspect_ratio = max_aspect_ratio + if sampler_options is None: + sampler_options = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0] + self.options = sampler_options + self.trials = trials + + def forward( + self, image: Tensor, target: Optional[Dict[str, Tensor]] = None + ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + if target is None: + raise ValueError("The targets can't be None for this transform.") + + if isinstance(image, torch.Tensor): + if image.ndimension() not in {2, 3}: + raise ValueError(f"image should be 2/3 dimensional. Got {image.ndimension()} dimensions.") + elif image.ndimension() == 2: + image = image.unsqueeze(0) + + orig_w, orig_h = F.get_image_size(image) + + while True: + # sample an option + idx = int(torch.randint(low=0, high=len(self.options), size=(1,))) + min_jaccard_overlap = self.options[idx] + if min_jaccard_overlap >= 1.0: # a value larger than 1 encodes the leave as-is option + return image, target + + for _ in range(self.trials): + # check the aspect ratio limitations + r = self.min_scale + (self.max_scale - self.min_scale) * torch.rand(2) + new_w = int(orig_w * r[0]) + new_h = int(orig_h * r[1]) + aspect_ratio = new_w / new_h + if not (self.min_aspect_ratio <= aspect_ratio <= self.max_aspect_ratio): + continue + + # check for 0 area crops + r = torch.rand(2) + left = int((orig_w - new_w) * r[0]) + top = int((orig_h - new_h) * r[1]) + right = left + new_w + bottom = top + new_h + if left == right or top == bottom: + continue + + # check for any valid boxes with centers within the crop area + cx = 0.5 * (target["boxes"][:, 0] + target["boxes"][:, 2]) + cy = 0.5 * (target["boxes"][:, 1] + target["boxes"][:, 3]) + is_within_crop_area = (left < cx) & (cx < right) & (top < cy) & (cy < bottom) + if not is_within_crop_area.any(): + continue + + # check at least 1 box with jaccard limitations + boxes = target["boxes"][is_within_crop_area] + ious = torchvision.ops.boxes.box_iou( + boxes, torch.tensor([[left, top, right, bottom]], dtype=boxes.dtype, device=boxes.device) + ) + if ious.max() < min_jaccard_overlap: + continue + + # keep only valid boxes and perform cropping + target["boxes"] = boxes + target["labels"] = target["labels"][is_within_crop_area] + target["boxes"][:, 0::2] -= left + target["boxes"][:, 1::2] -= top + target["boxes"][:, 0::2].clamp_(min=0, max=new_w) + target["boxes"][:, 1::2].clamp_(min=0, max=new_h) + image = F.crop(image, top, left, new_h, new_w) + + return image, target + + +class RandomZoomOut(nn.Module): + def __init__( + self, fill: Optional[List[float]] = None, side_range: Tuple[float, float] = (1.0, 4.0), p: float = 0.5 + ): + super().__init__() + if fill is None: + fill = [0.0, 0.0, 0.0] + self.fill = fill + self.side_range = side_range + if side_range[0] < 1.0 or side_range[0] > side_range[1]: + raise ValueError(f"Invalid canvas side range provided {side_range}.") + self.p = p + + @torch.jit.unused + def _get_fill_value(self, is_pil): + # type: (bool) -> int + # We fake the type to make it work on JIT + return tuple(int(x) for x in self.fill) if is_pil else 0 + + def forward( + self, image: Tensor, target: Optional[Dict[str, Tensor]] = None + ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + if isinstance(image, torch.Tensor): + if image.ndimension() not in {2, 3}: + raise ValueError(f"image should be 2/3 dimensional. 
Got {image.ndimension()} dimensions.") + elif image.ndimension() == 2: + image = image.unsqueeze(0) + + if torch.rand(1) < self.p: + return image, target + + orig_w, orig_h = F.get_image_size(image) + + r = self.side_range[0] + torch.rand(1) * (self.side_range[1] - self.side_range[0]) + canvas_width = int(orig_w * r) + canvas_height = int(orig_h * r) + + r = torch.rand(2) + left = int((canvas_width - orig_w) * r[0]) + top = int((canvas_height - orig_h) * r[1]) + right = canvas_width - (left + orig_w) + bottom = canvas_height - (top + orig_h) + + if torch.jit.is_scripting(): + fill = 0 + else: + fill = self._get_fill_value(F._is_pil_image(image)) + + image = F.pad(image, [left, top, right, bottom], fill=fill) + if isinstance(image, torch.Tensor): + v = torch.tensor(self.fill, device=image.device, dtype=image.dtype).view(-1, 1, 1) + image[..., :top, :] = image[..., :, :left] = image[..., (top + orig_h) :, :] = image[ + ..., :, (left + orig_w) : + ] = v + + if target is not None: + target["boxes"][:, 0::2] += left + target["boxes"][:, 1::2] += top + + return image, target + + +class RandomPhotometricDistort(nn.Module): + def __init__( + self, + contrast: Tuple[float] = (0.5, 1.5), + saturation: Tuple[float] = (0.5, 1.5), + hue: Tuple[float] = (-0.05, 0.05), + brightness: Tuple[float] = (0.875, 1.125), + p: float = 0.5, + ): + super().__init__() + self._brightness = T.ColorJitter(brightness=brightness) + self._contrast = T.ColorJitter(contrast=contrast) + self._hue = T.ColorJitter(hue=hue) + self._saturation = T.ColorJitter(saturation=saturation) + self.p = p + + def forward( + self, image: Tensor, target: Optional[Dict[str, Tensor]] = None + ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]: + if isinstance(image, torch.Tensor): + if image.ndimension() not in {2, 3}: + raise ValueError(f"image should be 2/3 dimensional. Got {image.ndimension()} dimensions.") + elif image.ndimension() == 2: + image = image.unsqueeze(0) + + r = torch.rand(7) + + if r[0] < self.p: + image = self._brightness(image) + + contrast_before = r[1] < 0.5 + if contrast_before: + if r[2] < self.p: + image = self._contrast(image) + + if r[3] < self.p: + image = self._saturation(image) + + if r[4] < self.p: + image = self._hue(image) + + if not contrast_before: + if r[5] < self.p: + image = self._contrast(image) + + if r[6] < self.p: + channels = F.get_image_num_channels(image) + permutation = torch.randperm(channels) + + is_pil = F._is_pil_image(image) + if is_pil: + image = F.pil_to_tensor(image) + image = F.convert_image_dtype(image) + image = image[..., permutation, :, :] + if is_pil: + image = F.to_pil_image(image) + + return image, target diff --git a/PyTorch/contrib/cv/classification/SSDLite320/utils.py b/PyTorch/contrib/cv/classification/SSDLite320/utils.py new file mode 100644 index 0000000000..00bb4bbce1 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/utils.py @@ -0,0 +1,288 @@ +import datetime +import errno +import os +import time +from collections import defaultdict, deque + +import torch +import torch.distributed as dist + + +class SmoothedValue: + """Track a series of values and provide access to smoothed values over a + window or the global series average. 
+ """ + + def __init__(self, window_size=20, fmt=None): + if fmt is None: + fmt = "{median:.4f} ({global_avg:.4f})" + self.deque = deque(maxlen=window_size) + self.total = 0.0 + self.count = 0 + self.fmt = fmt + + def update(self, value, n=1): + self.deque.append(value) + self.count += n + self.total += value * n + + def synchronize_between_processes(self): + """ + Warning: does not synchronize the deque! + """ + if not is_dist_avail_and_initialized(): + return + t = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda") + dist.barrier() + dist.all_reduce(t) + t = t.tolist() + self.count = int(t[0]) + self.total = t[1] + + @property + def median(self): + d = torch.tensor(list(self.deque)) + return d.median().item() + + @property + def avg(self): + d = torch.tensor(list(self.deque), dtype=torch.float32) + return d.mean().item() + + @property + def global_avg(self): + return self.total / self.count + + @property + def max(self): + return max(self.deque) + + @property + def value(self): + return self.deque[-1] + + def __str__(self): + return self.fmt.format( + median=self.median, avg=self.avg, global_avg=self.global_avg, max=self.max, value=self.value + ) + + +def all_gather(data): + """ + Run all_gather on arbitrary picklable data (not necessarily tensors) + Args: + data: any picklable object + Returns: + list[data]: list of data gathered from each rank + """ + world_size = get_world_size() + if world_size == 1: + return [data] + data_list = [None] * world_size + dist.all_gather_object(data_list, data) + return data_list + + +def reduce_dict(input_dict, average=True): + """ + Args: + input_dict (dict): all the values will be reduced + average (bool): whether to do average or sum + Reduce the values in the dictionary from all processes so that all processes + have the averaged results. Returns a dict with the same fields as + input_dict, after reduction. 
+ """ + world_size = get_world_size() + if world_size < 2: + return input_dict + with torch.no_grad(): #torch.inference_mode(): + names = [] + values = [] + # sort the keys so that they are consistent across processes + for k in sorted(input_dict.keys()): + names.append(k) + values.append(input_dict[k]) + values = torch.stack(values, dim=0) + dist.all_reduce(values) + if average: + values /= world_size + reduced_dict = {k: v for k, v in zip(names, values)} + return reduced_dict + + +class MetricLogger: + def __init__(self, delimiter="\t"): + self.meters = defaultdict(SmoothedValue) + self.delimiter = delimiter + + def update(self, **kwargs): + for k, v in kwargs.items(): + if isinstance(v, torch.Tensor): + v = v.item() + assert isinstance(v, (float, int)) + self.meters[k].update(v) + + def __getattr__(self, attr): + if attr in self.meters: + return self.meters[attr] + if attr in self.__dict__: + return self.__dict__[attr] + raise AttributeError(f"'{type(self).__name__}' object has no attribute '{attr}'") + + def __str__(self): + loss_str = [] + for name, meter in self.meters.items(): + loss_str.append(f"{name}: {str(meter)}") + return self.delimiter.join(loss_str) + + def synchronize_between_processes(self): + for meter in self.meters.values(): + meter.synchronize_between_processes() + + def add_meter(self, name, meter): + self.meters[name] = meter + + def log_every(self, iterable, print_freq, header=None): + i = 0 + if not header: + header = "" + start_time = time.time() + end = time.time() + iter_time = SmoothedValue(fmt="{avg:.4f}") + data_time = SmoothedValue(fmt="{avg:.4f}") + space_fmt = ":" + str(len(str(len(iterable)))) + "d" + if torch.cuda.is_available(): + log_msg = self.delimiter.join( + [ + header, + "[{0" + space_fmt + "}/{1}]", + "eta: {eta}", + "{meters}", + "time: {time}", + "data: {data}", + "max mem: {memory:.0f}", + ] + ) + else: + log_msg = self.delimiter.join( + [header, "[{0" + space_fmt + "}/{1}]", "eta: {eta}", "{meters}", "time: {time}", "data: {data}"] + ) + MB = 1024.0 * 1024.0 + for obj in iterable: + data_time.update(time.time() - end) + yield obj + iter_time.update(time.time() - end) + if i % print_freq == 0 or i == len(iterable) - 1: + eta_seconds = iter_time.global_avg * (len(iterable) - i) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + if torch.cuda.is_available(): + print( + log_msg.format( + i, + len(iterable), + eta=eta_string, + meters=str(self), + time=str(iter_time), + data=str(data_time), + memory=torch.cuda.max_memory_allocated() / MB, + ) + ) + else: + print( + log_msg.format( + i, len(iterable), eta=eta_string, meters=str(self), time=str(iter_time), data=str(data_time) + ) + ) + i += 1 + end = time.time() + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + print(f"{header} Total time: {total_time_str} ({total_time / len(iterable):.4f} s / it)") + + +def collate_fn(batch): + return tuple(zip(*batch)) + + +def mkdir(path): + try: + os.makedirs(path) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + +def setup_for_distributed(is_master): + """ + This function disables printing when not in master process + """ + import builtins as __builtin__ + + builtin_print = __builtin__.print + + def print(*args, **kwargs): + force = kwargs.pop("force", False) + if is_master or force: + builtin_print(*args, **kwargs) + + __builtin__.print = print + + +def is_dist_avail_and_initialized(): + if not dist.is_available(): + return False + if not dist.is_initialized(): + return 
False + return True + + +def get_world_size(): + if not is_dist_avail_and_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank(): + if not is_dist_avail_and_initialized(): + return 0 + return dist.get_rank() + + +def is_main_process(): + return get_rank() == 0 + + +def save_on_master(*args, **kwargs): + if is_main_process(): + torch.save(*args, **kwargs) + + +def init_distributed_mode(args): + if "RANK" in os.environ and "WORLD_SIZE" in os.environ: + args.rank = int(os.environ["RANK"]) + args.world_size = int(os.environ["WORLD_SIZE"]) + args.gpu = int(os.environ["LOCAL_RANK"]) + elif "SLURM_PROCID" in os.environ: + args.rank = int(os.environ["SLURM_PROCID"]) + args.gpu = args.rank % torch.npu.device_count() + else: + print("Not using distributed mode") + args.distributed = False + return + # print("Not using distributed mode") + # args.distributed = False + # return + + args.distributed = True + + torch.npu.set_device(args.gpu) + args.dist_backend = "hccl" + print(f"| distributed init (rank {args.rank}): {args.dist_url}", flush=True) + torch.distributed.init_process_group( + backend=args.dist_backend, + # init_method=args.dist_url, + world_size=args.world_size, + rank=args.rank + ) + torch.distributed.barrier() + setup_for_distributed(args.rank == 0) -- Gitee From 54f87a70154fe68bb0668d0c9db9c7d204a04e57 Mon Sep 17 00:00:00 2001 From: Qw1kowa <1741235576@qq.com> Date: Mon, 4 Jul 2022 19:38:01 +0800 Subject: [PATCH 2/8] =?UTF-8?q?SSDLite320=E9=A6=96=E6=AC=A1-8p=E7=B2=BE?= =?UTF-8?q?=E5=BA=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../classification/SSDLite320/add_license.py | 50 ++++ .../cv/classification/SSDLite320/coco_eval.py | 33 +++ .../classification/SSDLite320/coco_utils.py | 33 +++ .../cv/classification/SSDLite320/engine.py | 33 +++ .../SSDLite320/extract_ops_by_step.py | 33 +++ .../SSDLite320/group_by_aspect_ratio.py | 33 +++ .../classification/SSDLite320/image_list.py | 33 +++ .../cv/classification/SSDLite320/jenkins.txt | 265 ++++++++++++++++++ .../cv/classification/SSDLite320/presets.py | 33 +++ .../{train_eval_1p.sh => train_eval_8p.sh} | 8 +- .../torchvision/_internally_replaced_utils.py | 33 +++ .../torchvision/datasets/caltech.py | 33 +++ .../SSDLite320/torchvision/datasets/celeba.py | 33 +++ .../SSDLite320/torchvision/datasets/cifar.py | 33 +++ .../SSDLite320/torchvision/datasets/coco.py | 33 +++ .../torchvision/datasets/fakedata.py | 33 +++ .../SSDLite320/torchvision/datasets/flickr.py | 33 +++ .../SSDLite320/torchvision/datasets/folder.py | 33 +++ .../SSDLite320/torchvision/datasets/hmdb51.py | 33 +++ .../torchvision/datasets/imagenet.py | 33 +++ .../torchvision/datasets/kinetics.py | 33 +++ .../SSDLite320/torchvision/datasets/lsun.py | 33 +++ .../SSDLite320/torchvision/datasets/mnist.py | 33 +++ .../torchvision/datasets/omniglot.py | 33 +++ .../torchvision/datasets/phototour.py | 33 +++ .../datasets/samplers/clip_sampler.py | 33 +++ .../SSDLite320/torchvision/datasets/sbd.py | 33 +++ .../SSDLite320/torchvision/datasets/sbu.py | 33 +++ .../torchvision/datasets/semeion.py | 33 +++ .../SSDLite320/torchvision/datasets/stl10.py | 33 +++ .../SSDLite320/torchvision/datasets/svhn.py | 33 +++ .../SSDLite320/torchvision/datasets/ucf101.py | 33 +++ .../SSDLite320/torchvision/datasets/usps.py | 33 +++ .../SSDLite320/torchvision/datasets/utils.py | 33 +++ .../torchvision/datasets/video_utils.py | 33 +++ .../SSDLite320/torchvision/datasets/vision.py | 33 +++ .../SSDLite320/torchvision/datasets/voc.py | 33 
+++ .../SSDLite320/torchvision/extension.py | 33 +++ .../SSDLite320/torchvision/io/_video_opt.py | 33 +++ .../SSDLite320/torchvision/io/video.py | 33 +++ .../SSDLite320/torchvision/models/_utils.py | 33 +++ .../torchvision/models/_utils_origin.py | 33 +++ .../SSDLite320/torchvision/models/alexnet.py | 33 +++ .../SSDLite320/torchvision/models/densenet.py | 33 +++ .../torchvision/models/detection/_utils.py | 33 +++ .../models/detection/_utils_origin.py | 33 +++ .../models/detection/anchor_utils.py | 33 +++ .../models/detection/anchor_utils_origin.py | 33 +++ .../models/detection/backbone_utils.py | 33 +++ .../models/detection/backbone_utils_origin.py | 33 +++ .../models/detection/faster_rcnn.py | 33 +++ .../models/detection/generalized_rcnn.py | 33 +++ .../models/detection/image_list.py | 33 +++ .../models/detection/keypoint_rcnn.py | 33 +++ .../torchvision/models/detection/mask_rcnn.py | 33 +++ .../torchvision/models/detection/roi_heads.py | 33 +++ .../torchvision/models/detection/rpn.py | 33 +++ .../torchvision/models/detection/ssd.py | 33 +++ .../models/detection/ssd_origin.py | 33 +++ .../torchvision/models/detection/ssdlite.py | 33 +++ .../torchvision/models/detection/transform.py | 33 +++ .../models/detection/transform_origin.py | 33 +++ .../torchvision/models/googlenet.py | 33 +++ .../torchvision/models/inception.py | 33 +++ .../SSDLite320/torchvision/models/mnasnet.py | 33 +++ .../torchvision/models/mobilenet.py | 33 +++ .../torchvision/models/mobilenet_origin.py | 33 +++ .../torchvision/models/mobilenetv2.py | 33 +++ .../torchvision/models/mobilenetv3.py | 33 +++ .../models/quantization/googlenet.py | 33 +++ .../models/quantization/inception.py | 33 +++ .../models/quantization/mobilenet.py | 33 +++ .../models/quantization/mobilenetv2.py | 33 +++ .../models/quantization/mobilenetv3.py | 33 +++ .../torchvision/models/quantization/resnet.py | 33 +++ .../models/quantization/shufflenetv2.py | 33 +++ .../torchvision/models/quantization/utils.py | 33 +++ .../SSDLite320/torchvision/models/resnet.py | 33 +++ .../torchvision/models/segmentation/_utils.py | 33 +++ .../models/segmentation/deeplabv3.py | 33 +++ .../torchvision/models/segmentation/fcn.py | 33 +++ .../models/segmentation/segmentation.py | 33 +++ .../torchvision/models/shufflenetv2.py | 33 +++ .../torchvision/models/squeezenet.py | 33 +++ .../SSDLite320/torchvision/models/utils.py | 33 +++ .../SSDLite320/torchvision/models/vgg.py | 33 +++ .../torchvision/models/video/resnet.py | 33 +++ .../torchvision/ops/_register_onnx_ops.py | 33 +++ .../SSDLite320/torchvision/ops/_utils.py | 33 +++ .../torchvision/ops/_utils_origin.py | 33 +++ .../SSDLite320/torchvision/ops/boxes.py | 33 +++ .../SSDLite320/torchvision/ops/deform_conv.py | 33 +++ .../ops/feature_pyramid_network.py | 33 +++ .../SSDLite320/torchvision/ops/misc.py | 33 +++ .../SSDLite320/torchvision/ops/misc_origin.py | 33 +++ .../torchvision/ops/new_empty_tensor.py | 33 +++ .../SSDLite320/torchvision/ops/poolers.py | 33 +++ .../torchvision/ops/ps_roi_align.py | 33 +++ .../SSDLite320/torchvision/ops/ps_roi_pool.py | 33 +++ .../SSDLite320/torchvision/ops/roi_align.py | 33 +++ .../SSDLite320/torchvision/ops/roi_pool.py | 33 +++ .../transforms/_functional_video.py | 33 +++ .../transforms/_transforms_video.py | 33 +++ .../torchvision/transforms/functional.py | 33 +++ .../transforms/functional_origin.py | 33 +++ .../torchvision/transforms/functional_pil.py | 33 +++ .../transforms/functional_tensor.py | 33 +++ .../transforms/functional_tensor_origin.py | 33 +++ 
.../torchvision/transforms/transforms.py | 33 +++ .../transforms/transforms_origin.py | 33 +++ .../SSDLite320/torchvision/utils.py | 33 +++ .../SSDLite320/torchvision/utils_origin.py | 33 +++ .../SSDLite320/torchvision/version.py | 33 +++ .../cv/classification/SSDLite320/train.py | 33 +++ .../SSDLite320/transform_ssd.py | 33 +++ .../classification/SSDLite320/transforms.py | 33 +++ .../cv/classification/SSDLite320/utils.py | 33 +++ 117 files changed, 4081 insertions(+), 4 deletions(-) create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/add_license.py create mode 100644 PyTorch/contrib/cv/classification/SSDLite320/jenkins.txt rename PyTorch/contrib/cv/classification/SSDLite320/test/{train_eval_1p.sh => train_eval_8p.sh} (95%) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/add_license.py b/PyTorch/contrib/cv/classification/SSDLite320/add_license.py new file mode 100644 index 0000000000..f1bdbfc9b3 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/add_license.py @@ -0,0 +1,50 @@ +# 以下不需要修改 +lic = '''# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ========================================================================== +''' +file_list = [] +with open("jenkins.txt", encoding = "utf-8") as f: + for i in f.readlines(): + if 'README.md' in i: + model_name = i.split('/')[4] + print('model_name:', model_name) + + if 'license no exists' in i: + file_list.append(i.split(',')[0].split(model_name + '/')[1]) + +for file in file_list: + print(file) + with open(file, 'r+') as filename: + filetext = filename.read() + filename.seek(0, 0) + filename.write(lic + '\n' + filetext) \ No newline at end of file diff --git a/PyTorch/contrib/cv/classification/SSDLite320/coco_eval.py b/PyTorch/contrib/cv/classification/SSDLite320/coco_eval.py index ec0709c5d9..54f686e269 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/coco_eval.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/coco_eval.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import copy import io from contextlib import redirect_stdout diff --git a/PyTorch/contrib/cv/classification/SSDLite320/coco_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/coco_utils.py index a656602865..01eb6d4a8a 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/coco_utils.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/coco_utils.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. 
+# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import copy import os diff --git a/PyTorch/contrib/cv/classification/SSDLite320/engine.py b/PyTorch/contrib/cv/classification/SSDLite320/engine.py index 33f4897eb5..0944340903 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/engine.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/engine.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ========================================================================== + import math import sys import time diff --git a/PyTorch/contrib/cv/classification/SSDLite320/extract_ops_by_step.py b/PyTorch/contrib/cv/classification/SSDLite320/extract_ops_by_step.py index 6a499a143f..c318666467 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/extract_ops_by_step.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/extract_ops_by_step.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import re from collections import defaultdict import argparse diff --git a/PyTorch/contrib/cv/classification/SSDLite320/group_by_aspect_ratio.py b/PyTorch/contrib/cv/classification/SSDLite320/group_by_aspect_ratio.py index 1323849a6a..f858b994ce 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/group_by_aspect_ratio.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/group_by_aspect_ratio.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import bisect import copy import math diff --git a/PyTorch/contrib/cv/classification/SSDLite320/image_list.py b/PyTorch/contrib/cv/classification/SSDLite320/image_list.py index 583866557e..960aefd75d 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/image_list.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/image_list.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + from typing import List, Tuple import torch diff --git a/PyTorch/contrib/cv/classification/SSDLite320/jenkins.txt b/PyTorch/contrib/cv/classification/SSDLite320/jenkins.txt new file mode 100644 index 0000000000..9de6f1b1d4 --- /dev/null +++ b/PyTorch/contrib/cv/classification/SSDLite320/jenkins.txt @@ -0,0 +1,265 @@ +#################################################################### +# Start Modelzoo Network Test.... 
+#################################################################### +=================Modified files in this PR: ================= +PyTorch/contrib/cv/classification/SSDLite320/README.md +PyTorch/contrib/cv/classification/SSDLite320/coco_eval.py +PyTorch/contrib/cv/classification/SSDLite320/coco_utils.py +PyTorch/contrib/cv/classification/SSDLite320/engine.py +PyTorch/contrib/cv/classification/SSDLite320/extract_ops_by_step.py +PyTorch/contrib/cv/classification/SSDLite320/group_by_aspect_ratio.py +PyTorch/contrib/cv/classification/SSDLite320/image_list.py +PyTorch/contrib/cv/classification/SSDLite320/presets.py +PyTorch/contrib/cv/classification/SSDLite320/requirements.txt +PyTorch/contrib/cv/classification/SSDLite320/test/env_npu.sh +PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_1p.sh +PyTorch/contrib/cv/classification/SSDLite320/test/train_full_1p.sh +PyTorch/contrib/cv/classification/SSDLite320/test/train_full_8p.sh +PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_1p.sh +PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh +PyTorch/contrib/cv/classification/SSDLite320/torchvision/__init__.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/_internally_replaced_utils.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/__init__.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/caltech.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/celeba.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/cifar.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/cityscapes.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/coco.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/fakedata.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/flickr.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/folder.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/hmdb51.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/imagenet.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/kinetics.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/lsun.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/mnist.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/omniglot.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/phototour.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/samplers/__init__.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/samplers/clip_sampler.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbd.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbu.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/semeion.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/stl10.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/svhn.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/ucf101.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/usps.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/utils.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/video_utils.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/vision.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/voc.py 
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/extension.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/__init__.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/_video_opt.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/video.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/__init__.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils_origin.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/alexnet.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/densenet.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/__init__.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils_origin.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils_origin.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils_origin.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/faster_rcnn.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/generalized_rcnn.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/image_list.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/keypoint_rcnn.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/mask_rcnn.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/roi_heads.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/rpn.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd_origin.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssdlite.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform_origin.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/googlenet.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/inception.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mnasnet.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet_origin.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv2.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv3.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/__init__.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/googlenet.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/inception.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenet.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv2.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv3.py 
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/resnet.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/shufflenetv2.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/utils.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/resnet.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/__init__.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/_utils.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/deeplabv3.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/fcn.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/segmentation.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/shufflenetv2.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/squeezenet.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/utils.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/vgg.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/video/__init__.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/video/resnet.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/__init__.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_register_onnx_ops.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils_origin.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/boxes.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/deform_conv.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/feature_pyramid_network.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc_origin.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/new_empty_tensor.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/poolers.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_align.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_pool.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_align.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_pool.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/__init__.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_functional_video.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_transforms_video.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_origin.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_pil.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor_origin.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms_origin.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils_origin.py +PyTorch/contrib/cv/classification/SSDLite320/torchvision/version.py 
+PyTorch/contrib/cv/classification/SSDLite320/train.py +PyTorch/contrib/cv/classification/SSDLite320/transform_ssd.py +PyTorch/contrib/cv/classification/SSDLite320/transforms.py +PyTorch/contrib/cv/classification/SSDLite320/utils.py +=================Start to Check Type of File ================= +=================Start to Check License ================= +PyTorch/contrib/cv/classification/SSDLite320/coco_eval.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/coco_utils.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/engine.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/extract_ops_by_step.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/group_by_aspect_ratio.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/image_list.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/presets.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/_internally_replaced_utils.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/caltech.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/celeba.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/cifar.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/coco.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/fakedata.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/flickr.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/folder.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/hmdb51.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/imagenet.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/kinetics.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/lsun.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/mnist.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/omniglot.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/phototour.py,The keyword license no exists in the file,please check and add it! 
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/samplers/clip_sampler.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbd.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbu.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/semeion.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/stl10.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/svhn.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/ucf101.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/usps.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/utils.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/video_utils.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/vision.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/voc.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/extension.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/_video_opt.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/video.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils_origin.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/alexnet.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/densenet.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils_origin.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils_origin.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils.py,The keyword license no exists in the file,please check and add it! 
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils_origin.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/faster_rcnn.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/generalized_rcnn.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/image_list.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/keypoint_rcnn.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/mask_rcnn.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/roi_heads.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/rpn.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd_origin.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssdlite.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform_origin.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/googlenet.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/inception.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mnasnet.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet_origin.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv2.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv3.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/googlenet.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/inception.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenet.py,The keyword license no exists in the file,please check and add it! 
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv2.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv3.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/resnet.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/shufflenetv2.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/utils.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/resnet.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/_utils.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/deeplabv3.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/fcn.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/segmentation.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/shufflenetv2.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/squeezenet.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/utils.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/vgg.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/video/resnet.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_register_onnx_ops.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils_origin.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/boxes.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/deform_conv.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/feature_pyramid_network.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc_origin.py,The keyword license no exists in the file,please check and add it! 
+PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/new_empty_tensor.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/poolers.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_align.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_pool.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_align.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_pool.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_functional_video.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_transforms_video.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_origin.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_pil.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor_origin.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms_origin.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils_origin.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/torchvision/version.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/train.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/transform_ssd.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/transforms.py,The keyword license no exists in the file,please check and add it! +PyTorch/contrib/cv/classification/SSDLite320/utils.py,The keyword license no exists in the file,please check and add it! 
+License check failed, Please follow the guide to add License: +https://gitee.com/ascend/modelzoo/blob/master/contrib/CONTRIBUTING.md +=================Start to Check Size of File ================= +=================Start to Check Junk file ================= +=================Start to Check file of First Directory ================= +=================Start to Check Internal Link ================= +=================Start to Check Sensitive Information ================= +=================Start to Check Modelzoo Level ================= +=================Start to Check File&Keywords of Test Directory ================= +=================Start to Check core_binding&Device Id status ================= +check fail diff --git a/PyTorch/contrib/cv/classification/SSDLite320/presets.py b/PyTorch/contrib/cv/classification/SSDLite320/presets.py index 88d8c697d2..b15ae0069f 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/presets.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/presets.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ==========================================================================
+
 import torch
 
 import transforms as T
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_1p.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_8p.sh
similarity index 95%
rename from PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_1p.sh
rename to PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_8p.sh
index 159f319279..03ad5ec6ce 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_1p.sh
+++ b/PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_8p.sh
@@ -1,7 +1,7 @@
 ################ Basic configuration parameters; review and modify per model ##################
 Network="SSDLite320"
 # Number of NPU devices used for training
-export RANK_SIZE=1
+export RANK_SIZE=8
 # Dataset path; keep empty, no modification needed
 data_path="/opt/npu/dataset/coco/"
 batch_size=24
@@ -56,7 +56,7 @@ python -m torch.distributed.launch --nproc_per_node=$RANK_SIZE --use_env train.p
     --epochs 600\
     --aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr\
     --lr 0.15 --batch-size 24 --test-only\
-    --weight-decay 0.00004 --data-augmentation ssdlite > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_eval_1p.log 2>&1 &
+    --weight-decay 0.00004 --data-augmentation ssdlite > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_eval_8p.log 2>&1 &
 
 wait
 
@@ -68,13 +68,13 @@ e2e_time=$(( $end_time - $start_time ))
 # Print results; no modification needed
 echo "------------------ Final result ------------------"
 # Output performance (FPS); review and modify per model
-step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_eval_1p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}'`
+step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_eval_8p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}'`
 FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${step_time}'}'`
 # Print; no modification needed
 echo "Final Performance images/sec : $FPS"
 
 # Output training accuracy; review and modify per model
-train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_eval_1p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}'`
+train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_eval_8p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}'`
 # Print; no modification needed
 echo "Final Train Accuracy : ${train_accuracy}"
 echo "E2E Training Duration sec : $e2e_time"
diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/_internally_replaced_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/_internally_replaced_utils.py
index d147997b0b..20bc303762 100644
--- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/_internally_replaced_utils.py
+++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/_internally_replaced_utils.py
@@ -1,3 +1,36 @@
+# -*- coding: utf-8 -*-
+# BSD 3-Clause License
+#
+# Copyright (c) 2017
+# All rights reserved.
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import importlib.machinery import os diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/caltech.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/caltech.py index 09ec1c3d7f..f1970eabe6 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/caltech.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/caltech.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ========================================================================== + from PIL import Image import os import os.path diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/celeba.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/celeba.py index 71af65ed11..10c91ebf3a 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/celeba.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/celeba.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + from functools import partial import torch import os diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/cifar.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/cifar.py index 127c085cfb..033ecd038c 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/cifar.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/cifar.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + from PIL import Image import os import os.path diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/coco.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/coco.py index 9dd3c7adf8..2417b970d2 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/coco.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/coco.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ========================================================================== + from .vision import VisionDataset from PIL import Image import os diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/fakedata.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/fakedata.py index f079c1a92d..3dfa957ac4 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/fakedata.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/fakedata.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import torch from .vision import VisionDataset from .. import transforms diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/flickr.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/flickr.py index 77cd430705..00f98166f3 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/flickr.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/flickr.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + from collections import defaultdict from PIL import Image from html.parser import HTMLParser diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/folder.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/folder.py index 16d092b716..02c87cfc84 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/folder.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/folder.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ========================================================================== + from .vision import VisionDataset from PIL import Image diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/hmdb51.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/hmdb51.py index 3b826bfa9a..044bc8be9e 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/hmdb51.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/hmdb51.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import glob import os diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/imagenet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/imagenet.py index a45ff3cd44..2f72916c55 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/imagenet.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/imagenet.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import warnings from contextlib import contextmanager import os diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/kinetics.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/kinetics.py index 07db91cc19..75253b3818 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/kinetics.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/kinetics.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ========================================================================== + from .utils import list_dir from .folder import make_dataset from .video_utils import VideoClips diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/lsun.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/lsun.py index fc67f8f024..33abf6af51 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/lsun.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/lsun.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + from .vision import VisionDataset from PIL import Image import os diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/mnist.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/mnist.py index 74bc0c16aa..931d50bfe8 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/mnist.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/mnist.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + from .vision import VisionDataset import warnings from PIL import Image diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/omniglot.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/omniglot.py index dd86128488..4849e8bc6f 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/omniglot.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/omniglot.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ========================================================================== + from PIL import Image from os.path import join import os diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/phototour.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/phototour.py index 47591e3db8..13ab9645d1 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/phototour.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/phototour.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import os import numpy as np from PIL import Image diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/samplers/clip_sampler.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/samplers/clip_sampler.py index 2432a6d20d..71cfcd34dc 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/samplers/clip_sampler.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/samplers/clip_sampler.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import math import torch from torch.utils.data import Sampler diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbd.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbd.py index c4713f7257..60a3c1f801 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbd.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbd.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ========================================================================== + import os import shutil from .vision import VisionDataset diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbu.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbu.py index 70cb68344b..5e70cfaa08 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbu.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/sbu.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + from PIL import Image from .utils import download_url, check_integrity diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/semeion.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/semeion.py index 12c92c4a35..096a32c021 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/semeion.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/semeion.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + from PIL import Image import os import os.path diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/stl10.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/stl10.py index 6bec45afe2..c0e224ab6b 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/stl10.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/stl10.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + from PIL import Image import os import os.path diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/svhn.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/svhn.py index d96d0f3f43..2e702323dc 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/svhn.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/svhn.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. 
+# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + from .vision import VisionDataset from PIL import Image import os diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/ucf101.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/ucf101.py index 43d8124bd4..646c2bba77 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/ucf101.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/ucf101.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import glob import os diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/usps.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/usps.py index 06f1fd0596..f71278b9c1 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/usps.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/usps.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + from PIL import Image import os import numpy as np diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/utils.py index 6689eef649..99ac7f6d93 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/utils.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/utils.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. 
+# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import os import os.path import hashlib diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/video_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/video_utils.py index 5c9244e545..7351b8fd5c 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/video_utils.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/video_utils.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import bisect import math from fractions import Fraction diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/vision.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/vision.py index 7ee5a84dfc..654cc594cb 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/vision.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/vision.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import os import torch import torch.utils.data as data diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/voc.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/voc.py index 2be53c4fcc..8eca53904e 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/voc.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/datasets/voc.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. 
+# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import os import tarfile import collections diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/extension.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/extension.py index db3356aa67..35f844dae4 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/extension.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/extension.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + _HAS_OPS = False diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/_video_opt.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/_video_opt.py index da37c66cfa..b0879e66cc 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/_video_opt.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/_video_opt.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import importlib import math diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/video.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/video.py index 40d1cfeed8..eb6f76a895 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/video.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/io/video.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. 
+# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import gc import math import re diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils.py index f4e1cd8450..36b321d7e4 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ========================================================================== + from collections import OrderedDict from typing import Dict, Optional diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils_origin.py index 291041d7b5..d4f91ad495 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils_origin.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/_utils_origin.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + from collections import OrderedDict import torch diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/alexnet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/alexnet.py index a0126312d1..4ecc35fd9e 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/alexnet.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/alexnet.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import torch import torch.nn as nn from .utils import load_state_dict_from_url diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/densenet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/densenet.py index 822dde0925..efb484b239 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/densenet.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/densenet.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ========================================================================== + import re import torch import torch.nn as nn diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils.py index ce70d93be0..464944d009 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import math from collections import OrderedDict from typing import List, Tuple diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils_origin.py index c48576328d..eaf9df1144 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils_origin.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/_utils_origin.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import math import torch diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils.py index 1d6298eabe..52df4ec852 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ========================================================================== + import math from typing import List, Optional diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils_origin.py index bac7cb6c74..63323b23e7 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils_origin.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/anchor_utils_origin.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import math from typing import List, Optional diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils.py index 54fdc4c05c..9c59d10756 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import warnings from typing import Callable, Dict, Optional, List, Union diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils_origin.py index f5335c451d..483804669b 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils_origin.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/backbone_utils_origin.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ========================================================================== + from collections import OrderedDict from torch import nn from torchvision.ops.feature_pyramid_network import FeaturePyramidNetwork, LastLevelMaxPool diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/faster_rcnn.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/faster_rcnn.py index 92366352b9..74fa7708f0 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/faster_rcnn.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/faster_rcnn.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + from collections import OrderedDict import torch diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/generalized_rcnn.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/generalized_rcnn.py index 50a25fb4f9..7d570e0903 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/generalized_rcnn.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/generalized_rcnn.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. """ Implements the Generalized R-CNN framework diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/image_list.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/image_list.py index 583866557e..960aefd75d 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/image_list.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/image_list.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ========================================================================== + from typing import List, Tuple import torch diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/keypoint_rcnn.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/keypoint_rcnn.py index aeee558ca2..469ff0ca89 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/keypoint_rcnn.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/keypoint_rcnn.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import torch from torch import nn diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/mask_rcnn.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/mask_rcnn.py index a8a980fa3c..b188fceb56 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/mask_rcnn.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/mask_rcnn.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + from collections import OrderedDict import torch diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/roi_heads.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/roi_heads.py index fd1334fbc2..678d2be236 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/roi_heads.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/roi_heads.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ========================================================================== + import torch import torchvision diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/rpn.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/rpn.py index 381bc77084..dd1ae5bbdb 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/rpn.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/rpn.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. import torch from torch.nn import functional as F diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd.py index 91a5ae5cda..47d102ced3 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import warnings from collections import OrderedDict from typing import Any, Dict, List, Optional, Tuple diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd_origin.py index be30bb54c4..69ced9e7dd 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd_origin.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssd_origin.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ========================================================================== + import warnings from collections import OrderedDict from typing import Any, Dict, List, Optional, Tuple diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssdlite.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssdlite.py index 652d3afe4d..76baeced45 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssdlite.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/ssdlite.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import warnings from collections import OrderedDict from functools import partial diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform.py index af9d13414d..9922ebd87e 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import math from typing import List, Tuple, Dict, Optional diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform_origin.py index ffbe2279b7..d345738091 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform_origin.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/detection/transform_origin.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ========================================================================== + import random import math import torch diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/googlenet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/googlenet.py index 4b1cb28003..77233d0ffd 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/googlenet.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/googlenet.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import warnings from collections import namedtuple import torch diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/inception.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/inception.py index e4c5430c31..0e739bab41 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/inception.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/inception.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + from collections import namedtuple import warnings import torch diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mnasnet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mnasnet.py index 59677427f1..70635ec63d 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mnasnet.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mnasnet.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import math import warnings diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet.py index 4108305d3f..a272fdc740 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. 
+# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + from .mobilenetv2 import MobileNetV2, mobilenet_v2, __all__ as mv2_all from .mobilenetv3 import MobileNetV3, mobilenet_v3_large, mobilenet_v3_small, __all__ as mv3_all diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet_origin.py index e4c3069a60..e33cc5dd29 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet_origin.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenet_origin.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + from torch import nn from .utils import load_state_dict_from_url diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv2.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv2.py index 1a470953df..90023f33e2 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv2.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv2.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import warnings from typing import Callable, Any, Optional, List diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv3.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv3.py index e6a2bbbfbe..fa94b3256e 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv3.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/mobilenetv3.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. 
+# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import warnings from functools import partial from typing import Any, Callable, List, Optional, Sequence diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/googlenet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/googlenet.py index d01534bc70..22d9065d54 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/googlenet.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/googlenet.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import warnings import torch import torch.nn as nn diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/inception.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/inception.py index f452de0281..76409c0d64 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/inception.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/inception.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import warnings from collections import namedtuple diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenet.py index 8f2c42db64..14ac30395d 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenet.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenet.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. 
+# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + from .mobilenetv2 import QuantizableMobileNetV2, mobilenet_v2, __all__ as mv2_all from .mobilenetv3 import QuantizableMobileNetV3, mobilenet_v3_large, __all__ as mv3_all diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv2.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv2.py index faa63e73be..6d7e33c366 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv2.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv2.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + from typing import Any from torch import Tensor diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv3.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv3.py index 948b72ead7..91c3232232 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv3.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/mobilenetv3.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + from typing import Any, List, Optional import torch diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/resnet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/resnet.py index 5fd3c03929..0ced2fc858 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/resnet.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/resnet.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. 
+# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import torch from torchvision.models.resnet import Bottleneck, BasicBlock, ResNet, model_urls import torch.nn as nn diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/shufflenetv2.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/shufflenetv2.py index a2030ca5ec..58e3cfd915 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/shufflenetv2.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/shufflenetv2.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import torch import torch.nn as nn from torchvision.models.utils import load_state_dict_from_url diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/utils.py index bf23c9a933..df49a023c3 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/utils.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/quantization/utils.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import torch from torch import nn diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/resnet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/resnet.py index 797f459f5c..ff355c68df 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/resnet.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/resnet.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. 
+# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import torch import torch.nn as nn from .utils import load_state_dict_from_url diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/_utils.py index c5a7ae99e4..9d79016c8d 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/_utils.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/_utils.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + from collections import OrderedDict import torch diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/deeplabv3.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/deeplabv3.py index ae652cd7d2..8721897082 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/deeplabv3.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/deeplabv3.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import torch from torch import nn from torch.nn import functional as F diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/fcn.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/fcn.py index 4d7701cc4e..d55085c26a 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/fcn.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/fcn.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. 
+# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + from torch import nn from ._utils import _SimpleSegmentationModel diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/segmentation.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/segmentation.py index 15df4d8ae3..49a5a21900 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/segmentation.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/segmentation/segmentation.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + from .._utils import IntermediateLayerGetter from ..utils import load_state_dict_from_url from .. import resnet diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/shufflenetv2.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/shufflenetv2.py index 14f9521886..31091b58bf 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/shufflenetv2.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/shufflenetv2.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import torch import torch.nn as nn from .utils import load_state_dict_from_url diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/squeezenet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/squeezenet.py index 964f3ec66d..1ce19a5334 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/squeezenet.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/squeezenet.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. 
+# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import torch import torch.nn as nn import torch.nn.init as init diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/utils.py index 638ef07cd8..8452307d73 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/utils.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/utils.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + try: from torch.hub import load_state_dict_from_url except ImportError: diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/vgg.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/vgg.py index dba534f651..ac4b6f7ec8 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/vgg.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/vgg.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import torch import torch.nn as nn from .utils import load_state_dict_from_url diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/video/resnet.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/video/resnet.py index a9e59a149c..ed3e394379 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/video/resnet.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/models/video/resnet.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. 
+# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import torch import torch.nn as nn diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_register_onnx_ops.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_register_onnx_ops.py index d9d9c5c094..a3a0cf4d29 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_register_onnx_ops.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_register_onnx_ops.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import sys import torch diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils.py index 3a07c747f5..672f2aab4a 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + from typing import List, Optional, Tuple, Union import torch diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils_origin.py index 714022f042..fcc0613017 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils_origin.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/_utils_origin.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. 
+# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import torch from torch import Tensor from torch.jit.annotations import List diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/boxes.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/boxes.py index ac0dba1fe7..0239781515 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/boxes.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/boxes.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import torch from torch.jit.annotations import Tuple from torch import Tensor diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/deform_conv.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/deform_conv.py index c948b16419..56995bc2ba 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/deform_conv.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/deform_conv.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import math import torch diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/feature_pyramid_network.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/feature_pyramid_network.py index 09e79cc7ef..9ab0db7994 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/feature_pyramid_network.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/feature_pyramid_network.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. 
+# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + from collections import OrderedDict import torch diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc.py index caf0d999f7..b35a940fb4 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + """ helper class that supports empty tensors on some nn functions. diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc_origin.py index 65e150700a..ae80603532 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc_origin.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/misc_origin.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + from collections import OrderedDict from torch.jit.annotations import Optional, List from torch import Tensor diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/new_empty_tensor.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/new_empty_tensor.py index 74455a98c4..01a0e15300 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/new_empty_tensor.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/new_empty_tensor.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. 
+# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import torch from torch.jit.annotations import List from torch import Tensor diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/poolers.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/poolers.py index b94a9eb405..2d529d8b72 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/poolers.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/poolers.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. import torch import torch.nn.functional as F diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_align.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_align.py index c0c761b72c..03e70a3bbc 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_align.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_align.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import torch from torch import nn, Tensor diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_pool.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_pool.py index 710f2cb019..dbdda1ceb8 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_pool.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/ps_roi_pool.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. 
+# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import torch from torch import nn, Tensor diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_align.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_align.py index 14224d8a83..bd126efac9 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_align.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_align.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import torch from torch import nn, Tensor diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_pool.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_pool.py index 10232f16b4..9c53abf2d8 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_pool.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/ops/roi_pool.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import torch from torch import nn, Tensor diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_functional_video.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_functional_video.py index 06c3071690..84a9babb57 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_functional_video.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_functional_video.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. 
+# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import torch diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_transforms_video.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_transforms_video.py index aa1a4b0531..f43db9a696 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_transforms_video.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/_transforms_video.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + #!/usr/bin/env python3 import numbers diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional.py index bd5b170626..d866860a90 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import math import numbers import warnings diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_origin.py index 7ce1fb6ab3..8b2728c85a 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_origin.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_origin.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. 
+# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import torch import math from PIL import Image, ImageOps, ImageEnhance, __version__ as PILLOW_VERSION diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_pil.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_pil.py index fdaf5f7de1..92c396d174 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_pil.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_pil.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import numbers from typing import Any, Dict, List, Optional, Sequence, Tuple, Union diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor.py index 09ae726931..af0d34038d 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import warnings from typing import Optional, Tuple, List diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor_origin.py index b81deed6d4..8672ebe10b 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor_origin.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/functional_tensor_origin.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. 
+# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import torch import torchvision.transforms.functional as F from torch import Tensor diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms.py index a409ff3cbb..2450e32509 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import math import numbers import random diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms_origin.py index 10783c8e53..0d90426edd 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms_origin.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/transforms/transforms_origin.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import torch import math import random diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils.py index 399dc3fcc5..f11b6b7459 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. 
+# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import math import pathlib import warnings diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils_origin.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils_origin.py index 1a773b3fd2..e8edd62e3a 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils_origin.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/utils_origin.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import torch import math irange = range diff --git a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/version.py b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/version.py index 146fc171ca..2a0e78d5c9 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/torchvision/version.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/torchvision/version.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + __version__ = '0.6.0' git_version = '82fd1c85d7e42d93255ed01f763ca40d58f288e3' from torchvision.extension import _check_cuda_version diff --git a/PyTorch/contrib/cv/classification/SSDLite320/train.py b/PyTorch/contrib/cv/classification/SSDLite320/train.py index ba2de2a72d..34b1b5c74b 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/train.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/train.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. 
+# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + r"""PyTorch Detection Training. To run in a multi-gpu environment, use the distributed launcher:: diff --git a/PyTorch/contrib/cv/classification/SSDLite320/transform_ssd.py b/PyTorch/contrib/cv/classification/SSDLite320/transform_ssd.py index f0683a1a37..0091d84ab5 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/transform_ssd.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/transform_ssd.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import math from typing import List, Tuple, Dict, Optional diff --git a/PyTorch/contrib/cv/classification/SSDLite320/transforms.py b/PyTorch/contrib/cv/classification/SSDLite320/transforms.py index 9415efb893..2cb72322d8 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/transforms.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/transforms.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + from typing import List, Tuple, Dict, Optional import torch diff --git a/PyTorch/contrib/cv/classification/SSDLite320/utils.py b/PyTorch/contrib/cv/classification/SSDLite320/utils.py index 00bb4bbce1..0b32356460 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/utils.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/utils.py @@ -1,3 +1,36 @@ +# -*- coding: utf-8 -*- +# BSD 3-Clause License +# +# Copyright (c) 2017 +# All rights reserved. +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. 
+# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ========================================================================== + import datetime import errno import os -- Gitee From 2f51c6bc3aa0347c1c9df6481589d6226a67613b Mon Sep 17 00:00:00 2001 From: Qw1kowa <1741235576@qq.com> Date: Mon, 4 Jul 2022 19:41:31 +0800 Subject: [PATCH 3/8] =?UTF-8?q?SSDLite320=E9=A6=96=E6=AC=A1=E6=8F=90?= =?UTF-8?q?=E4=BA=A4&=E5=AE=8C=E5=96=848p=E7=B2=BE=E5=BA=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../contrib/cv/classification/SSDLite320/README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/README.md b/PyTorch/contrib/cv/classification/SSDLite320/README.md index b53891e1c8..b68150270b 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/README.md +++ b/PyTorch/contrib/cv/classification/SSDLite320/README.md @@ -35,7 +35,7 @@ bash test/train_full_8p.sh --data_path=/opt/npu/dataset/coco/ # 1p train full bash test/train_full_1p.sh --data_path=/opt/npu/dataset/coco/ -# 1p eval +# 8p eval bash test/train_eval_8p.sh --data_path=/opt/npu/dataset/coco/ --model_path=/eval_model.pth ``` @@ -43,19 +43,19 @@ bash test/train_eval_8p.sh --data_path=/opt/npu/dataset/coco/ --model_path=/eval Log Path: -- train_perf_1p.log # 1p 训练下性能测试日志 -- train_perf_8p.log # 8p 训练下性能测试日志 +- train_perf_1p.log # 1p 训练下性能测试日志 +- train_perf_8p.log # 8p 训练下性能测试日志 - train_full_1p.log # 1p 完整训练下性能和精度测试日志 - train_full_8p.log # 8p 完整训练下性能和精度测试日志 -- train_eval_1p.log # 1p 测试模型验证集精度日志 +- train_eval_1p.log # 8p 测试模型验证集精度日志 ## SSDlite 训练结果 | top1 acc | FPS | Epochs | AMP_Type | Device | | :------: | :---: | :----: | :------: | :----: | | - | 10.8 | 1 | O1 | 1p Npu | -| ? 
| 100.8 | 660 | O1 | 8p Npu | -| - | 54.7 | 1 | - | 1p Gpu | -| 20.4 | 387.2 | 660 | - | 8p Gpu | +| 20.5 | 100.8 | 600 | O1 | 8p Npu | +| - | 54.7 | 1 | O1 | 1p Gpu | +| 20.4 | 387.2 | 600 | O1 | 8p Gpu | 注:源仓库模型测试为 21.3 (660 epochs) \ No newline at end of file -- Gitee From a7b76024f2e791cc44c0887ea2ef3a0eb7a1fd0c Mon Sep 17 00:00:00 2001 From: qingy735 <2429841116@qq.com> Date: Tue, 9 Aug 2022 20:58:47 +0800 Subject: [PATCH 4/8] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E4=B8=AA=E5=88=AB?= =?UTF-8?q?=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../cv/classification/SSDLite320/test/train_eval_8p.sh | 4 ++-- .../cv/classification/SSDLite320/test/train_full_1p.sh | 4 ++-- .../cv/classification/SSDLite320/test/train_full_8p.sh | 4 ++-- .../classification/SSDLite320/test/train_performance_1p.sh | 4 ++-- .../classification/SSDLite320/test/train_performance_8p.sh | 4 ++-- PyTorch/contrib/cv/classification/SSDLite320/train.py | 5 +++-- 6 files changed, 13 insertions(+), 12 deletions(-) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_8p.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_8p.sh index 03ad5ec6ce..b966ae9e24 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_8p.sh +++ b/PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_8p.sh @@ -68,13 +68,13 @@ e2e_time=$(( $end_time - $start_time )) # 结果打印,不需要修改 echo "------------------ Final result ------------------" # 输出性能FPS,需要模型审视修改 -step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_eval_8p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}' +step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_eval_8p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}'` FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${step_time}'}'` # 打印,不需要修改 echo "Final Performance images/sec : $FPS" # 输出训练精度,需要模型审视修改 -train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_eval_8p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}' +train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_eval_8p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}'` # 打印,不需要修改 echo "Final Train Accuracy : ${train_accuracy}" echo "E2E Training Duration sec : $e2e_time" diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/train_full_1p.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/train_full_1p.sh index 7dafc83788..c2605ca73c 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/test/train_full_1p.sh +++ b/PyTorch/contrib/cv/classification/SSDLite320/test/train_full_1p.sh @@ -66,13 +66,13 @@ e2e_time=$(( $end_time - $start_time )) # 结果打印,不需要修改 echo "------------------ Final result ------------------" # 输出性能FPS,需要模型审视修改 -step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_full_1p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}' +step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_full_1p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}'` FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${step_time}'}'` # 打印,不需要修改 echo "Final Performance images/sec : $FPS" # 输出训练精度,需要模型审视修改 -train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_full_1p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print 
$13}' | awk 'END {print}' +train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_full_1p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}'` # 打印,不需要修改 echo "Final Train Accuracy : ${train_accuracy}" echo "E2E Training Duration sec : $e2e_time" diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/train_full_8p.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/train_full_8p.sh index 2dc85c74a4..c421a3cc63 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/test/train_full_8p.sh +++ b/PyTorch/contrib/cv/classification/SSDLite320/test/train_full_8p.sh @@ -66,13 +66,13 @@ e2e_time=$(( $end_time - $start_time )) # 结果打印,不需要修改 echo "------------------ Final result ------------------" # 输出性能FPS,需要模型审视修改 -step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_full_8p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}' +step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_full_8p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}'` FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*8/'${step_time}'}'` # 打印,不需要修改 echo "Final Performance images/sec : $FPS" # 输出训练精度,需要模型审视修改 -train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_full_8p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}' +train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_full_8p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}'` # 打印,不需要修改 echo "Final Train Accuracy : ${train_accuracy}" echo "E2E Training Duration sec : $e2e_time" diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_1p.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_1p.sh index 35d9a5c924..44612007f4 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_1p.sh +++ b/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_1p.sh @@ -66,13 +66,13 @@ e2e_time=$(( $end_time - $start_time )) # 结果打印,不需要修改 echo "------------------ Final result ------------------" # 输出性能FPS,需要模型审视修改 -step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_1p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}' +step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_1p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}'` FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${step_time}'}'` # 打印,不需要修改 echo "Final Performance images/sec : $FPS" # 输出训练精度,需要模型审视修改 -train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_1p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}' +train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_1p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}'` # 打印,不需要修改 echo "Final Train Accuracy : ${train_accuracy}" echo "E2E Training Duration sec : $e2e_time" diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh index 90656c3937..0b495404ae 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh +++ b/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh @@ -66,13 +66,13 @@ 
e2e_time=$(( $end_time - $start_time )) # 结果打印,不需要修改 echo "------------------ Final result ------------------" # 输出性能FPS,需要模型审视修改 -step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_8p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}' +step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_8p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}'` FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${step_time}'}'` # 打印,不需要修改 echo "Final Performance images/sec : $FPS" # 输出训练精度,需要模型审视修改 -train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_8p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}' +train_accuracy=`grep 'Average Precision' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_8p.log | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}'` # 打印,不需要修改 echo "Final Train Accuracy : ${train_accuracy}" echo "E2E Training Duration sec : $e2e_time" diff --git a/PyTorch/contrib/cv/classification/SSDLite320/train.py b/PyTorch/contrib/cv/classification/SSDLite320/train.py index 34b1b5c74b..51d90e1407 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/train.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/train.py @@ -155,11 +155,12 @@ def get_args_parser(add_help=True): parser.add_argument( "--lr-gamma", default=0.1, type=float, help="decrease lr by a factor of lr-gamma (multisteplr scheduler only)" ) - parser.add_argument("--print-freq", default=1, type=int, help="print frequency") + parser.add_argument("--print-freq", default=20, type=int, help="print frequency") parser.add_argument("--output-dir", default="./multigpu", type=str, help="path to save outputs") # parser.add_argument("--resume", default="./ssdlite320_mobilenet_v3_large_coco-a79551df.pth", # type=str, help="path of checkpoint")#model_128.pth - parser.add_argument("--resume", default="./multigpu/model_599.pth", type=str, help="path of checkpoint") + parser.add_argument("--resume", default="", type=str, help="path of checkpoint") + # ./multigpu/model_599.pth parser.add_argument("--start_epoch", default=0, type=int, help="start epoch") parser.add_argument("--aspect-ratio-group-factor", default=3, type=int) -- Gitee From 0c6c3d8ec5641576b84c71458a568080d72253c2 Mon Sep 17 00:00:00 2001 From: qingy735 <2429841116@qq.com> Date: Wed, 10 Aug 2022 00:47:08 +0800 Subject: [PATCH 5/8] =?UTF-8?q?SSDLite320=20=E5=BE=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- PyTorch/contrib/cv/classification/SSDLite320/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/train.py b/PyTorch/contrib/cv/classification/SSDLite320/train.py index 51d90e1407..7fc71d633e 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/train.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/train.py @@ -284,7 +284,7 @@ def main(args): print("Start training",'args.distributed:',args.distributed) start_time = time.time() - for epoch in range(0, 1): + for epoch in range(args.start_epoch, args.epochs): if args.distributed: train_sampler.set_epoch(epoch) -- Gitee From c02cbc0db17d44761032553659eb8887bfc22cb8 Mon Sep 17 00:00:00 2001 From: qingy735 <2429841116@qq.com> Date: Wed, 10 Aug 2022 13:43:53 +0800 Subject: [PATCH 6/8] =?UTF-8?q?SSDLite320=20=E5=BE=AE=E8=B0=83=E8=AE=AD?= =?UTF-8?q?=E7=BB=83=E8=84=9A=E6=9C=AC?= MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../SSDLite320/test/train_performance_1p.sh | 2 +- .../SSDLite320/test/train_performance_8p.sh | 2 +- .../cv/classification/SSDLite320/train.py | 64 +++++++++++++------ 3 files changed, 47 insertions(+), 21 deletions(-) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_1p.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_1p.sh index 44612007f4..4e473dbc4f 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_1p.sh +++ b/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_1p.sh @@ -53,7 +53,7 @@ python -m torch.distributed.launch --nproc_per_node=$RANK_SIZE --use_env train.p --model ssdlite320_mobilenet_v3_large\ --epochs 2\ --aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr\ - --lr 0.15 --batch-size 24\ + --lr 0.15 --batch-size 24 --train_only\ --weight-decay 0.00004 --data-augmentation ssdlite > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_1p.log 2>&1 & wait diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh index 0b495404ae..03758c34ac 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh +++ b/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh @@ -53,7 +53,7 @@ python -m torch.distributed.launch --nproc_per_node=$RANK_SIZE --use_env train.p --model ssdlite320_mobilenet_v3_large\ --epochs 2\ --aspect-ratio-group-factor 3 --lr-scheduler cosineannealinglr\ - --lr 0.15 --batch-size 24\ + --lr 0.15 --batch-size 24 --train_only\ --weight-decay 0.00004 --data-augmentation ssdlite --world-size 8 > ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_8p.log 2>&1 & wait diff --git a/PyTorch/contrib/cv/classification/SSDLite320/train.py b/PyTorch/contrib/cv/classification/SSDLite320/train.py index 7fc71d633e..e4e3294820 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/train.py +++ b/PyTorch/contrib/cv/classification/SSDLite320/train.py @@ -183,6 +183,12 @@ def get_args_parser(add_help=True): help="Only test the model", action="store_true", ) + parser.add_argument( + "--train-only", + dest="train_only", + help="Only train the model", + action="store_true", + ) parser.add_argument( "--pretrained", dest="pretrained", @@ -284,26 +290,46 @@ def main(args): print("Start training",'args.distributed:',args.distributed) start_time = time.time() - for epoch in range(args.start_epoch, args.epochs): - if args.distributed: - train_sampler.set_epoch(epoch) + + if args.train_only: + for epoch in range(0, 1): + if args.distributed: + train_sampler.set_epoch(epoch) + + train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq) + lr_scheduler.step() + if args.output_dir: + checkpoint = { + "model": model_without_ddp.state_dict(), + "optimizer": optimizer.state_dict(), + "lr_scheduler": lr_scheduler.state_dict(), + "args": args, + "epoch": epoch, + 'amp': amp.state_dict() + } + utils.save_on_master(checkpoint, os.path.join(args.output_dir, f"model_{epoch}.pth")) + utils.save_on_master(checkpoint, os.path.join(args.output_dir, "checkpoint.pth")) + else: + for epoch in range(args.start_epoch, args.epochs): + if args.distributed: + train_sampler.set_epoch(epoch) - train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq) - lr_scheduler.step() - if args.output_dir: - checkpoint = { - "model": 
model_without_ddp.state_dict(), - "optimizer": optimizer.state_dict(), - "lr_scheduler": lr_scheduler.state_dict(), - "args": args, - "epoch": epoch, - 'amp': amp.state_dict() - } - utils.save_on_master(checkpoint, os.path.join(args.output_dir, f"model_{epoch}.pth")) - utils.save_on_master(checkpoint, os.path.join(args.output_dir, "checkpoint.pth")) - - # evaluate after every epoch - evaluate(model, data_loader_test, device=device) + train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq) + lr_scheduler.step() + if args.output_dir: + checkpoint = { + "model": model_without_ddp.state_dict(), + "optimizer": optimizer.state_dict(), + "lr_scheduler": lr_scheduler.state_dict(), + "args": args, + "epoch": epoch, + 'amp': amp.state_dict() + } + utils.save_on_master(checkpoint, os.path.join(args.output_dir, f"model_{epoch}.pth")) + utils.save_on_master(checkpoint, os.path.join(args.output_dir, "checkpoint.pth")) + + # evaluate after every epoch + evaluate(model, data_loader_test, device=device) total_time = time.time() - start_time total_time_str = str(datetime.timedelta(seconds=int(total_time))) -- Gitee From f08199bcb95ef5962d0f739a1baf6f1cf3b43be8 Mon Sep 17 00:00:00 2001 From: qingy735 <2429841116@qq.com> Date: Fri, 19 Aug 2022 23:52:29 +0800 Subject: [PATCH 7/8] =?UTF-8?q?=E4=BF=AE=E6=94=B98pFPS=E8=AE=A1=E7=AE=97?= =?UTF-8?q?=E8=84=9A=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../contrib/cv/classification/SSDLite320/test/train_eval_8p.sh | 2 +- .../cv/classification/SSDLite320/test/train_performance_8p.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_8p.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_8p.sh index b966ae9e24..55618eae01 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_8p.sh +++ b/PyTorch/contrib/cv/classification/SSDLite320/test/train_eval_8p.sh @@ -69,7 +69,7 @@ e2e_time=$(( $end_time - $start_time )) echo "------------------ Final result ------------------" # 输出性能FPS,需要模型审视修改 step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_eval_8p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}'` -FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${step_time}'}'` +FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*8/'${step_time}'}'` # 打印,不需要修改 echo "Final Performance images/sec : $FPS" diff --git a/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh b/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh index 03758c34ac..18bacd4a80 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh +++ b/PyTorch/contrib/cv/classification/SSDLite320/test/train_performance_8p.sh @@ -67,7 +67,7 @@ e2e_time=$(( $end_time - $start_time )) echo "------------------ Final result ------------------" # 输出性能FPS,需要模型审视修改 step_time=`grep 'Total time' ${test_path_dir}/output/${ASCEND_DEVICE_ID}/train_perf_8p.log | awk -F ' ' '{print $6}'| cut -d '(' -f2 | awk 'END {print}'` -FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'/'${step_time}'}'` +FPS=`awk 'BEGIN{printf "%.2f\n", '${batch_size}'*8/'${step_time}'}'` # 打印,不需要修改 echo "Final Performance images/sec : $FPS" -- Gitee From 82b7a727e5e9c247dd36605c873f0e9832bb61f4 Mon Sep 17 00:00:00 2001 From: qingy735 <2429841116@qq.com> Date: Mon, 26 Dec 2022 11:37:59 +0000 Subject: [PATCH 8/8] update 
PyTorch/contrib/cv/classification/SSDLite320/README.md. Signed-off-by: qingy735 <2429841116@qq.com> --- .../cv/classification/SSDLite320/README.md | 47 +++++++++++++++++-- 1 file changed, 43 insertions(+), 4 deletions(-) diff --git a/PyTorch/contrib/cv/classification/SSDLite320/README.md b/PyTorch/contrib/cv/classification/SSDLite320/README.md index b68150270b..8632248e19 100644 --- a/PyTorch/contrib/cv/classification/SSDLite320/README.md +++ b/PyTorch/contrib/cv/classification/SSDLite320/README.md @@ -1,8 +1,29 @@ # SSDLite320 -SSDlite320 模型在 COCO2017 数据集上的实现,主要修改自 [pytorch/vision/tree/main/references/detection]([github.com](https://github.com/pytorch/vision/tree/main/references/detection)) 源码 +# 概述 -## 环境准备 +## 简述 +SSDLite320 + +- 参考实现: + + ``` + url=https://github.com/pytorch/vision/tree/main/references/detection + ``` + +- 通过Git获取代码方法如下: + + ``` + git clone {url} # 克隆仓库的代码 + cd {code_path} # 切换到模型代码所在路径,若仓库下只有该模型,则无需切换 + ``` + +- 通过单击“立即下载”,下载源码包。 + + +# 准备训练环境 + +## 准备环境 - 安装 Pytorch 和混合精度训练工具 Apex - 安装依赖 `pip install -r requirements.txt` @@ -18,7 +39,13 @@ cp -rf torchvision ~/archiconda3/envs/xxx/lib/python3.7/site-packages/torchvisio 注:`~/archiconda3/envs/xxx/lib/python3.7/site-packages`为`xxx`环境下模块下载地址 -## 训练 +## 准备数据集 +该训练采用COCO2017数据集进行训练 + + +# 开始训练 + +## 训练模型 训练阶段,脚本调用 `train.py` 进行训练 @@ -49,6 +76,7 @@ Log Path: - train_full_8p.log # 8p 完整训练下性能和精度测试日志 - train_eval_1p.log # 8p 测试模型验证集精度日志 +# 训练结果展示 ## SSDlite 训练结果 | top1 acc | FPS | Epochs | AMP_Type | Device | @@ -58,4 +86,15 @@ Log Path: | - | 54.7 | 1 | O1 | 1p Gpu | | 20.4 | 387.2 | 600 | O1 | 8p Gpu | -注:源仓库模型测试为 21.3 (660 epochs) \ No newline at end of file +注:源仓库模型测试为 21.3 (660 epochs) + +# 版本说明 + +## 变更 + +2022.08.19:首次发布 +2011.12.26:更改模板 + +## 已知问题 + +无。 \ No newline at end of file -- Gitee
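For reference, the "Final Performance" and "Final Train Accuracy" values printed by the `test/train_*.sh` scripts come entirely from post-processing the training log with grep/awk; that is the code path PATCH 4/8 repairs (the missing closing backticks on the command substitutions) and PATCH 7/8 adjusts (multiplying by 8 in the 8p scripts, since the parsed step time is per device). Below is a minimal standalone sketch of that post-processing, not a script taken from the patches: the log path, `batch_size`, and `device_count` defaults are illustrative placeholders, and it assumes the same log format the scripts grep for (a torchvision-style "Total time ... (N.NNNN s / it)" line and the COCO "Average Precision ... IoU=0.50:0.95 | area= all" summary line).

```
#!/bin/bash
# Sketch of the log post-processing performed by the test scripts (assumed log format, illustrative defaults).
# Usage: ./parse_log.sh <log_file> <per_device_batch_size> <device_count>
log_file=${1:-train_full_8p.log}   # illustrative default, not a path from the patches
batch_size=${2:-24}                # per-device batch size used by the training scripts
device_count=${3:-8}               # 1 for the *_1p.sh scripts, 8 for the *_8p.sh scripts

# Seconds per iteration: take field 6 of each 'Total time' line, keep the part after '(',
# then use the last match in the log.
step_time=$(grep 'Total time' "${log_file}" | awk -F ' ' '{print $6}' | cut -d '(' -f2 | awk 'END {print}')

# Throughput in images/sec: per-device batch size times device count, divided by per-step time.
FPS=$(awk 'BEGIN{printf "%.2f\n", '"${batch_size}"'*'"${device_count}"'/'"${step_time}"'}')

# mAP at IoU=0.50:0.95, area=all: last field of the last matching COCO summary line.
train_accuracy=$(grep 'Average Precision' "${log_file}" | grep 'IoU=0.50:0.95' | grep ' all' | awk -F ' ' '{print $13}' | awk 'END {print}')

echo "Final Performance images/sec : ${FPS}"
echo "Final Train Accuracy : ${train_accuracy}"
```

The 1p/8p distinction only affects `device_count`: the per-step time reported in the log is measured on a single device, so aggregate throughput scales linearly with the number of devices, which is what the `*8` factor introduced by the corrected 8p scripts accounts for.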