diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/LICENSE b/PyTorch/contrib/cv/semantic_segmentation/DPT/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..17e19b8d4874a7c5b1541ccaa4e217cb312d2d65
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/LICENSE
@@ -0,0 +1,203 @@
+Copyright (c) OpenMMLab. All rights reserved
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright 2021 Huawei Technologies Co., Ltd
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/README.md b/PyTorch/contrib/cv/semantic_segmentation/DPT/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fda23c2ec773a23373c15d3439cd6bbb023a8c07
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/README.md
@@ -0,0 +1,200 @@
+# DPT for PyTorch
+
+- [Overview](概述.md)
+- [Preparing the Training Environment](准备训练环境.md)
+- [Training](开始训练.md)
+- [Training Results](训练结果展示.md)
+- [Release Notes](版本说明.md)
+
+
+
+# Overview
+
+## Summary
+
+DPT is a dense prediction architecture built on an encoder-decoder design, with a transformer as the basic computational building block of the encoder. It uses ViT as the encoder: the input image is split into non-overlapping tokens, and multi-head self-attention (MHSA) is used to obtain the attention among these encoded tokens. After the transformer stages the number of tokens is unchanged, attention is computed between every pair of tokens, and each token can capture its relationship to all other tokens, so the features carry a global receptive field while the spatial resolution is never reduced.
+
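+As an illustration of the token handling described above, the following is a minimal, hypothetical sketch (not the DPT implementation itself; shapes and names are assumptions for a 512x512 crop with 16x16 patches) of how ViT patch tokens can be reassembled into an image-like feature map:
+
+```
+import torch
+
+batch, embed_dim, patches_per_side = 2, 768, 32                     # 32 x 32 = 1024 patch tokens
+tokens = torch.randn(batch, 1 + patches_per_side ** 2, embed_dim)   # [cls] token + patch tokens
+
+patch_tokens = tokens[:, 1:, :]                           # drop the class token
+feature_map = patch_tokens.transpose(1, 2).reshape(       # (B, C, H/16, W/16)
+    batch, embed_dim, patches_per_side, patches_per_side)
+print(feature_map.shape)                                  # torch.Size([2, 768, 32, 32])
+```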
+
+- Reference implementation:
+
+ ```
+ url=https://github.com/open-mmlab/mmsegmentation/tree/master/configs/dpt
+ ```
+
+- Implementation adapted for Ascend AI Processors:
+
+ ```
+ url=https://gitee.com/ascend/ModelZoo-PyTorch.git
+  code_path=PyTorch/contrib/cv/semantic_segmentation
+ ```
+
+- Obtain the code via Git as follows:
+
+ ```
+  git clone {https://github.com/open-mmlab/mmsegmentation}    # clone the repository
+  cd {mmsegmentation}                                         # enter the mmsegmentation directory
+ ```
+
+- Alternatively, click [Download](https://github.com/open-mmlab/mmsegmentation/archive/refs/heads/master.zip) to download the source package.
+
+# Preparing the Training Environment
+
+## Setting Up the Environment
+
+- The firmware/driver, CANN, and PyTorch versions supported by this model are listed in the table below.
+
+  **Table 1** Version compatibility
+
+  | Component         | Version                                                      |
+  | ----------------- | ------------------------------------------------------------ |
+  | Firmware & driver | [1.0.15](https://www.hiascend.com/hardware/firmware-drivers?tag=commercial) |
+  | CANN              | [5.1.RC1](https://www.hiascend.com/software/cann/commercial?version=5.1.RC1) |
+  | PyTorch           | [1.5.0](https://gitee.com/ascend/pytorch/tree/v1.5.0/) |
+
+- Environment setup guide.
+
+  See [Preparing a PyTorch Framework Training Environment](https://www.hiascend.com/document/detail/zh/ModelZoo/pytorchframework/ptes).
+
+- Install the dependencies (add further packages as required by the model).
+
+ ```
+ pip install -r requirements.txt
+ ```
+  Build mmcv.
+ ```
+  # clone the mmcv repository
+ git clone -b v1.4.4 https://github.com/open-mmlab/mmcv.git
+
+ # configure
+ cd /mmcv
+
+ # copy
+ rm -rf ./mmcv
+ mkdir mmcv
+ cp -r mmcv_replace/* ./mmcv/
+
+ # compile
+ MMCV_WITH_OPS=1 pip install -e . -v
+
+ cd /${模型文件夹名称}
+ ```
+  Build mmsegmentation.
+  Replace the same-named files under mmsegmentation/configs with the files from configs/ in this repository, the files under mmsegmentation/mmseg with those from mmseg/, and the files under mmsegmentation/tools with those from ./tools/.
+ ```
+  # clone the repository
+ git clone https://github.com/open-mmlab/mmsegmentation
+
+ # configure
+ cd /mmsegmentation
+
+ # copy
+ cp -r /${你的存放路径}/configs/* ./configs/
+ cp -r /${你的存放路径}/mmseg/* ./mmseg/
+ cp -r /${你的存放路径}/tools/* ./tools/
+
+ # compile
+ pip install -e . -v
+ ```
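+  After both packages are built, a quick, optional sanity check that they import correctly (the exact version strings depend on your environment):
+  ```
+  import mmcv
+  import mmseg
+  print(mmcv.__version__, mmseg.__version__)  # expect mmcv 1.4.4 and the locally built mmseg
+  ```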
+
+## Preparing the Dataset
+
+1. Obtain the dataset.
+
+   Download the original dataset yourself; the open-source [ade20k](http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip) dataset can be used. Upload it to any path on the server and extract it.
+
+   The dataset directory structure is as follows:
+ ```
+ ├──ADE20K/
+ | |──annotations/
+ | | |──training/
+ | | | ADE_train_00000001.png
+ | | | ADE_train_00000002.png
+ | | | ...
+ | | |──validation/
+ | | | ADE_val_00000001.png
+ | | | ADE_val_00000002.png
+ | | | ...
+ | |──images/
+ | | |──training/
+ | | | ADE_train_00000001.jpg
+ | | | ADE_train_00000002.jpg
+ | | | ...
+ | | |──validation/
+ | | | ADE_val_00000001.jpg
+ | | | ADE_val_00000002.jpg
+ | | | ...
+ | |──objectInfo150.txt
+ | |──sceneCategories.txt
+ ```
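+
+   At training time the dataset location comes from the `data_root` variable in configs/_base_/datasets/ade20k.py. If the data is not placed at that default path, the following is a minimal sketch of overriding it when loading the config directly (the path below is a placeholder):
+   ```
+   from mmcv import Config
+
+   cfg = Config.fromfile('configs/dpt/dpt_vit-b16_512x512_160k_ade20k.py')
+   for split in ('train', 'val', 'test'):
+       cfg.data[split].data_root = '/path/to/ADEChallengeData2016'  # placeholder path
+   ```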
+
+## Obtaining the Pretrained Model
+
+Obtain the pretrained model as described in the README.md of the original repository, or download it directly from [here](https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth). Place it under pretrain/ in the root directory, then convert it (the output name must match the `pretrained` path in configs/_base_/models/dpt_vit-b16.py):
+```
+python mmsegmentation/tools/model_converters/vit2mmseg.py pretrain/jx_vit_base_p16_224-80ecf9dd.pth pretrain/vit-b16_p16_224-80ecf9dd.pth
+```
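+
+A quick, optional sanity check of the converted file (assuming the converter writes a plain state dict, so the keys can be listed directly):
+```
+import torch
+
+state_dict = torch.load('pretrain/vit-b16_p16_224-80ecf9dd.pth', map_location='cpu')
+print(len(state_dict), list(state_dict)[:5])  # number of weights and a few key names
+```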
+
+## Modifications
+- torch.nn.parallel._functions._get_stream uses torch.cuda; it is changed to torch.npu (see the sketch below).
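+
+  A minimal sketch of that change (an illustration only, assuming the Ascend PyTorch adapter exposes torch.npu.Stream and torch.npu.device_count; the upstream function lives in torch/nn/parallel/_functions.py):
+  ```
+  import torch
+
+  _streams = None
+
+  def _get_stream(device):
+      """Background copy stream, with torch.cuda swapped for torch.npu."""
+      global _streams
+      if device == -1:
+          return None
+      if _streams is None:
+          _streams = [None] * torch.npu.device_count()
+      if _streams[device] is None:
+          _streams[device] = torch.npu.Stream(device)
+      return _streams[device]
+  ```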
+
+# Training
+
+## Training the Model
+
+1. Enter the root directory of the model.
+
+ ```
+ cd .
+ ```
+
+2. Run the training scripts.
+
+   The model supports single-card and 8-card training on a single machine.
+
+   - Single-card training
+
+     Start single-card training.
+
+ ```
+ # training 1p accuracy
+ bash ./test/train_full_1p.sh --data_path=xxx
+ # training 1p performance
+ bash ./test/train_performance_1p.sh --data_path=xxx
+ ```
+
+   - 8-card training
+
+     Start 8-card training.
+
+ ```
+ # training 8p accuracy
+ bash ./test/train_full_8p.sh --data_path=xxx
+ # training 8p performance
+ bash ./test/train_performance_8p.sh --data_path=xxx
+ ```
+
+   After training completes, checkpoint files are saved under /work_dir by default, and the model's training accuracy and performance information is printed.
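+
+   A hedged example of loading a trained checkpoint for inference with the standard mmseg APIs (the checkpoint and image paths are placeholders, and the device string assumes the NPU-adapted PyTorch build):
+   ```
+   from mmseg.apis import init_segmentor, inference_segmentor
+
+   config = 'configs/dpt/dpt_vit-b16_512x512_160k_ade20k.py'
+   checkpoint = 'work_dir/latest.pth'                            # placeholder checkpoint path
+   model = init_segmentor(config, checkpoint, device='npu:0')    # use 'cuda:0' on GPU
+   result = inference_segmentor(model, 'demo.jpg')               # placeholder image
+   ```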
+
+# Training Results
+
+**Table 2** Training results
+
+| NAME   | decode.acc_seg | FPS   | iters | AMP_Type |
+| ------ | -------------- | ----: | ----- | -------: |
+| 1p-GPU | -              | 5.98  | 500   | O1,None  |
+| 1p-NPU | -              | 0.09  | 500   | O1,None  |
+| 8p-GPU | 80.2740        | 37.20 | 7500  | O1,None  |
+| 8p-NPU | 81.0251        | 0.57  | 7500  | O1,None  |
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/configs/_base_/datasets/ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/configs/_base_/datasets/ade20k.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9e9cfa42ad297475d230525b585678a72183174
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/configs/_base_/datasets/ade20k.py
@@ -0,0 +1,69 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# dataset settings
+dataset_type = 'ADE20KDataset'
+data_root = 'data/ade/ADEChallengeData2016'  # change this to the actual path of ADEChallengeData2016
+img_norm_cfg = dict(
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+crop_size = (512, 512)
+train_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(type='LoadAnnotations', reduce_zero_label=True),
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
+ dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+ dict(type='RandomFlip', prob=0.5),
+ dict(type='PhotoMetricDistortion'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
+ dict(type='DefaultFormatBundle'),
+ dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+]
+test_pipeline = [
+ dict(type='LoadImageFromFile'),
+ dict(
+ type='MultiScaleFlipAug',
+ img_scale=(2048, 512),
+ # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
+ flip=False,
+ transforms=[
+ dict(type='Resize', keep_ratio=True),
+ dict(type='RandomFlip'),
+ dict(type='Normalize', **img_norm_cfg),
+ dict(type='ImageToTensor', keys=['img']),
+ dict(type='Collect', keys=['img']),
+ ])
+]
+data = dict(
+ samples_per_gpu=4,
+ workers_per_gpu=4,
+ train=dict(
+ type=dataset_type,
+ data_root=data_root,
+ img_dir='images/training',
+ ann_dir='annotations/training',
+ pipeline=train_pipeline),
+ val=dict(
+ type=dataset_type,
+ data_root=data_root,
+ img_dir='images/validation',
+ ann_dir='annotations/validation',
+ pipeline=test_pipeline),
+ test=dict(
+ type=dataset_type,
+ data_root=data_root,
+ img_dir='images/validation',
+ ann_dir='annotations/validation',
+ pipeline=test_pipeline))
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/configs/_base_/default_runtime.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/configs/_base_/default_runtime.py
new file mode 100644
index 0000000000000000000000000000000000000000..48926a1de92f76ce685d0c9726cc12b46c220d17
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/configs/_base_/default_runtime.py
@@ -0,0 +1,30 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# yapf:disable
+log_config = dict(
+ interval=50,
+ hooks=[
+ dict(type='TextLoggerHook', by_epoch=False),
+ # dict(type='TensorboardLoggerHook')
+ # dict(type='PaviLoggerHook') # for internal services
+ ])
+# yapf:enable
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
+cudnn_benchmark = True
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/configs/_base_/models/dpt_vit-b16.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/configs/_base_/models/dpt_vit-b16.py
new file mode 100644
index 0000000000000000000000000000000000000000..8d10c3ee4a637e0f112e9fae9ddf969d1afbd2a2
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/configs/_base_/models/dpt_vit-b16.py
@@ -0,0 +1,45 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ type='EncoderDecoder',
+ pretrained='pretrain/vit-b16_p16_224-80ecf9dd.pth', # noqa
+ backbone=dict(
+ type='VisionTransformer',
+ img_size=224,
+ embed_dims=768,
+ num_layers=12,
+ num_heads=12,
+ out_indices=(2, 5, 8, 11),
+ final_norm=False,
+ with_cls_token=True,
+ output_cls_token=True),
+ decode_head=dict(
+ type='DPTHead',
+ in_channels=(768, 768, 768, 768),
+ channels=256,
+ embed_dims=768,
+ post_process_channels=[96, 192, 384, 768],
+ num_classes=150,
+ readout_type='project',
+ input_transform='multiple_select',
+ in_index=(0, 1, 2, 3),
+ norm_cfg=norm_cfg,
+ loss_decode=dict(
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+ auxiliary_head=None,
+ # model training and testing settings
+ train_cfg=dict(),
+ test_cfg=dict(mode='whole')) # yapf: disable
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/configs/_base_/schedules/schedule_160k.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/configs/_base_/schedules/schedule_160k.py
new file mode 100644
index 0000000000000000000000000000000000000000..6b35ca427b8c14ee8973ed6c56f0a1e822a5b008
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/configs/_base_/schedules/schedule_160k.py
@@ -0,0 +1,24 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optimizer_config = dict()
+# learning policy
+lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
+# runtime settings
+runner = dict(type='IterBasedRunner', max_iters=160000)
+checkpoint_config = dict(by_epoch=False, interval=16000)
+evaluation = dict(interval=16000, metric='mIoU', pre_eval=True)
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/configs/dpt/README.md b/PyTorch/contrib/cv/semantic_segmentation/DPT/configs/dpt/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..5e6257711fc6979a9d8bb50c0577784842b1a8a0
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/configs/dpt/README.md
@@ -0,0 +1,67 @@
+# DPT
+
+[Vision Transformers for Dense Prediction](https://arxiv.org/abs/2103.13413)
+
+## Introduction
+
+
+
+[Official Repo](https://github.com/isl-org/DPT)
+
+[Code Snippet](https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dpt_head.py#L215)
+
+## Abstract
+
+
+
+We introduce dense vision transformers, an architecture that leverages vision transformers in place of convolutional networks as a backbone for dense prediction tasks. We assemble tokens from various stages of the vision transformer into image-like representations at various resolutions and progressively combine them into full-resolution predictions using a convolutional decoder. The transformer backbone processes representations at a constant and relatively high resolution and has a global receptive field at every stage. These properties allow the dense vision transformer to provide finer-grained and more globally coherent predictions when compared to fully-convolutional networks. Our experiments show that this architecture yields substantial improvements on dense prediction tasks, especially when a large amount of training data is available. For monocular depth estimation, we observe an improvement of up to 28% in relative performance when compared to a state-of-the-art fully-convolutional network. When applied to semantic segmentation, dense vision transformers set a new state of the art on ADE20K with 49.02% mIoU. We further show that the architecture can be fine-tuned on smaller datasets such as NYUv2, KITTI, and Pascal Context where it also sets the new state of the art. Our models are available at [this https URL](https://github.com/isl-org/DPT).
+
+
+
+
+

+
+
+## Citation
+
+```bibtex
+@article{dosovitskiy2020,
+  title={An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale},
+  author={Dosovitskiy, Alexey and Beyer, Lucas and Kolesnikov, Alexander and Weissenborn, Dirk and Zhai, Xiaohua and Unterthiner, Thomas and Dehghani, Mostafa and Minderer, Matthias and Heigold, Georg and Gelly, Sylvain and Uszkoreit, Jakob and Houlsby, Neil},
+ journal={arXiv preprint arXiv:2010.11929},
+ year={2020}
+}
+
+@article{Ranftl2021,
+ author = {Ren\'{e} Ranftl and Alexey Bochkovskiy and Vladlen Koltun},
+ title = {Vision Transformers for Dense Prediction},
+  journal = {arXiv preprint arXiv:2103.13413},
+ year = {2021},
+}
+```
+
+## Usage
+
+To use other repositories' pre-trained models, it is necessary to convert keys.
+
+We provide a script [`vit2mmseg.py`](../../tools/model_converters/vit2mmseg.py) in the tools directory to convert the key of models from [timm](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py) to MMSegmentation style.
+
+```shell
+python tools/model_converters/vit2mmseg.py ${PRETRAIN_PATH} ${STORE_PATH}
+```
+
+E.g.
+
+```shell
+python tools/model_converters/vit2mmseg.py https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth pretrain/jx_vit_base_p16_224-80ecf9dd.pth
+```
+
+This script converts the model from `PRETRAIN_PATH` and stores the converted model in `STORE_PATH`.
+
+## Results and models
+
+### ADE20K
+
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download |
+| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| DPT | ViT-B | 512x512 | 160000 | 8.09 | 10.41 | 46.97 | 48.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dpt/dpt_vit-b16_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dpt/dpt_vit-b16_512x512_160k_ade20k/dpt_vit-b16_512x512_160k_ade20k-db31cf52.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dpt/dpt_vit-b16_512x512_160k_ade20k/dpt_vit-b16_512x512_160k_ade20k-20210809_172025.log.json) |
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/configs/dpt/dpt.yml b/PyTorch/contrib/cv/semantic_segmentation/DPT/configs/dpt/dpt.yml
new file mode 100644
index 0000000000000000000000000000000000000000..a4f9c65b790532b875669d4f0459ada8907af8db
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/configs/dpt/dpt.yml
@@ -0,0 +1,37 @@
+Collections:
+- Name: DPT
+ Metadata:
+ Training Data:
+ - ADE20K
+ Paper:
+ URL: https://arxiv.org/abs/2103.13413
+    Title: Vision Transformers for Dense Prediction
+ README: configs/dpt/README.md
+ Code:
+ URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dpt_head.py#L215
+ Version: v0.17.0
+ Converted From:
+ Code: https://github.com/isl-org/DPT
+Models:
+- Name: dpt_vit-b16_512x512_160k_ade20k
+ In Collection: DPT
+ Metadata:
+ backbone: ViT-B
+ crop size: (512,512)
+ lr schd: 160000
+ inference time (ms/im):
+ - value: 96.06
+ hardware: V100
+ backend: PyTorch
+ batch size: 1
+ mode: FP32
+ resolution: (512,512)
+ Training Memory (GB): 8.09
+ Results:
+ - Task: Semantic Segmentation
+ Dataset: ADE20K
+ Metrics:
+ mIoU: 46.97
+ mIoU(ms+flip): 48.34
+ Config: configs/dpt/dpt_vit-b16_512x512_160k_ade20k.py
+ Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dpt/dpt_vit-b16_512x512_160k_ade20k/dpt_vit-b16_512x512_160k_ade20k-db31cf52.pth
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/configs/dpt/dpt_vit-b16_512x512_160k_ade20k.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/configs/dpt/dpt_vit-b16_512x512_160k_ade20k.py
new file mode 100644
index 0000000000000000000000000000000000000000..75d23200ed29bbe55e32327c4b1691dca763e0d4
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/configs/dpt/dpt_vit-b16_512x512_160k_ade20k.py
@@ -0,0 +1,47 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+_base_ = [
+ '../_base_/models/dpt_vit-b16.py', '../_base_/datasets/ade20k.py',
+ '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
+]
+
+# AdamW optimizer, no weight decay for position embedding & layer norm
+# in backbone
+optimizer = dict(
+ _delete_=True,
+ type='AdamW',
+ lr=0.00006,
+ betas=(0.9, 0.999),
+ weight_decay=0.01,
+ paramwise_cfg=dict(
+ custom_keys={
+ 'pos_embed': dict(decay_mult=0.),
+ 'cls_token': dict(decay_mult=0.),
+ 'norm': dict(decay_mult=0.)
+ }))
+
+lr_config = dict(
+ _delete_=True,
+ policy='poly',
+ warmup='linear',
+ warmup_iters=1500,
+ warmup_ratio=1e-6,
+ power=1.0,
+ min_lr=0.0,
+ by_epoch=False)
+
+# By default, models are trained on 8 GPUs with 2 images per GPU
+data = dict(samples_per_gpu=2, workers_per_gpu=2)
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/env_npu.sh b/PyTorch/contrib/cv/semantic_segmentation/DPT/env_npu.sh
new file mode 100644
index 0000000000000000000000000000000000000000..abc92e4ae9ef638ea511eff36d8e0448d896695c
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/env_npu.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+export install_path=/usr/local/Ascend
+
+if [ -d ${install_path}/toolkit ]; then
+ export LD_LIBRARY_PATH=${install_path}/fwkacllib/lib64/:/usr/include/hdf5/lib/:/usr/local/:/usr/local/lib/:/usr/lib/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons:${path_lib}:${LD_LIBRARY_PATH}
+ export PATH=${install_path}/fwkacllib/ccec_compiler/bin:${install_path}/fwkacllib/bin:$PATH
+ export PYTHONPATH=${install_path}/fwkacllib/python/site-packages:${install_path}/tfplugin/python/site-packages:${install_path}/toolkit/python/site-packages:$PYTHONPATH
+ export PYTHONPATH=/usr/local/python3.7.5/lib/python3.7/site-packages:$PYTHONPATH
+ export ASCEND_OPP_PATH=${install_path}/opp
+else
+ if [ -d ${install_path}/nnae/latest ];then
+        export LD_LIBRARY_PATH=${install_path}/nnae/latest/fwkacllib/lib64/:/usr/local/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:/usr/local/lib/:/usr/lib64/:/usr/lib/:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH
+ export PATH=$PATH:${install_path}/nnae/latest/fwkacllib/ccec_compiler/bin/:${install_path}/nnae/latest/toolkit/tools/ide_daemon/bin/
+ export ASCEND_OPP_PATH=${install_path}/nnae/latest/opp/
+ export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/nnae/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
+ export PYTHONPATH=${install_path}/nnae/latest/fwkacllib/python/site-packages/:${install_path}/nnae/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/nnae/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH
+ export ASCEND_AICPU_PATH=${install_path}/nnae/latest
+ else
+ export LD_LIBRARY_PATH=${install_path}/ascend-toolkit/latest/fwkacllib/lib64/:/usr/local/:/usr/local/lib/:/usr/lib64/:/usr/lib/:/usr/local/python3.7.5/lib/:/usr/local/openblas/lib:${install_path}/driver/lib64/common/:${install_path}/driver/lib64/driver/:${install_path}/add-ons/:/usr/lib/aarch64-linux-gnu:$LD_LIBRARY_PATH
+ export PATH=$PATH:${install_path}/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:${install_path}/ascend-toolkit/latest/toolkit/tools/ide_daemon/bin/
+ export ASCEND_OPP_PATH=${install_path}/ascend-toolkit/latest/opp/
+ export OPTION_EXEC_EXTERN_PLUGIN_PATH=${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libfe.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:${install_path}/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel/libge_local_engine.so
+ export PYTHONPATH=${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/auto_tune.egg/auto_tune:${install_path}/ascend-toolkit/latest/fwkacllib/python/site-packages/schedule_search.egg:$PYTHONPATH
+ export ASCEND_AICPU_PATH=${install_path}/ascend-toolkit/latest
+ fi
+fi
+
+${install_path}/driver/tools/msnpureport -g error -d 0
+${install_path}/driver/tools/msnpureport -g error -d 1
+${install_path}/driver/tools/msnpureport -g error -d 2
+${install_path}/driver/tools/msnpureport -g error -d 3
+${install_path}/driver/tools/msnpureport -g error -d 4
+${install_path}/driver/tools/msnpureport -g error -d 5
+${install_path}/driver/tools/msnpureport -g error -d 6
+${install_path}/driver/tools/msnpureport -g error -d 7
+
+# Print host logs to the serial port: 0 - off / 1 - on
+export ASCEND_SLOG_PRINT_TO_STDOUT=0
+# Default log level: 0 - debug / 1 - info / 2 - warning / 3 - error
+export ASCEND_GLOBAL_LOG_LEVEL=0
+# Event logging: 0 - off / 1 - on
+export ASCEND_GLOBAL_EVENT_ENABLE=0
+# Task queue: 0 - off / 1 - on
+export TASK_QUEUE_ENABLE=0
+# PTCopy: 0 - off / 1 - on
+export PTCOPY_ENABLE=1
+# Combining of two non-contiguous tensors: 0 - off / 1 - on
+export COMBINED_ENABLE=1
+# Combining of three non-contiguous tensors: 0 - off / 1 - on
+export TRI_COMBINED_ENABLE=1
+# Ops that must be recompiled in special scenarios; no need to modify
+export DYNAMIC_OP="ADD#MUL"
+# HCCL whitelist switch: 1 - off / 0 - on
+export HCCL_WHITELIST_DISABLE=1
+# The default HCCL connect timeout of 120s is short; set it to 1800s to match the PyTorch default
+export HCCL_CONNECT_TIMEOUT=1800
+
+ulimit -SHn 512000
+
+path_lib=$(python3.7 -c """
+import sys
+import re
+result=''
+for index in range(len(sys.path)):
+ match_sit = re.search('-packages', sys.path[index])
+ if match_sit is not None:
+ match_lib = re.search('lib', sys.path[index])
+
+ if match_lib is not None:
+ end=match_lib.span()[1]
+ result += sys.path[index][0:end] + ':'
+
+ result+=sys.path[index] + '/torch/lib:'
+print(result)"""
+)
+
+echo ${path_lib}
+
+export LD_LIBRARY_PATH=/usr/local/python3.7.5/lib/:${path_lib}:$LD_LIBRARY_PATH
+
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/__init__.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..90b9388db0f1710c14411f1104c44ba129dbaf0e
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/__init__.py
@@ -0,0 +1,28 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# flake8: noqa
+from .arraymisc import *
+from .fileio import *
+from .image import *
+from .utils import *
+from .version import *
+from .video import *
+from .visualization import *
+
+# The following modules are not imported to this level, so mmcv may be used
+# without PyTorch.
+# - runner
+# - parallel
+# - op
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/arraymisc/__init__.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/arraymisc/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..dd56054acbdcaf8e30061c48217eaf85868b804f
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/arraymisc/__init__.py
@@ -0,0 +1,17 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .quantization import dequantize, quantize
+
+__all__ = ['quantize', 'dequantize']
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/arraymisc/quantization.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/arraymisc/quantization.py
new file mode 100644
index 0000000000000000000000000000000000000000..363b2f997446009c3aa492f3a30d8c4595127696
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/arraymisc/quantization.py
@@ -0,0 +1,68 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+
+
+def quantize(arr, min_val, max_val, levels, dtype=np.int64):
+ """Quantize an array of (-inf, inf) to [0, levels-1].
+
+ Args:
+ arr (ndarray): Input array.
+ min_val (scalar): Minimum value to be clipped.
+ max_val (scalar): Maximum value to be clipped.
+ levels (int): Quantization levels.
+ dtype (np.type): The type of the quantized array.
+
+ Returns:
+        ndarray: Quantized array.
+ """
+ if not (isinstance(levels, int) and levels > 1):
+ raise ValueError(
+ f'levels must be a positive integer, but got {levels}')
+ if min_val >= max_val:
+ raise ValueError(
+ f'min_val ({min_val}) must be smaller than max_val ({max_val})')
+
+ arr = np.clip(arr, min_val, max_val) - min_val
+ quantized_arr = np.minimum(
+ np.floor(levels * arr / (max_val - min_val)).astype(dtype), levels - 1)
+
+ return quantized_arr
+
+
+def dequantize(arr, min_val, max_val, levels, dtype=np.float64):
+ """Dequantize an array.
+
+ Args:
+ arr (ndarray): Input array.
+ min_val (scalar): Minimum value to be clipped.
+ max_val (scalar): Maximum value to be clipped.
+ levels (int): Quantization levels.
+ dtype (np.type): The type of the dequantized array.
+
+ Returns:
+        ndarray: Dequantized array.
+ """
+ if not (isinstance(levels, int) and levels > 1):
+ raise ValueError(
+ f'levels must be a positive integer, but got {levels}')
+ if min_val >= max_val:
+ raise ValueError(
+ f'min_val ({min_val}) must be smaller than max_val ({max_val})')
+
+ dequantized_arr = (arr + 0.5).astype(dtype) * (max_val -
+ min_val) / levels + min_val
+
+ return dequantized_arr
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/__init__.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e2fc707b6886a4c6b05214f9a3888726cfc6233e
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/__init__.py
@@ -0,0 +1,54 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .alexnet import AlexNet
+# yapf: disable
+from .bricks import (ACTIVATION_LAYERS, CONV_LAYERS, NORM_LAYERS,
+ PADDING_LAYERS, PLUGIN_LAYERS, UPSAMPLE_LAYERS,
+ ContextBlock, Conv2d, Conv3d, ConvAWS2d, ConvModule,
+ ConvTranspose2d, ConvTranspose3d, ConvWS2d,
+ DepthwiseSeparableConvModule, GeneralizedAttention,
+ HSigmoid, HSwish, Linear, MaxPool2d, MaxPool3d,
+ NonLocal1d, NonLocal2d, NonLocal3d, Scale, Swish,
+ build_activation_layer, build_conv_layer,
+ build_norm_layer, build_padding_layer, build_plugin_layer,
+ build_upsample_layer, conv_ws_2d, is_norm)
+from .builder import MODELS, build_model_from_cfg
+# yapf: enable
+from .resnet import ResNet, make_res_layer
+from .utils import (INITIALIZERS, Caffe2XavierInit, ConstantInit, KaimingInit,
+ NormalInit, PretrainedInit, TruncNormalInit, UniformInit,
+ XavierInit, bias_init_with_prob, caffe2_xavier_init,
+ constant_init, fuse_conv_bn, get_model_complexity_info,
+ initialize, kaiming_init, normal_init, trunc_normal_init,
+ uniform_init, xavier_init)
+from .vgg import VGG, make_vgg_layer
+
+__all__ = [
+ 'AlexNet', 'VGG', 'make_vgg_layer', 'ResNet', 'make_res_layer',
+ 'constant_init', 'xavier_init', 'normal_init', 'trunc_normal_init',
+ 'uniform_init', 'kaiming_init', 'caffe2_xavier_init',
+ 'bias_init_with_prob', 'ConvModule', 'build_activation_layer',
+ 'build_conv_layer', 'build_norm_layer', 'build_padding_layer',
+ 'build_upsample_layer', 'build_plugin_layer', 'is_norm', 'NonLocal1d',
+ 'NonLocal2d', 'NonLocal3d', 'ContextBlock', 'HSigmoid', 'Swish', 'HSwish',
+ 'GeneralizedAttention', 'ACTIVATION_LAYERS', 'CONV_LAYERS', 'NORM_LAYERS',
+ 'PADDING_LAYERS', 'UPSAMPLE_LAYERS', 'PLUGIN_LAYERS', 'Scale',
+ 'get_model_complexity_info', 'conv_ws_2d', 'ConvAWS2d', 'ConvWS2d',
+ 'fuse_conv_bn', 'DepthwiseSeparableConvModule', 'Linear', 'Conv2d',
+ 'ConvTranspose2d', 'MaxPool2d', 'ConvTranspose3d', 'MaxPool3d', 'Conv3d',
+ 'initialize', 'INITIALIZERS', 'ConstantInit', 'XavierInit', 'NormalInit',
+ 'TruncNormalInit', 'UniformInit', 'KaimingInit', 'PretrainedInit',
+ 'Caffe2XavierInit', 'MODELS', 'build_model_from_cfg'
+]
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/alexnet.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/alexnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..827157f9eaca3f71630bb1bfa2c0979ba254eb1e
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/alexnet.py
@@ -0,0 +1,74 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+
+import torch.nn as nn
+
+
+class AlexNet(nn.Module):
+ """AlexNet backbone.
+
+ Args:
+ num_classes (int): number of classes for classification.
+ """
+
+ def __init__(self, num_classes=-1):
+ super(AlexNet, self).__init__()
+ self.num_classes = num_classes
+ self.features = nn.Sequential(
+ nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
+ nn.ReLU(inplace=True),
+ nn.MaxPool2d(kernel_size=3, stride=2),
+ nn.Conv2d(64, 192, kernel_size=5, padding=2),
+ nn.ReLU(inplace=True),
+ nn.MaxPool2d(kernel_size=3, stride=2),
+ nn.Conv2d(192, 384, kernel_size=3, padding=1),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(384, 256, kernel_size=3, padding=1),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(256, 256, kernel_size=3, padding=1),
+ nn.ReLU(inplace=True),
+ nn.MaxPool2d(kernel_size=3, stride=2),
+ )
+ if self.num_classes > 0:
+ self.classifier = nn.Sequential(
+ nn.Dropout(),
+ nn.Linear(256 * 6 * 6, 4096),
+ nn.ReLU(inplace=True),
+ nn.Dropout(),
+ nn.Linear(4096, 4096),
+ nn.ReLU(inplace=True),
+ nn.Linear(4096, num_classes),
+ )
+
+ def init_weights(self, pretrained=None):
+ if isinstance(pretrained, str):
+ logger = logging.getLogger()
+ from ..runner import load_checkpoint
+ load_checkpoint(self, pretrained, strict=False, logger=logger)
+ elif pretrained is None:
+ # use default initializer
+ pass
+ else:
+ raise TypeError('pretrained must be a str or None')
+
+ def forward(self, x):
+
+ x = self.features(x)
+ if self.num_classes > 0:
+ x = x.view(x.size(0), 256 * 6 * 6)
+ x = self.classifier(x)
+
+ return x
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/__init__.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..735d51caab3a3217ec14101268d05df196c610e3
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/__init__.py
@@ -0,0 +1,48 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .activation import build_activation_layer
+from .context_block import ContextBlock
+from .conv import build_conv_layer
+from .conv2d_adaptive_padding import Conv2dAdaptivePadding
+from .conv_module import ConvModule
+from .conv_ws import ConvAWS2d, ConvWS2d, conv_ws_2d
+from .depthwise_separable_conv_module import DepthwiseSeparableConvModule
+from .drop import Dropout, DropPath
+from .generalized_attention import GeneralizedAttention
+from .hsigmoid import HSigmoid
+from .hswish import HSwish
+from .non_local import NonLocal1d, NonLocal2d, NonLocal3d
+from .norm import build_norm_layer, is_norm
+from .padding import build_padding_layer
+from .plugin import build_plugin_layer
+from .registry import (ACTIVATION_LAYERS, CONV_LAYERS, NORM_LAYERS,
+ PADDING_LAYERS, PLUGIN_LAYERS, UPSAMPLE_LAYERS)
+from .scale import Scale
+from .swish import Swish
+from .upsample import build_upsample_layer
+from .wrappers import (Conv2d, Conv3d, ConvTranspose2d, ConvTranspose3d,
+ Linear, MaxPool2d, MaxPool3d)
+
+__all__ = [
+ 'ConvModule', 'build_activation_layer', 'build_conv_layer',
+ 'build_norm_layer', 'build_padding_layer', 'build_upsample_layer',
+ 'build_plugin_layer', 'is_norm', 'HSigmoid', 'HSwish', 'NonLocal1d',
+ 'NonLocal2d', 'NonLocal3d', 'ContextBlock', 'GeneralizedAttention',
+ 'ACTIVATION_LAYERS', 'CONV_LAYERS', 'NORM_LAYERS', 'PADDING_LAYERS',
+ 'UPSAMPLE_LAYERS', 'PLUGIN_LAYERS', 'Scale', 'ConvAWS2d', 'ConvWS2d',
+ 'conv_ws_2d', 'DepthwiseSeparableConvModule', 'Swish', 'Linear',
+ 'Conv2dAdaptivePadding', 'Conv2d', 'ConvTranspose2d', 'MaxPool2d',
+ 'ConvTranspose3d', 'MaxPool3d', 'Conv3d', 'Dropout', 'DropPath'
+]
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/activation.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/activation.py
new file mode 100644
index 0000000000000000000000000000000000000000..224b5f88429c9925eeb4d5dcca5455b03877d541
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/activation.py
@@ -0,0 +1,106 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from mmcv.utils import TORCH_VERSION, build_from_cfg, digit_version
+from .registry import ACTIVATION_LAYERS
+
+for module in [
+ nn.ReLU, nn.LeakyReLU, nn.PReLU, nn.RReLU, nn.ReLU6, nn.ELU,
+ nn.Sigmoid, nn.Tanh
+]:
+ ACTIVATION_LAYERS.register_module(module=module)
+
+
+@ACTIVATION_LAYERS.register_module(name='Clip')
+@ACTIVATION_LAYERS.register_module()
+class Clamp(nn.Module):
+ """Clamp activation layer.
+
+ This activation function is to clamp the feature map value within
+ :math:`[min, max]`. More details can be found in ``torch.clamp()``.
+
+ Args:
+ min (Number | optional): Lower-bound of the range to be clamped to.
+ Default to -1.
+ max (Number | optional): Upper-bound of the range to be clamped to.
+ Default to 1.
+ """
+
+ def __init__(self, min=-1., max=1.):
+ super(Clamp, self).__init__()
+ self.min = min
+ self.max = max
+
+ def forward(self, x):
+ """Forward function.
+
+ Args:
+ x (torch.Tensor): The input tensor.
+
+ Returns:
+ torch.Tensor: Clamped tensor.
+ """
+ return torch.clamp(x, min=self.min, max=self.max)
+
+
+class GELU(nn.Module):
+ r"""Applies the Gaussian Error Linear Units function:
+
+ .. math::
+ \text{GELU}(x) = x * \Phi(x)
+ where :math:`\Phi(x)` is the Cumulative Distribution Function for
+ Gaussian Distribution.
+
+ Shape:
+ - Input: :math:`(N, *)` where `*` means, any number of additional
+ dimensions
+ - Output: :math:`(N, *)`, same shape as the input
+
+ .. image:: scripts/activation_images/GELU.png
+
+ Examples::
+
+ >>> m = nn.GELU()
+ >>> input = torch.randn(2)
+ >>> output = m(input)
+ """
+
+ def forward(self, input):
+ return F.gelu(input)
+
+
+if (TORCH_VERSION == 'parrots'
+ or digit_version(TORCH_VERSION) < digit_version('1.4')):
+ ACTIVATION_LAYERS.register_module(module=GELU)
+else:
+ ACTIVATION_LAYERS.register_module(module=nn.GELU)
+
+
+def build_activation_layer(cfg):
+ """Build activation layer.
+
+ Args:
+ cfg (dict): The activation layer config, which should contain:
+
+ - type (str): Layer type.
+ - layer args: Args needed to instantiate an activation layer.
+
+ Returns:
+ nn.Module: Created activation layer.
+ """
+ return build_from_cfg(cfg, ACTIVATION_LAYERS)
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/context_block.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/context_block.py
new file mode 100644
index 0000000000000000000000000000000000000000..727112b94c8b2a20bd34e6f0af306d9e8adad9ea
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/context_block.py
@@ -0,0 +1,138 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+from torch import nn
+
+from ..utils import constant_init, kaiming_init
+from .registry import PLUGIN_LAYERS
+
+
+def last_zero_init(m):
+ if isinstance(m, nn.Sequential):
+ constant_init(m[-1], val=0)
+ else:
+ constant_init(m, val=0)
+
+
+@PLUGIN_LAYERS.register_module()
+class ContextBlock(nn.Module):
+ """ContextBlock module in GCNet.
+
+ See 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond'
+ (https://arxiv.org/abs/1904.11492) for details.
+
+ Args:
+ in_channels (int): Channels of the input feature map.
+        ratio (float): Ratio of channels of the transform bottleneck.
+        pooling_type (str): Pooling method for context modeling.
+            Options are 'att' and 'avg', which stand for attention pooling and
+            average pooling respectively. Default: 'att'.
+        fusion_types (Sequence[str]): Fusion method for feature fusion.
+            Options are 'channel_add' and 'channel_mul', which stand for
+            channelwise addition and multiplication respectively.
+            Default: ('channel_add',).
+ """
+
+ _abbr_ = 'context_block'
+
+ def __init__(self,
+ in_channels,
+ ratio,
+ pooling_type='att',
+ fusion_types=('channel_add', )):
+ super(ContextBlock, self).__init__()
+ assert pooling_type in ['avg', 'att']
+ assert isinstance(fusion_types, (list, tuple))
+ valid_fusion_types = ['channel_add', 'channel_mul']
+ assert all([f in valid_fusion_types for f in fusion_types])
+ assert len(fusion_types) > 0, 'at least one fusion should be used'
+ self.in_channels = in_channels
+ self.ratio = ratio
+ self.planes = int(in_channels * ratio)
+ self.pooling_type = pooling_type
+ self.fusion_types = fusion_types
+ if pooling_type == 'att':
+ self.conv_mask = nn.Conv2d(in_channels, 1, kernel_size=1)
+ self.softmax = nn.Softmax(dim=2)
+ else:
+ self.avg_pool = nn.AdaptiveAvgPool2d(1)
+ if 'channel_add' in fusion_types:
+ self.channel_add_conv = nn.Sequential(
+ nn.Conv2d(self.in_channels, self.planes, kernel_size=1),
+ nn.LayerNorm([self.planes, 1, 1]),
+ nn.ReLU(inplace=True), # yapf: disable
+ nn.Conv2d(self.planes, self.in_channels, kernel_size=1))
+ else:
+ self.channel_add_conv = None
+ if 'channel_mul' in fusion_types:
+ self.channel_mul_conv = nn.Sequential(
+ nn.Conv2d(self.in_channels, self.planes, kernel_size=1),
+ nn.LayerNorm([self.planes, 1, 1]),
+ nn.ReLU(inplace=True), # yapf: disable
+ nn.Conv2d(self.planes, self.in_channels, kernel_size=1))
+ else:
+ self.channel_mul_conv = None
+ self.reset_parameters()
+
+ def reset_parameters(self):
+ if self.pooling_type == 'att':
+ kaiming_init(self.conv_mask, mode='fan_in')
+ self.conv_mask.inited = True
+
+ if self.channel_add_conv is not None:
+ last_zero_init(self.channel_add_conv)
+ if self.channel_mul_conv is not None:
+ last_zero_init(self.channel_mul_conv)
+
+ def spatial_pool(self, x):
+ batch, channel, height, width = x.size()
+ if self.pooling_type == 'att':
+ input_x = x
+ # [N, C, H * W]
+ input_x = input_x.view(batch, channel, height * width)
+ # [N, 1, C, H * W]
+ input_x = input_x.unsqueeze(1)
+ # [N, 1, H, W]
+ context_mask = self.conv_mask(x)
+ # [N, 1, H * W]
+ context_mask = context_mask.view(batch, 1, height * width)
+ # [N, 1, H * W]
+ context_mask = self.softmax(context_mask)
+ # [N, 1, H * W, 1]
+ context_mask = context_mask.unsqueeze(-1)
+ # [N, 1, C, 1]
+ context = torch.matmul(input_x, context_mask)
+ # [N, C, 1, 1]
+ context = context.view(batch, channel, 1, 1)
+ else:
+ # [N, C, 1, 1]
+ context = self.avg_pool(x)
+
+ return context
+
+ def forward(self, x):
+ # [N, C, 1, 1]
+ context = self.spatial_pool(x)
+
+ out = x
+ if self.channel_mul_conv is not None:
+ # [N, C, 1, 1]
+ channel_mul_term = torch.sigmoid(self.channel_mul_conv(context))
+ out = out * channel_mul_term
+ if self.channel_add_conv is not None:
+ # [N, C, 1, 1]
+ channel_add_term = self.channel_add_conv(context)
+ out = out + channel_add_term
+
+ return out
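+
+
+# Minimal usage sketch (illustrative only; the channel count and ratio below
+# are arbitrary example values). The block is residual-style: the pooled
+# global context is transformed and broadcast back onto `x`, so the input
+# shape is preserved.
+if __name__ == '__main__':
+    feat = torch.randn(2, 64, 32, 32)  # [N, C, H, W] dummy feature map
+    gc_block = ContextBlock(in_channels=64, ratio=1. / 4)
+    out = gc_block(feat)
+    assert out.shape == feat.shape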
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/conv.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/conv.py
new file mode 100644
index 0000000000000000000000000000000000000000..8e223592f40bafd52fbf895b604270373557d993
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/conv.py
@@ -0,0 +1,57 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from torch import nn
+
+from .registry import CONV_LAYERS
+
+CONV_LAYERS.register_module('Conv1d', module=nn.Conv1d)
+CONV_LAYERS.register_module('Conv2d', module=nn.Conv2d)
+CONV_LAYERS.register_module('Conv3d', module=nn.Conv3d)
+CONV_LAYERS.register_module('Conv', module=nn.Conv2d)
+
+
+def build_conv_layer(cfg, *args, **kwargs):
+ """Build convolution layer.
+
+ Args:
+ cfg (None or dict): The conv layer config, which should contain:
+ - type (str): Layer type.
+            - layer args: Args needed to instantiate a conv layer.
+ args (argument list): Arguments passed to the `__init__`
+ method of the corresponding conv layer.
+ kwargs (keyword arguments): Keyword arguments passed to the `__init__`
+ method of the corresponding conv layer.
+
+ Returns:
+ nn.Module: Created conv layer.
+ """
+ if cfg is None:
+ cfg_ = dict(type='Conv2d')
+ else:
+ if not isinstance(cfg, dict):
+ raise TypeError('cfg must be a dict')
+ if 'type' not in cfg:
+ raise KeyError('the cfg dict must contain the key "type"')
+ cfg_ = cfg.copy()
+
+ layer_type = cfg_.pop('type')
+ if layer_type not in CONV_LAYERS:
+        raise KeyError(f'Unrecognized conv type {layer_type}')
+ else:
+ conv_layer = CONV_LAYERS.get(layer_type)
+
+ layer = conv_layer(*args, **kwargs, **cfg_)
+
+ return layer
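+
+
+# Minimal usage sketch (illustrative only): the cfg dict selects the conv
+# class from CONV_LAYERS; any remaining cfg keys plus *args/**kwargs are
+# forwarded to that class's constructor, and `None` falls back to nn.Conv2d.
+if __name__ == '__main__':
+    conv = build_conv_layer(
+        dict(type='Conv2d'), 16, 32, kernel_size=3, padding=1)
+    assert isinstance(conv, nn.Conv2d)
+    assert isinstance(build_conv_layer(None, 16, 32, 3), nn.Conv2d)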
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/conv2d_adaptive_padding.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/conv2d_adaptive_padding.py
new file mode 100644
index 0000000000000000000000000000000000000000..86a1076e8bb01f793a5882823b5dab70e0b293b9
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/conv2d_adaptive_padding.py
@@ -0,0 +1,75 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import math
+
+from torch import nn
+from torch.nn import functional as F
+
+from .registry import CONV_LAYERS
+
+
+@CONV_LAYERS.register_module()
+class Conv2dAdaptivePadding(nn.Conv2d):
+ """Implementation of 2D convolution in tensorflow with `padding` as "same",
+ which applies padding to input (if needed) so that input image gets fully
+ covered by filter and stride you specified. For stride 1, this will ensure
+ that output image size is same as input. For stride of 2, output dimensions
+ will be half, for example.
+
+ Args:
+ in_channels (int): Number of channels in the input image
+ out_channels (int): Number of channels produced by the convolution
+ kernel_size (int or tuple): Size of the convolving kernel
+ stride (int or tuple, optional): Stride of the convolution. Default: 1
+ padding (int or tuple, optional): Zero-padding added to both sides of
+ the input. Default: 0
+ dilation (int or tuple, optional): Spacing between kernel elements.
+ Default: 1
+ groups (int, optional): Number of blocked connections from input
+ channels to output channels. Default: 1
+ bias (bool, optional): If ``True``, adds a learnable bias to the
+ output. Default: ``True``
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride=1,
+ padding=0,
+ dilation=1,
+ groups=1,
+ bias=True):
+ super().__init__(in_channels, out_channels, kernel_size, stride, 0,
+ dilation, groups, bias)
+
+ def forward(self, x):
+ img_h, img_w = x.size()[-2:]
+ kernel_h, kernel_w = self.weight.size()[-2:]
+ stride_h, stride_w = self.stride
+ output_h = math.ceil(img_h / stride_h)
+ output_w = math.ceil(img_w / stride_w)
+ pad_h = (
+ max((output_h - 1) * self.stride[0] +
+ (kernel_h - 1) * self.dilation[0] + 1 - img_h, 0))
+ pad_w = (
+ max((output_w - 1) * self.stride[1] +
+ (kernel_w - 1) * self.dilation[1] + 1 - img_w, 0))
+ if pad_h > 0 or pad_w > 0:
+ x = F.pad(x, [
+ pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2
+ ])
+ return F.conv2d(x, self.weight, self.bias, self.stride, self.padding,
+ self.dilation, self.groups)
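+
+
+# Minimal shape sketch (illustrative only, arbitrary sizes): with "same"-style
+# padding the spatial output size is ceil(input_size / stride), independent of
+# the kernel size. `torch` is imported locally because this module itself only
+# pulls in `torch.nn` and `torch.nn.functional`.
+if __name__ == '__main__':
+    import torch
+    conv = Conv2dAdaptivePadding(8, 8, kernel_size=3, stride=2)
+    out = conv(torch.randn(1, 8, 15, 20))
+    assert out.shape[-2:] == (8, 10)  # ceil(15 / 2) = 8, ceil(20 / 2) = 10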
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/conv_module.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/conv_module.py
new file mode 100644
index 0000000000000000000000000000000000000000..6e0d96585f1f7f40e5e600f63a29043c04f4ba1a
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/conv_module.py
@@ -0,0 +1,219 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import warnings
+
+import torch.nn as nn
+
+from mmcv.utils import _BatchNorm, _InstanceNorm
+from ..utils import constant_init, kaiming_init
+from .activation import build_activation_layer
+from .conv import build_conv_layer
+from .norm import build_norm_layer
+from .padding import build_padding_layer
+from .registry import PLUGIN_LAYERS
+
+
+@PLUGIN_LAYERS.register_module()
+class ConvModule(nn.Module):
+ """A conv block that bundles conv/norm/activation layers.
+
+ This block simplifies the usage of convolution layers, which are commonly
+ used with a norm layer (e.g., BatchNorm) and activation layer (e.g., ReLU).
+ It is based upon three build methods: `build_conv_layer()`,
+ `build_norm_layer()` and `build_activation_layer()`.
+
+ Besides, we add some additional features in this module.
+ 1. Automatically set `bias` of the conv layer.
+ 2. Spectral norm is supported.
+    3. More padding modes are supported. Before PyTorch 1.5, nn.Conv2d only
+       supported zero and circular padding, so we add a "reflect" padding mode.
+
+ Args:
+ in_channels (int): Number of channels in the input feature map.
+ Same as that in ``nn._ConvNd``.
+ out_channels (int): Number of channels produced by the convolution.
+ Same as that in ``nn._ConvNd``.
+ kernel_size (int | tuple[int]): Size of the convolving kernel.
+ Same as that in ``nn._ConvNd``.
+ stride (int | tuple[int]): Stride of the convolution.
+ Same as that in ``nn._ConvNd``.
+ padding (int | tuple[int]): Zero-padding added to both sides of
+ the input. Same as that in ``nn._ConvNd``.
+ dilation (int | tuple[int]): Spacing between kernel elements.
+ Same as that in ``nn._ConvNd``.
+ groups (int): Number of blocked connections from input channels to
+ output channels. Same as that in ``nn._ConvNd``.
+ bias (bool | str): If specified as `auto`, it will be decided by the
+ norm_cfg. Bias will be set as True if `norm_cfg` is None, otherwise
+ False. Default: "auto".
+ conv_cfg (dict): Config dict for convolution layer. Default: None,
+ which means using conv2d.
+ norm_cfg (dict): Config dict for normalization layer. Default: None.
+ act_cfg (dict): Config dict for activation layer.
+ Default: dict(type='ReLU').
+ inplace (bool): Whether to use inplace mode for activation.
+ Default: True.
+ with_spectral_norm (bool): Whether use spectral norm in conv module.
+ Default: False.
+        padding_mode (str): If the `padding_mode` is not supported by the
+            current `Conv2d` in PyTorch, we will use our own padding layer
+ instead. Currently, we support ['zeros', 'circular'] with official
+ implementation and ['reflect'] with our own implementation.
+ Default: 'zeros'.
+ order (tuple[str]): The order of conv/norm/activation layers. It is a
+ sequence of "conv", "norm" and "act". Common examples are
+ ("conv", "norm", "act") and ("act", "conv", "norm").
+ Default: ('conv', 'norm', 'act').
+ """
+
+ _abbr_ = 'conv_block'
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride=1,
+ padding=0,
+ dilation=1,
+ groups=1,
+ bias='auto',
+ conv_cfg=None,
+ norm_cfg=None,
+ act_cfg=dict(type='ReLU'),
+ inplace=True,
+ with_spectral_norm=False,
+ padding_mode='zeros',
+ order=('conv', 'norm', 'act')):
+ super(ConvModule, self).__init__()
+ assert conv_cfg is None or isinstance(conv_cfg, dict)
+ assert norm_cfg is None or isinstance(norm_cfg, dict)
+ assert act_cfg is None or isinstance(act_cfg, dict)
+ official_padding_mode = ['zeros', 'circular']
+ self.conv_cfg = conv_cfg
+ self.norm_cfg = norm_cfg
+ self.act_cfg = act_cfg
+ self.inplace = inplace
+ self.with_spectral_norm = with_spectral_norm
+ self.with_explicit_padding = padding_mode not in official_padding_mode
+ self.order = order
+ assert isinstance(self.order, tuple) and len(self.order) == 3
+ assert set(order) == set(['conv', 'norm', 'act'])
+
+ self.with_norm = norm_cfg is not None
+ self.with_activation = act_cfg is not None
+ # if the conv layer is before a norm layer, bias is unnecessary.
+ if bias == 'auto':
+ bias = not self.with_norm
+ self.with_bias = bias
+
+ if self.with_explicit_padding:
+ pad_cfg = dict(type=padding_mode)
+ self.padding_layer = build_padding_layer(pad_cfg, padding)
+
+ # reset padding to 0 for conv module
+ conv_padding = 0 if self.with_explicit_padding else padding
+ # build convolution layer
+ self.conv = build_conv_layer(
+ conv_cfg,
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride=stride,
+ padding=conv_padding,
+ dilation=dilation,
+ groups=groups,
+ bias=bias)
+ # export the attributes of self.conv to a higher level for convenience
+ self.in_channels = self.conv.in_channels
+ self.out_channels = self.conv.out_channels
+ self.kernel_size = self.conv.kernel_size
+ self.stride = self.conv.stride
+ self.padding = padding
+ self.dilation = self.conv.dilation
+ self.transposed = self.conv.transposed
+ self.output_padding = self.conv.output_padding
+ self.groups = self.conv.groups
+
+ if self.with_spectral_norm:
+ self.conv = nn.utils.spectral_norm(self.conv)
+
+ # build normalization layers
+ if self.with_norm:
+ # norm layer is after conv layer
+ if order.index('norm') > order.index('conv'):
+ norm_channels = out_channels
+ else:
+ norm_channels = in_channels
+ self.norm_name, norm = build_norm_layer(norm_cfg, norm_channels)
+ self.add_module(self.norm_name, norm)
+ if self.with_bias:
+ if isinstance(norm, (_BatchNorm, _InstanceNorm)):
+ warnings.warn(
+ 'Unnecessary conv bias before batch/instance norm')
+ else:
+ self.norm_name = None
+
+ # build activation layer
+ if self.with_activation:
+ act_cfg_ = act_cfg.copy()
+ # nn.Tanh has no 'inplace' argument
+ if act_cfg_['type'] not in [
+ 'Tanh', 'PReLU', 'Sigmoid', 'HSigmoid', 'Swish'
+ ]:
+ act_cfg_.setdefault('inplace', inplace)
+ self.activate = build_activation_layer(act_cfg_)
+
+ # Use msra init by default
+ self.init_weights()
+
+ @property
+ def norm(self):
+ if self.norm_name:
+ return getattr(self, self.norm_name)
+ else:
+ return None
+
+ def init_weights(self):
+ # 1. It is mainly for customized conv layers with their own
+ # initialization manners by calling their own ``init_weights()``,
+ # and we do not want ConvModule to override the initialization.
+ # 2. For customized conv layers without their own initialization
+ # manners (that is, they don't have their own ``init_weights()``)
+ # and PyTorch's conv layers, they will be initialized by
+ # this method with default ``kaiming_init``.
+ # Note: For PyTorch's conv layers, they will be overwritten by our
+ # initialization implementation using default ``kaiming_init``.
+ if not hasattr(self.conv, 'init_weights'):
+ if self.with_activation and self.act_cfg['type'] == 'LeakyReLU':
+ nonlinearity = 'leaky_relu'
+ a = self.act_cfg.get('negative_slope', 0.01)
+ else:
+ nonlinearity = 'relu'
+ a = 0
+ kaiming_init(self.conv, a=a, nonlinearity=nonlinearity)
+ if self.with_norm:
+ constant_init(self.norm, 1, bias=0)
+
+ def forward(self, x, activate=True, norm=True):
+ for layer in self.order:
+ if layer == 'conv':
+ if self.with_explicit_padding:
+ x = self.padding_layer(x)
+ x = self.conv(x)
+ elif layer == 'norm' and norm and self.with_norm:
+ x = self.norm(x)
+ elif layer == 'act' and activate and self.with_activation:
+ x = self.activate(x)
+ return x
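+
+
+# Minimal usage sketch (illustrative only, arbitrary channel counts): a
+# conv + BN + ReLU block assembled from config dicts. With bias='auto' the
+# conv bias is dropped automatically because a norm layer follows the conv.
+if __name__ == '__main__':
+    import torch
+    block = ConvModule(
+        3, 16, 3, padding=1, norm_cfg=dict(type='BN'), act_cfg=dict(type='ReLU'))
+    out = block(torch.randn(4, 3, 32, 32))
+    assert out.shape == (4, 16, 32, 32)
+    assert block.conv.bias is None  # bias disabled since norm_cfg is set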
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/conv_ws.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/conv_ws.py
new file mode 100644
index 0000000000000000000000000000000000000000..16a23a79fc9fd468902af922677fe7e0a1d36a51
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/conv_ws.py
@@ -0,0 +1,161 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from .registry import CONV_LAYERS
+
+
+def conv_ws_2d(input,
+ weight,
+ bias=None,
+ stride=1,
+ padding=0,
+ dilation=1,
+ groups=1,
+ eps=1e-5):
+ c_in = weight.size(0)
+ weight_flat = weight.view(c_in, -1)
+ mean = weight_flat.mean(dim=1, keepdim=True).view(c_in, 1, 1, 1)
+ std = weight_flat.std(dim=1, keepdim=True).view(c_in, 1, 1, 1)
+ weight = (weight - mean) / (std + eps)
+ return F.conv2d(input, weight, bias, stride, padding, dilation, groups)
+
+
+@CONV_LAYERS.register_module('ConvWS')
+class ConvWS2d(nn.Conv2d):
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride=1,
+ padding=0,
+ dilation=1,
+ groups=1,
+ bias=True,
+ eps=1e-5):
+ super(ConvWS2d, self).__init__(
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride=stride,
+ padding=padding,
+ dilation=dilation,
+ groups=groups,
+ bias=bias)
+ self.eps = eps
+
+ def forward(self, x):
+ return conv_ws_2d(x, self.weight, self.bias, self.stride, self.padding,
+ self.dilation, self.groups, self.eps)
+
+
+@CONV_LAYERS.register_module(name='ConvAWS')
+class ConvAWS2d(nn.Conv2d):
+ """AWS (Adaptive Weight Standardization)
+
+ This is a variant of Weight Standardization
+ (https://arxiv.org/pdf/1903.10520.pdf)
+ It is used in DetectoRS to avoid NaN
+ (https://arxiv.org/pdf/2006.02334.pdf)
+
+ Args:
+ in_channels (int): Number of channels in the input image
+ out_channels (int): Number of channels produced by the convolution
+ kernel_size (int or tuple): Size of the conv kernel
+ stride (int or tuple, optional): Stride of the convolution. Default: 1
+ padding (int or tuple, optional): Zero-padding added to both sides of
+ the input. Default: 0
+ dilation (int or tuple, optional): Spacing between kernel elements.
+ Default: 1
+ groups (int, optional): Number of blocked connections from input
+ channels to output channels. Default: 1
+ bias (bool, optional): If set True, adds a learnable bias to the
+ output. Default: True
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride=1,
+ padding=0,
+ dilation=1,
+ groups=1,
+ bias=True):
+ super().__init__(
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride=stride,
+ padding=padding,
+ dilation=dilation,
+ groups=groups,
+ bias=bias)
+ self.register_buffer('weight_gamma',
+ torch.ones(self.out_channels, 1, 1, 1))
+ self.register_buffer('weight_beta',
+ torch.zeros(self.out_channels, 1, 1, 1))
+
+ def _get_weight(self, weight):
+ weight_flat = weight.view(weight.size(0), -1)
+ mean = weight_flat.mean(dim=1).view(-1, 1, 1, 1)
+ std = torch.sqrt(weight_flat.var(dim=1) + 1e-5).view(-1, 1, 1, 1)
+ weight = (weight - mean) / std
+ weight = self.weight_gamma * weight + self.weight_beta
+ return weight
+
+ def forward(self, x):
+ weight = self._get_weight(self.weight)
+ return F.conv2d(x, weight, self.bias, self.stride, self.padding,
+ self.dilation, self.groups)
+
+ def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
+ missing_keys, unexpected_keys, error_msgs):
+ """Override default load function.
+
+ AWS overrides the function _load_from_state_dict to recover
+ weight_gamma and weight_beta if they are missing. If weight_gamma and
+ weight_beta are found in the checkpoint, this function will return
+ after super()._load_from_state_dict. Otherwise, it will compute the
+ mean and std of the pretrained weights and store them in weight_beta
+ and weight_gamma.
+ """
+
+ self.weight_gamma.data.fill_(-1)
+ local_missing_keys = []
+ super()._load_from_state_dict(state_dict, prefix, local_metadata,
+ strict, local_missing_keys,
+ unexpected_keys, error_msgs)
+ if self.weight_gamma.data.mean() > 0:
+ for k in local_missing_keys:
+ missing_keys.append(k)
+ return
+ weight = self.weight.data
+ weight_flat = weight.view(weight.size(0), -1)
+ mean = weight_flat.mean(dim=1).view(-1, 1, 1, 1)
+ std = torch.sqrt(weight_flat.var(dim=1) + 1e-5).view(-1, 1, 1, 1)
+ self.weight_beta.data.copy_(mean)
+ self.weight_gamma.data.copy_(std)
+ missing_gamma_beta = [
+ k for k in local_missing_keys
+ if k.endswith('weight_gamma') or k.endswith('weight_beta')
+ ]
+ for k in missing_gamma_beta:
+ local_missing_keys.remove(k)
+ for k in local_missing_keys:
+ missing_keys.append(k)
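+
+
+# Minimal usage sketch (illustrative only, arbitrary sizes): ConvWS2d
+# standardizes its kernel (zero mean, unit std per output channel) on the fly
+# at every forward pass, while the stored `weight` parameter stays untouched.
+if __name__ == '__main__':
+    conv = ConvWS2d(8, 16, kernel_size=3, padding=1)
+    out = conv(torch.randn(2, 8, 14, 14))
+    assert out.shape == (2, 16, 14, 14)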
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/depthwise_separable_conv_module.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/depthwise_separable_conv_module.py
new file mode 100644
index 0000000000000000000000000000000000000000..a211ec76cab8a414eb15c29933e891bcfad7bb40
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/depthwise_separable_conv_module.py
@@ -0,0 +1,109 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch.nn as nn
+
+from .conv_module import ConvModule
+
+
+class DepthwiseSeparableConvModule(nn.Module):
+ """Depthwise separable convolution module.
+
+ See https://arxiv.org/pdf/1704.04861.pdf for details.
+
+    This module can replace a ConvModule, with its single conv block replaced
+    by two conv blocks: a depthwise conv block and a pointwise conv block.
+    The depthwise conv block contains depthwise-conv/norm/activation layers,
+    and the pointwise conv block contains pointwise-conv/norm/activation
+    layers. Note that the norm/activation layers only appear in these blocks
+    when `norm_cfg` and `act_cfg` are specified.
+
+ Args:
+ in_channels (int): Number of channels in the input feature map.
+ Same as that in ``nn._ConvNd``.
+ out_channels (int): Number of channels produced by the convolution.
+ Same as that in ``nn._ConvNd``.
+ kernel_size (int | tuple[int]): Size of the convolving kernel.
+ Same as that in ``nn._ConvNd``.
+ stride (int | tuple[int]): Stride of the convolution.
+ Same as that in ``nn._ConvNd``. Default: 1.
+ padding (int | tuple[int]): Zero-padding added to both sides of
+ the input. Same as that in ``nn._ConvNd``. Default: 0.
+ dilation (int | tuple[int]): Spacing between kernel elements.
+ Same as that in ``nn._ConvNd``. Default: 1.
+ norm_cfg (dict): Default norm config for both depthwise ConvModule and
+ pointwise ConvModule. Default: None.
+ act_cfg (dict): Default activation config for both depthwise ConvModule
+ and pointwise ConvModule. Default: dict(type='ReLU').
+ dw_norm_cfg (dict): Norm config of depthwise ConvModule. If it is
+ 'default', it will be the same as `norm_cfg`. Default: 'default'.
+ dw_act_cfg (dict): Activation config of depthwise ConvModule. If it is
+ 'default', it will be the same as `act_cfg`. Default: 'default'.
+ pw_norm_cfg (dict): Norm config of pointwise ConvModule. If it is
+ 'default', it will be the same as `norm_cfg`. Default: 'default'.
+ pw_act_cfg (dict): Activation config of pointwise ConvModule. If it is
+ 'default', it will be the same as `act_cfg`. Default: 'default'.
+ kwargs (optional): Other shared arguments for depthwise and pointwise
+ ConvModule. See ConvModule for ref.
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride=1,
+ padding=0,
+ dilation=1,
+ norm_cfg=None,
+ act_cfg=dict(type='ReLU'),
+ dw_norm_cfg='default',
+ dw_act_cfg='default',
+ pw_norm_cfg='default',
+ pw_act_cfg='default',
+ **kwargs):
+ super(DepthwiseSeparableConvModule, self).__init__()
+ assert 'groups' not in kwargs, 'groups should not be specified'
+
+ # if norm/activation config of depthwise/pointwise ConvModule is not
+ # specified, use default config.
+ dw_norm_cfg = dw_norm_cfg if dw_norm_cfg != 'default' else norm_cfg
+ dw_act_cfg = dw_act_cfg if dw_act_cfg != 'default' else act_cfg
+ pw_norm_cfg = pw_norm_cfg if pw_norm_cfg != 'default' else norm_cfg
+ pw_act_cfg = pw_act_cfg if pw_act_cfg != 'default' else act_cfg
+
+ # depthwise convolution
+ self.depthwise_conv = ConvModule(
+ in_channels,
+ in_channels,
+ kernel_size,
+ stride=stride,
+ padding=padding,
+ dilation=dilation,
+ groups=in_channels,
+ norm_cfg=dw_norm_cfg,
+ act_cfg=dw_act_cfg,
+ **kwargs)
+
+ self.pointwise_conv = ConvModule(
+ in_channels,
+ out_channels,
+ 1,
+ norm_cfg=pw_norm_cfg,
+ act_cfg=pw_act_cfg,
+ **kwargs)
+
+ def forward(self, x):
+ x = self.depthwise_conv(x)
+ x = self.pointwise_conv(x)
+ return x
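+
+
+# Minimal usage sketch (illustrative only, arbitrary sizes): a 3x3 depthwise
+# conv (groups == in_channels) followed by a 1x1 pointwise conv, both built
+# as ConvModule blocks that share the same norm/activation defaults.
+if __name__ == '__main__':
+    import torch
+    dsconv = DepthwiseSeparableConvModule(
+        16, 32, 3, padding=1, norm_cfg=dict(type='BN'))
+    out = dsconv(torch.randn(2, 16, 28, 28))
+    assert out.shape == (2, 32, 28, 28)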
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/drop.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/drop.py
new file mode 100644
index 0000000000000000000000000000000000000000..67b144a36951829a77f944368029eaaaa8a20860
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/drop.py
@@ -0,0 +1,78 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+import torch.nn as nn
+
+from mmcv import build_from_cfg
+from .registry import DROPOUT_LAYERS
+
+
+def drop_path(x, drop_prob=0., training=False):
+ """Drop paths (Stochastic Depth) per sample (when applied in main path of
+ residual blocks).
+
+ We follow the implementation
+ https://github.com/rwightman/pytorch-image-models/blob/a2727c1bf78ba0d7b5727f5f95e37fb7f8866b1f/timm/models/layers/drop.py # noqa: E501
+ """
+ if drop_prob == 0. or not training:
+ return x
+ keep_prob = 1 - drop_prob
+ # handle tensors with different dimensions, not just 4D tensors.
+ shape = (x.shape[0], ) + (1, ) * (x.ndim - 1)
+ random_tensor = keep_prob + torch.rand(
+ shape, dtype=x.dtype, device=x.device)
+ output = x.div(keep_prob) * random_tensor.floor()
+ return output
+
+
+@DROPOUT_LAYERS.register_module()
+class DropPath(nn.Module):
+ """Drop paths (Stochastic Depth) per sample (when applied in main path of
+ residual blocks).
+
+ We follow the implementation
+ https://github.com/rwightman/pytorch-image-models/blob/a2727c1bf78ba0d7b5727f5f95e37fb7f8866b1f/timm/models/layers/drop.py # noqa: E501
+
+ Args:
+ drop_prob (float): Probability of the path to be zeroed. Default: 0.1
+ """
+
+ def __init__(self, drop_prob=0.1):
+ super(DropPath, self).__init__()
+ self.drop_prob = drop_prob
+
+ def forward(self, x):
+ return drop_path(x, self.drop_prob, self.training)
+
+
+@DROPOUT_LAYERS.register_module()
+class Dropout(nn.Dropout):
+ """A wrapper for ``torch.nn.Dropout``, We rename the ``p`` of
+ ``torch.nn.Dropout`` to ``drop_prob`` so as to be consistent with
+ ``DropPath``
+
+ Args:
+ drop_prob (float): Probability of the elements to be
+ zeroed. Default: 0.5.
+ inplace (bool): Do the operation inplace or not. Default: False.
+ """
+
+ def __init__(self, drop_prob=0.5, inplace=False):
+ super().__init__(p=drop_prob, inplace=inplace)
+
+
+def build_dropout(cfg, default_args=None):
+ """Builder for drop out layers."""
+ return build_from_cfg(cfg, DROPOUT_LAYERS, default_args)
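+
+
+# Minimal usage sketch (illustrative only): DropPath is an identity in eval
+# mode and randomly zeroes whole samples (rescaling the survivors) in train
+# mode; `build_dropout` instantiates it from a config dict.
+if __name__ == '__main__':
+    layer = build_dropout(dict(type='DropPath', drop_prob=0.2))
+    x = torch.randn(8, 16, 7, 7)
+    assert torch.equal(layer.eval()(x), x)  # no-op at inference time
+    _ = layer.train()(x)                    # stochastic depth when training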
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/generalized_attention.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/generalized_attention.py
new file mode 100644
index 0000000000000000000000000000000000000000..b55547b8eec99be7878928ffc473045f87892052
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/generalized_attention.py
@@ -0,0 +1,425 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import math
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from ..utils import kaiming_init
+from .registry import PLUGIN_LAYERS
+
+
+@PLUGIN_LAYERS.register_module()
+class GeneralizedAttention(nn.Module):
+ """GeneralizedAttention module.
+
+    See 'An Empirical Study of Spatial Attention Mechanisms in Deep Networks'
+    (https://arxiv.org/abs/1904.05873) for details.
+
+ Args:
+ in_channels (int): Channels of the input feature map.
+ spatial_range (int): The spatial range. -1 indicates no spatial range
+ constraint. Default: -1.
+ num_heads (int): The head number of empirical_attention module.
+ Default: 9.
+ position_embedding_dim (int): The position embedding dimension.
+ Default: -1.
+ position_magnitude (int): A multiplier acting on coord difference.
+ Default: 1.
+ kv_stride (int): The feature stride acting on key/value feature map.
+ Default: 2.
+ q_stride (int): The feature stride acting on query feature map.
+ Default: 1.
+ attention_type (str): A binary indicator string for indicating which
+ items in generalized empirical_attention module are used.
+ Default: '1111'.
+
+ - '1000' indicates 'query and key content' (appr - appr) item,
+ - '0100' indicates 'query content and relative position'
+ (appr - position) item,
+ - '0010' indicates 'key content only' (bias - appr) item,
+ - '0001' indicates 'relative position only' (bias - position) item.
+ """
+
+ _abbr_ = 'gen_attention_block'
+
+ def __init__(self,
+ in_channels,
+ spatial_range=-1,
+ num_heads=9,
+ position_embedding_dim=-1,
+ position_magnitude=1,
+ kv_stride=2,
+ q_stride=1,
+ attention_type='1111'):
+
+ super(GeneralizedAttention, self).__init__()
+
+ # hard range means local range for non-local operation
+ self.position_embedding_dim = (
+ position_embedding_dim
+ if position_embedding_dim > 0 else in_channels)
+
+ self.position_magnitude = position_magnitude
+ self.num_heads = num_heads
+ self.in_channels = in_channels
+ self.spatial_range = spatial_range
+ self.kv_stride = kv_stride
+ self.q_stride = q_stride
+ self.attention_type = [bool(int(_)) for _ in attention_type]
+ self.qk_embed_dim = in_channels // num_heads
+ out_c = self.qk_embed_dim * num_heads
+
+ if self.attention_type[0] or self.attention_type[1]:
+ self.query_conv = nn.Conv2d(
+ in_channels=in_channels,
+ out_channels=out_c,
+ kernel_size=1,
+ bias=False)
+ self.query_conv.kaiming_init = True
+
+ if self.attention_type[0] or self.attention_type[2]:
+ self.key_conv = nn.Conv2d(
+ in_channels=in_channels,
+ out_channels=out_c,
+ kernel_size=1,
+ bias=False)
+ self.key_conv.kaiming_init = True
+
+ self.v_dim = in_channels // num_heads
+ self.value_conv = nn.Conv2d(
+ in_channels=in_channels,
+ out_channels=self.v_dim * num_heads,
+ kernel_size=1,
+ bias=False)
+ self.value_conv.kaiming_init = True
+
+ if self.attention_type[1] or self.attention_type[3]:
+ self.appr_geom_fc_x = nn.Linear(
+ self.position_embedding_dim // 2, out_c, bias=False)
+ self.appr_geom_fc_x.kaiming_init = True
+
+ self.appr_geom_fc_y = nn.Linear(
+ self.position_embedding_dim // 2, out_c, bias=False)
+ self.appr_geom_fc_y.kaiming_init = True
+
+ if self.attention_type[2]:
+ stdv = 1.0 / math.sqrt(self.qk_embed_dim * 2)
+ appr_bias_value = -2 * stdv * torch.rand(out_c) + stdv
+ self.appr_bias = nn.Parameter(appr_bias_value)
+
+ if self.attention_type[3]:
+ stdv = 1.0 / math.sqrt(self.qk_embed_dim * 2)
+ geom_bias_value = -2 * stdv * torch.rand(out_c) + stdv
+ self.geom_bias = nn.Parameter(geom_bias_value)
+
+ self.proj_conv = nn.Conv2d(
+ in_channels=self.v_dim * num_heads,
+ out_channels=in_channels,
+ kernel_size=1,
+ bias=True)
+ self.proj_conv.kaiming_init = True
+ self.gamma = nn.Parameter(torch.zeros(1))
+
+ if self.spatial_range >= 0:
+ # only works when non local is after 3*3 conv
+ if in_channels == 256:
+ max_len = 84
+ elif in_channels == 512:
+ max_len = 42
+
+ max_len_kv = int((max_len - 1.0) / self.kv_stride + 1)
+ local_constraint_map = np.ones(
+ (max_len, max_len, max_len_kv, max_len_kv), dtype=int)
+ for iy in range(max_len):
+ for ix in range(max_len):
+ local_constraint_map[
+ iy, ix,
+ max((iy - self.spatial_range) //
+ self.kv_stride, 0):min((iy + self.spatial_range +
+ 1) // self.kv_stride +
+ 1, max_len),
+ max((ix - self.spatial_range) //
+ self.kv_stride, 0):min((ix + self.spatial_range +
+ 1) // self.kv_stride +
+ 1, max_len)] = 0
+
+ self.local_constraint_map = nn.Parameter(
+ torch.from_numpy(local_constraint_map).byte(),
+ requires_grad=False)
+
+ if self.q_stride > 1:
+ self.q_downsample = nn.AvgPool2d(
+ kernel_size=1, stride=self.q_stride)
+ else:
+ self.q_downsample = None
+
+ if self.kv_stride > 1:
+ self.kv_downsample = nn.AvgPool2d(
+ kernel_size=1, stride=self.kv_stride)
+ else:
+ self.kv_downsample = None
+
+ self.init_weights()
+
+ def get_position_embedding(self,
+ h,
+ w,
+ h_kv,
+ w_kv,
+ q_stride,
+ kv_stride,
+ device,
+ dtype,
+ feat_dim,
+ wave_length=1000):
+ # the default type of Tensor is float32, leading to type mismatch
+ # in fp16 mode. Cast it to support fp16 mode.
+ h_idxs = torch.linspace(0, h - 1, h).to(device=device, dtype=dtype)
+ h_idxs = h_idxs.view((h, 1)) * q_stride
+
+ w_idxs = torch.linspace(0, w - 1, w).to(device=device, dtype=dtype)
+ w_idxs = w_idxs.view((w, 1)) * q_stride
+
+ h_kv_idxs = torch.linspace(0, h_kv - 1, h_kv).to(
+ device=device, dtype=dtype)
+ h_kv_idxs = h_kv_idxs.view((h_kv, 1)) * kv_stride
+
+ w_kv_idxs = torch.linspace(0, w_kv - 1, w_kv).to(
+ device=device, dtype=dtype)
+ w_kv_idxs = w_kv_idxs.view((w_kv, 1)) * kv_stride
+
+ # (h, h_kv, 1)
+ h_diff = h_idxs.unsqueeze(1) - h_kv_idxs.unsqueeze(0)
+ h_diff *= self.position_magnitude
+
+ # (w, w_kv, 1)
+ w_diff = w_idxs.unsqueeze(1) - w_kv_idxs.unsqueeze(0)
+ w_diff *= self.position_magnitude
+
+ feat_range = torch.arange(0, feat_dim / 4).to(
+ device=device, dtype=dtype)
+
+ dim_mat = torch.Tensor([wave_length]).to(device=device, dtype=dtype)
+ dim_mat = dim_mat**((4. / feat_dim) * feat_range)
+ dim_mat = dim_mat.view((1, 1, -1))
+
+ embedding_x = torch.cat(
+ ((w_diff / dim_mat).sin(), (w_diff / dim_mat).cos()), dim=2)
+
+ embedding_y = torch.cat(
+ ((h_diff / dim_mat).sin(), (h_diff / dim_mat).cos()), dim=2)
+
+ return embedding_x, embedding_y
+
+ def forward(self, x_input):
+ num_heads = self.num_heads
+
+ # use empirical_attention
+ if self.q_downsample is not None:
+ x_q = self.q_downsample(x_input)
+ else:
+ x_q = x_input
+ n, _, h, w = x_q.shape
+
+ if self.kv_downsample is not None:
+ x_kv = self.kv_downsample(x_input)
+ else:
+ x_kv = x_input
+ _, _, h_kv, w_kv = x_kv.shape
+
+ if self.attention_type[0] or self.attention_type[1]:
+ proj_query = self.query_conv(x_q).view(
+ (n, num_heads, self.qk_embed_dim, h * w))
+ proj_query = proj_query.permute(0, 1, 3, 2)
+
+ if self.attention_type[0] or self.attention_type[2]:
+ proj_key = self.key_conv(x_kv).view(
+ (n, num_heads, self.qk_embed_dim, h_kv * w_kv))
+
+ if self.attention_type[1] or self.attention_type[3]:
+ position_embed_x, position_embed_y = self.get_position_embedding(
+ h, w, h_kv, w_kv, self.q_stride, self.kv_stride,
+ x_input.device, x_input.dtype, self.position_embedding_dim)
+ # (n, num_heads, w, w_kv, dim)
+ position_feat_x = self.appr_geom_fc_x(position_embed_x).\
+ view(1, w, w_kv, num_heads, self.qk_embed_dim).\
+ permute(0, 3, 1, 2, 4).\
+ repeat(n, 1, 1, 1, 1)
+
+ # (n, num_heads, h, h_kv, dim)
+ position_feat_y = self.appr_geom_fc_y(position_embed_y).\
+ view(1, h, h_kv, num_heads, self.qk_embed_dim).\
+ permute(0, 3, 1, 2, 4).\
+ repeat(n, 1, 1, 1, 1)
+
+ position_feat_x /= math.sqrt(2)
+ position_feat_y /= math.sqrt(2)
+
+ # accelerate for saliency only
+ if (np.sum(self.attention_type) == 1) and self.attention_type[2]:
+ appr_bias = self.appr_bias.\
+ view(1, num_heads, 1, self.qk_embed_dim).\
+ repeat(n, 1, 1, 1)
+
+ energy = torch.matmul(appr_bias, proj_key).\
+ view(n, num_heads, 1, h_kv * w_kv)
+
+ h = 1
+ w = 1
+ else:
+ # (n, num_heads, h*w, h_kv*w_kv), query before key, 540mb for
+ if not self.attention_type[0]:
+ energy = torch.zeros(
+ n,
+ num_heads,
+ h,
+ w,
+ h_kv,
+ w_kv,
+ dtype=x_input.dtype,
+ device=x_input.device)
+
+ # attention_type[0]: appr - appr
+ # attention_type[1]: appr - position
+ # attention_type[2]: bias - appr
+ # attention_type[3]: bias - position
+ if self.attention_type[0] or self.attention_type[2]:
+ if self.attention_type[0] and self.attention_type[2]:
+ appr_bias = self.appr_bias.\
+ view(1, num_heads, 1, self.qk_embed_dim)
+ energy = torch.matmul(proj_query + appr_bias, proj_key).\
+ view(n, num_heads, h, w, h_kv, w_kv)
+
+ elif self.attention_type[0]:
+ energy = torch.matmul(proj_query, proj_key).\
+ view(n, num_heads, h, w, h_kv, w_kv)
+
+ elif self.attention_type[2]:
+ appr_bias = self.appr_bias.\
+ view(1, num_heads, 1, self.qk_embed_dim).\
+ repeat(n, 1, 1, 1)
+
+ energy += torch.matmul(appr_bias, proj_key).\
+ view(n, num_heads, 1, 1, h_kv, w_kv)
+
+ if self.attention_type[1] or self.attention_type[3]:
+ if self.attention_type[1] and self.attention_type[3]:
+ geom_bias = self.geom_bias.\
+ view(1, num_heads, 1, self.qk_embed_dim)
+
+ proj_query_reshape = (proj_query + geom_bias).\
+ view(n, num_heads, h, w, self.qk_embed_dim)
+
+ energy_x = torch.matmul(
+ proj_query_reshape.permute(0, 1, 3, 2, 4),
+ position_feat_x.permute(0, 1, 2, 4, 3))
+ energy_x = energy_x.\
+ permute(0, 1, 3, 2, 4).unsqueeze(4)
+
+ energy_y = torch.matmul(
+ proj_query_reshape,
+ position_feat_y.permute(0, 1, 2, 4, 3))
+ energy_y = energy_y.unsqueeze(5)
+
+ energy += energy_x + energy_y
+
+ elif self.attention_type[1]:
+ proj_query_reshape = proj_query.\
+ view(n, num_heads, h, w, self.qk_embed_dim)
+ proj_query_reshape = proj_query_reshape.\
+ permute(0, 1, 3, 2, 4)
+ position_feat_x_reshape = position_feat_x.\
+ permute(0, 1, 2, 4, 3)
+ position_feat_y_reshape = position_feat_y.\
+ permute(0, 1, 2, 4, 3)
+
+ energy_x = torch.matmul(proj_query_reshape,
+ position_feat_x_reshape)
+ energy_x = energy_x.permute(0, 1, 3, 2, 4).unsqueeze(4)
+
+ energy_y = torch.matmul(proj_query_reshape,
+ position_feat_y_reshape)
+ energy_y = energy_y.unsqueeze(5)
+
+ energy += energy_x + energy_y
+
+ elif self.attention_type[3]:
+ geom_bias = self.geom_bias.\
+ view(1, num_heads, self.qk_embed_dim, 1).\
+ repeat(n, 1, 1, 1)
+
+ position_feat_x_reshape = position_feat_x.\
+ view(n, num_heads, w*w_kv, self.qk_embed_dim)
+
+ position_feat_y_reshape = position_feat_y.\
+ view(n, num_heads, h * h_kv, self.qk_embed_dim)
+
+ energy_x = torch.matmul(position_feat_x_reshape, geom_bias)
+ energy_x = energy_x.view(n, num_heads, 1, w, 1, w_kv)
+
+ energy_y = torch.matmul(position_feat_y_reshape, geom_bias)
+ energy_y = energy_y.view(n, num_heads, h, 1, h_kv, 1)
+
+ energy += energy_x + energy_y
+
+ energy = energy.view(n, num_heads, h * w, h_kv * w_kv)
+
+ if self.spatial_range >= 0:
+ cur_local_constraint_map = \
+ self.local_constraint_map[:h, :w, :h_kv, :w_kv].\
+ contiguous().\
+ view(1, 1, h*w, h_kv*w_kv)
+
+ energy = energy.masked_fill_(cur_local_constraint_map,
+ float('-inf'))
+
+ attention = F.softmax(energy, 3)
+
+ proj_value = self.value_conv(x_kv)
+ proj_value_reshape = proj_value.\
+ view((n, num_heads, self.v_dim, h_kv * w_kv)).\
+ permute(0, 1, 3, 2)
+
+ out = torch.matmul(attention, proj_value_reshape).\
+ permute(0, 1, 3, 2).\
+ contiguous().\
+ view(n, self.v_dim * self.num_heads, h, w)
+
+ out = self.proj_conv(out)
+
+ # output is downsampled, upsample back to input size
+ if self.q_downsample is not None:
+ out = F.interpolate(
+ out,
+ size=x_input.shape[2:],
+ mode='bilinear',
+ align_corners=False)
+
+ out = self.gamma * out + x_input
+ return out
+
+ def init_weights(self):
+ for m in self.modules():
+ if hasattr(m, 'kaiming_init') and m.kaiming_init:
+ kaiming_init(
+ m,
+ mode='fan_in',
+ nonlinearity='leaky_relu',
+ bias=0,
+ distribution='uniform',
+ a=1)
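+
+
+# Minimal usage sketch (illustrative only, arbitrary sizes): with the default
+# attention_type='1111' all four empirical-attention terms are enabled. The
+# module is residual (the gamma-scaled output is added to the input), so the
+# output shape matches the input.
+if __name__ == '__main__':
+    attn = GeneralizedAttention(in_channels=64, num_heads=8)
+    x = torch.randn(1, 64, 12, 12)
+    assert attn(x).shape == x.shape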
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/hsigmoid.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/hsigmoid.py
new file mode 100644
index 0000000000000000000000000000000000000000..adc169c039506d50f5d5970b3a52d606f6d91379
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/hsigmoid.py
@@ -0,0 +1,59 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import warnings
+
+import torch.nn as nn
+
+from .registry import ACTIVATION_LAYERS
+
+
+@ACTIVATION_LAYERS.register_module()
+class HSigmoid(nn.Module):
+ """Hard Sigmoid Module. Apply the hard sigmoid function:
+ Hsigmoid(x) = min(max((x + bias) / divisor, min_value), max_value)
+ Default: Hsigmoid(x) = min(max((x + 3) / 6, 0), 1)
+
+ Note:
+ In MMCV v1.4.4, we modified the default value of args to align with
+ PyTorch official.
+
+ Args:
+ bias (float): Bias of the input feature map. Default: 3.0.
+ divisor (float): Divisor of the input feature map. Default: 6.0.
+ min_value (float): Lower bound value. Default: 0.0.
+ max_value (float): Upper bound value. Default: 1.0.
+
+ Returns:
+ Tensor: The output tensor.
+ """
+
+ def __init__(self, bias=3.0, divisor=6.0, min_value=0.0, max_value=1.0):
+ super(HSigmoid, self).__init__()
+ warnings.warn(
+ 'In MMCV v1.4.4, we modified the default value of args to align '
+ 'with PyTorch official. Previous Implementation: '
+ 'Hsigmoid(x) = min(max((x + 1) / 2, 0), 1). '
+ 'Current Implementation: '
+ 'Hsigmoid(x) = min(max((x + 3) / 6, 0), 1).')
+ self.bias = bias
+ self.divisor = divisor
+ assert self.divisor != 0
+ self.min_value = min_value
+ self.max_value = max_value
+
+ def forward(self, x):
+ x = (x + self.bias) / self.divisor
+
+ return x.clamp_(self.min_value, self.max_value)
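+
+
+# Minimal usage sketch (illustrative only): with the default arguments this
+# computes min(max((x + 3) / 6, 0), 1). `torch` is imported locally because
+# the module itself only uses `torch.nn`.
+if __name__ == '__main__':
+    import torch
+    act = HSigmoid()
+    x = torch.tensor([-4.0, 0.0, 4.0])
+    assert torch.allclose(act(x), torch.tensor([0.0, 0.5, 1.0]))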
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/hswish.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/hswish.py
new file mode 100644
index 0000000000000000000000000000000000000000..399abc65dec7792dd0112a4430e5c17fb6e43b4c
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/hswish.py
@@ -0,0 +1,42 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch.nn as nn
+
+from .registry import ACTIVATION_LAYERS
+
+
+@ACTIVATION_LAYERS.register_module()
+class HSwish(nn.Module):
+ """Hard Swish Module.
+
+ This module applies the hard swish function:
+
+ .. math::
+ Hswish(x) = x * ReLU6(x + 3) / 6
+
+ Args:
+ inplace (bool): can optionally do the operation in-place.
+ Default: False.
+
+ Returns:
+ Tensor: The output tensor.
+ """
+
+ def __init__(self, inplace=False):
+ super(HSwish, self).__init__()
+ self.act = nn.ReLU6(inplace)
+
+ def forward(self, x):
+ return x * self.act(x + 3) / 6
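+
+
+# Minimal usage sketch (illustrative only): HSwish(x) = x * ReLU6(x + 3) / 6,
+# so inputs at or below -3 map to 0 and inputs at or above +3 pass through
+# unchanged. `torch` is imported locally for the dummy data.
+if __name__ == '__main__':
+    import torch
+    act = HSwish()
+    x = torch.tensor([-4.0, 0.0, 4.0])
+    assert torch.allclose(act(x), torch.tensor([0.0, 0.0, 4.0]))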
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/non_local.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/non_local.py
new file mode 100644
index 0000000000000000000000000000000000000000..2372866116dd9e3321976e73277cc68e4c7d0217
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/non_local.py
@@ -0,0 +1,319 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from abc import ABCMeta
+
+import torch
+import torch.nn as nn
+
+from ..utils import constant_init, normal_init
+from .conv_module import ConvModule
+from .registry import PLUGIN_LAYERS
+
+
+class _NonLocalNd(nn.Module, metaclass=ABCMeta):
+ """Basic Non-local module.
+
+ This module is proposed in
+ "Non-local Neural Networks"
+ Paper reference: https://arxiv.org/abs/1711.07971
+ Code reference: https://github.com/AlexHex7/Non-local_pytorch
+
+ Args:
+ in_channels (int): Channels of the input feature map.
+ reduction (int): Channel reduction ratio. Default: 2.
+ use_scale (bool): Whether to scale pairwise_weight by
+ `1/sqrt(inter_channels)` when the mode is `embedded_gaussian`.
+ Default: True.
+ conv_cfg (None | dict): The config dict for convolution layers.
+ If not specified, it will use `nn.Conv2d` for convolution layers.
+ Default: None.
+ norm_cfg (None | dict): The config dict for normalization layers.
+ Default: None. (This parameter is only applicable to conv_out.)
+ mode (str): Options are `gaussian`, `concatenation`,
+ `embedded_gaussian` and `dot_product`. Default: embedded_gaussian.
+ """
+
+ def __init__(self,
+ in_channels,
+ reduction=2,
+ use_scale=True,
+ conv_cfg=None,
+ norm_cfg=None,
+ mode='embedded_gaussian',
+ **kwargs):
+ super(_NonLocalNd, self).__init__()
+ self.in_channels = in_channels
+ self.reduction = reduction
+ self.use_scale = use_scale
+ self.inter_channels = max(in_channels // reduction, 1)
+ self.mode = mode
+
+ if mode not in [
+ 'gaussian', 'embedded_gaussian', 'dot_product', 'concatenation'
+ ]:
+ raise ValueError("Mode should be in 'gaussian', 'concatenation', "
+ f"'embedded_gaussian' or 'dot_product', but got "
+ f'{mode} instead.')
+
+ # g, theta, phi are defaulted as `nn.ConvNd`.
+ # Here we use ConvModule for potential usage.
+ self.g = ConvModule(
+ self.in_channels,
+ self.inter_channels,
+ kernel_size=1,
+ conv_cfg=conv_cfg,
+ act_cfg=None)
+ self.conv_out = ConvModule(
+ self.inter_channels,
+ self.in_channels,
+ kernel_size=1,
+ conv_cfg=conv_cfg,
+ norm_cfg=norm_cfg,
+ act_cfg=None)
+
+ if self.mode != 'gaussian':
+ self.theta = ConvModule(
+ self.in_channels,
+ self.inter_channels,
+ kernel_size=1,
+ conv_cfg=conv_cfg,
+ act_cfg=None)
+ self.phi = ConvModule(
+ self.in_channels,
+ self.inter_channels,
+ kernel_size=1,
+ conv_cfg=conv_cfg,
+ act_cfg=None)
+
+ if self.mode == 'concatenation':
+ self.concat_project = ConvModule(
+ self.inter_channels * 2,
+ 1,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ bias=False,
+ act_cfg=dict(type='ReLU'))
+
+ self.init_weights(**kwargs)
+
+ def init_weights(self, std=0.01, zeros_init=True):
+ if self.mode != 'gaussian':
+ for m in [self.g, self.theta, self.phi]:
+ normal_init(m.conv, std=std)
+ else:
+ normal_init(self.g.conv, std=std)
+ if zeros_init:
+ if self.conv_out.norm_cfg is None:
+ constant_init(self.conv_out.conv, 0)
+ else:
+ constant_init(self.conv_out.norm, 0)
+ else:
+ if self.conv_out.norm_cfg is None:
+ normal_init(self.conv_out.conv, std=std)
+ else:
+ normal_init(self.conv_out.norm, std=std)
+
+ def gaussian(self, theta_x, phi_x):
+ # NonLocal1d pairwise_weight: [N, H, H]
+ # NonLocal2d pairwise_weight: [N, HxW, HxW]
+ # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW]
+ pairwise_weight = torch.matmul(theta_x, phi_x)
+ pairwise_weight = pairwise_weight.softmax(dim=-1)
+ return pairwise_weight
+
+ def embedded_gaussian(self, theta_x, phi_x):
+ # NonLocal1d pairwise_weight: [N, H, H]
+ # NonLocal2d pairwise_weight: [N, HxW, HxW]
+ # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW]
+ pairwise_weight = torch.matmul(theta_x, phi_x)
+ if self.use_scale:
+ # theta_x.shape[-1] is `self.inter_channels`
+ pairwise_weight /= theta_x.shape[-1]**0.5
+ pairwise_weight = pairwise_weight.softmax(dim=-1)
+ return pairwise_weight
+
+ def dot_product(self, theta_x, phi_x):
+ # NonLocal1d pairwise_weight: [N, H, H]
+ # NonLocal2d pairwise_weight: [N, HxW, HxW]
+ # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW]
+ pairwise_weight = torch.matmul(theta_x, phi_x)
+ pairwise_weight /= pairwise_weight.shape[-1]
+ return pairwise_weight
+
+ def concatenation(self, theta_x, phi_x):
+ # NonLocal1d pairwise_weight: [N, H, H]
+ # NonLocal2d pairwise_weight: [N, HxW, HxW]
+ # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW]
+ h = theta_x.size(2)
+ w = phi_x.size(3)
+ theta_x = theta_x.repeat(1, 1, 1, w)
+ phi_x = phi_x.repeat(1, 1, h, 1)
+
+ concat_feature = torch.cat([theta_x, phi_x], dim=1)
+ pairwise_weight = self.concat_project(concat_feature)
+ n, _, h, w = pairwise_weight.size()
+ pairwise_weight = pairwise_weight.view(n, h, w)
+ pairwise_weight /= pairwise_weight.shape[-1]
+
+ return pairwise_weight
+
+ def forward(self, x):
+        # Assume `reduction = 1`, then `inter_channels = C`.
+        # In 'gaussian' mode theta/phi operate on x directly, so their
+        # channel dimension is also C regardless of `reduction`.
+
+ # NonLocal1d x: [N, C, H]
+ # NonLocal2d x: [N, C, H, W]
+ # NonLocal3d x: [N, C, T, H, W]
+ n = x.size(0)
+
+ # NonLocal1d g_x: [N, H, C]
+ # NonLocal2d g_x: [N, HxW, C]
+ # NonLocal3d g_x: [N, TxHxW, C]
+ g_x = self.g(x).view(n, self.inter_channels, -1)
+ g_x = g_x.permute(0, 2, 1)
+
+ # NonLocal1d theta_x: [N, H, C], phi_x: [N, C, H]
+ # NonLocal2d theta_x: [N, HxW, C], phi_x: [N, C, HxW]
+ # NonLocal3d theta_x: [N, TxHxW, C], phi_x: [N, C, TxHxW]
+ if self.mode == 'gaussian':
+ theta_x = x.view(n, self.in_channels, -1)
+ theta_x = theta_x.permute(0, 2, 1)
+ if self.sub_sample:
+ phi_x = self.phi(x).view(n, self.in_channels, -1)
+ else:
+ phi_x = x.view(n, self.in_channels, -1)
+ elif self.mode == 'concatenation':
+ theta_x = self.theta(x).view(n, self.inter_channels, -1, 1)
+ phi_x = self.phi(x).view(n, self.inter_channels, 1, -1)
+ else:
+ theta_x = self.theta(x).view(n, self.inter_channels, -1)
+ theta_x = theta_x.permute(0, 2, 1)
+ phi_x = self.phi(x).view(n, self.inter_channels, -1)
+
+ pairwise_func = getattr(self, self.mode)
+ # NonLocal1d pairwise_weight: [N, H, H]
+ # NonLocal2d pairwise_weight: [N, HxW, HxW]
+ # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW]
+ pairwise_weight = pairwise_func(theta_x, phi_x)
+
+ # NonLocal1d y: [N, H, C]
+ # NonLocal2d y: [N, HxW, C]
+ # NonLocal3d y: [N, TxHxW, C]
+ y = torch.matmul(pairwise_weight, g_x)
+ # NonLocal1d y: [N, C, H]
+ # NonLocal2d y: [N, C, H, W]
+ # NonLocal3d y: [N, C, T, H, W]
+ y = y.permute(0, 2, 1).contiguous().reshape(n, self.inter_channels,
+ *x.size()[2:])
+
+ output = x + self.conv_out(y)
+
+ return output
+
+
+class NonLocal1d(_NonLocalNd):
+ """1D Non-local module.
+
+ Args:
+ in_channels (int): Same as `NonLocalND`.
+ sub_sample (bool): Whether to apply max pooling after pairwise
+            function (note that sub-sampling only affects spatial dimensions).
+ Default: False.
+ conv_cfg (None | dict): Same as `NonLocalND`.
+ Default: dict(type='Conv1d').
+ """
+
+ def __init__(self,
+ in_channels,
+ sub_sample=False,
+ conv_cfg=dict(type='Conv1d'),
+ **kwargs):
+ super(NonLocal1d, self).__init__(
+ in_channels, conv_cfg=conv_cfg, **kwargs)
+
+ self.sub_sample = sub_sample
+
+ if sub_sample:
+ max_pool_layer = nn.MaxPool1d(kernel_size=2)
+ self.g = nn.Sequential(self.g, max_pool_layer)
+ if self.mode != 'gaussian':
+ self.phi = nn.Sequential(self.phi, max_pool_layer)
+ else:
+ self.phi = max_pool_layer
+
+
+@PLUGIN_LAYERS.register_module()
+class NonLocal2d(_NonLocalNd):
+ """2D Non-local module.
+
+ Args:
+ in_channels (int): Same as `NonLocalND`.
+ sub_sample (bool): Whether to apply max pooling after pairwise
+            function (note that sub-sampling only affects spatial dimensions).
+ Default: False.
+ conv_cfg (None | dict): Same as `NonLocalND`.
+ Default: dict(type='Conv2d').
+ """
+
+ _abbr_ = 'nonlocal_block'
+
+ def __init__(self,
+ in_channels,
+ sub_sample=False,
+ conv_cfg=dict(type='Conv2d'),
+ **kwargs):
+ super(NonLocal2d, self).__init__(
+ in_channels, conv_cfg=conv_cfg, **kwargs)
+
+ self.sub_sample = sub_sample
+
+ if sub_sample:
+ max_pool_layer = nn.MaxPool2d(kernel_size=(2, 2))
+ self.g = nn.Sequential(self.g, max_pool_layer)
+ if self.mode != 'gaussian':
+ self.phi = nn.Sequential(self.phi, max_pool_layer)
+ else:
+ self.phi = max_pool_layer
+
+
+class NonLocal3d(_NonLocalNd):
+ """3D Non-local module.
+
+ Args:
+ in_channels (int): Same as `NonLocalND`.
+ sub_sample (bool): Whether to apply max pooling after pairwise
+            function (note that sub-sampling only affects spatial dimensions).
+ Default: False.
+ conv_cfg (None | dict): Same as `NonLocalND`.
+ Default: dict(type='Conv3d').
+ """
+
+ def __init__(self,
+ in_channels,
+ sub_sample=False,
+ conv_cfg=dict(type='Conv3d'),
+ **kwargs):
+ super(NonLocal3d, self).__init__(
+ in_channels, conv_cfg=conv_cfg, **kwargs)
+ self.sub_sample = sub_sample
+
+ if sub_sample:
+ max_pool_layer = nn.MaxPool3d(kernel_size=(1, 2, 2))
+ self.g = nn.Sequential(self.g, max_pool_layer)
+ if self.mode != 'gaussian':
+ self.phi = nn.Sequential(self.phi, max_pool_layer)
+ else:
+ self.phi = max_pool_layer
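+
+
+# Minimal usage sketch (illustrative only, arbitrary sizes): an embedded
+# Gaussian NonLocal2d block is residual, so the input shape is preserved;
+# `reduction` only changes the internal theta/phi/g channel width.
+if __name__ == '__main__':
+    block = NonLocal2d(in_channels=32, reduction=2, mode='embedded_gaussian')
+    x = torch.randn(2, 32, 20, 20)
+    assert block(x).shape == x.shape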
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/norm.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/norm.py
new file mode 100644
index 0000000000000000000000000000000000000000..4a11604c3f0f4317dddd68ed3abc0af15f18190d
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/norm.py
@@ -0,0 +1,157 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import inspect
+
+import torch.nn as nn
+
+from mmcv.utils import is_tuple_of
+from mmcv.utils.parrots_wrapper import SyncBatchNorm, _BatchNorm, _InstanceNorm
+from .registry import NORM_LAYERS
+
+NORM_LAYERS.register_module('BN', module=nn.BatchNorm2d)
+NORM_LAYERS.register_module('BN1d', module=nn.BatchNorm1d)
+NORM_LAYERS.register_module('BN2d', module=nn.BatchNorm2d)
+NORM_LAYERS.register_module('BN3d', module=nn.BatchNorm3d)
+NORM_LAYERS.register_module('SyncBN', module=SyncBatchNorm)
+NORM_LAYERS.register_module('GN', module=nn.GroupNorm)
+NORM_LAYERS.register_module('LN', module=nn.LayerNorm)
+NORM_LAYERS.register_module('IN', module=nn.InstanceNorm2d)
+NORM_LAYERS.register_module('IN1d', module=nn.InstanceNorm1d)
+NORM_LAYERS.register_module('IN2d', module=nn.InstanceNorm2d)
+NORM_LAYERS.register_module('IN3d', module=nn.InstanceNorm3d)
+
+
+def infer_abbr(class_type):
+ """Infer abbreviation from the class name.
+
+ When we build a norm layer with `build_norm_layer()`, we want to preserve
+    the norm type in variable names, e.g., self.bn1, self.gn. This method will
+ infer the abbreviation to map class types to abbreviations.
+
+ Rule 1: If the class has the property "_abbr_", return the property.
+ Rule 2: If the parent class is _BatchNorm, GroupNorm, LayerNorm or
+ InstanceNorm, the abbreviation of this layer will be "bn", "gn", "ln" and
+ "in" respectively.
+ Rule 3: If the class name contains "batch", "group", "layer" or "instance",
+ the abbreviation of this layer will be "bn", "gn", "ln" and "in"
+ respectively.
+    Rule 4: Otherwise, the abbreviation falls back to "norm_layer".
+
+ Args:
+ class_type (type): The norm layer type.
+
+ Returns:
+ str: The inferred abbreviation.
+ """
+ if not inspect.isclass(class_type):
+ raise TypeError(
+ f'class_type must be a type, but got {type(class_type)}')
+ if hasattr(class_type, '_abbr_'):
+ return class_type._abbr_
+ if issubclass(class_type, _InstanceNorm): # IN is a subclass of BN
+ return 'in'
+ elif issubclass(class_type, _BatchNorm):
+ return 'bn'
+ elif issubclass(class_type, nn.GroupNorm):
+ return 'gn'
+ elif issubclass(class_type, nn.LayerNorm):
+ return 'ln'
+ else:
+ class_name = class_type.__name__.lower()
+ if 'batch' in class_name:
+ return 'bn'
+ elif 'group' in class_name:
+ return 'gn'
+ elif 'layer' in class_name:
+ return 'ln'
+ elif 'instance' in class_name:
+ return 'in'
+ else:
+ return 'norm_layer'
+
+
+def build_norm_layer(cfg, num_features, postfix=''):
+ """Build normalization layer.
+
+ Args:
+ cfg (dict): The norm layer config, which should contain:
+
+ - type (str): Layer type.
+ - layer args: Args needed to instantiate a norm layer.
+ - requires_grad (bool, optional): Whether the layer parameters
+ require gradient updates. Default: True.
+ num_features (int): Number of input channels.
+ postfix (int | str): The postfix to be appended into norm abbreviation
+ to create named layer.
+
+ Returns:
+ tuple[str, nn.Module]: The first element is the layer name consisting
+ of abbreviation and postfix, e.g., bn1, gn. The second element is the
+ created norm layer.
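+
+ Example:
+ A minimal usage sketch (illustrative values, relying only on the
+ norm layers registered above in this module):
+
+ >>> import torch.nn as nn
+ >>> name, layer = build_norm_layer(dict(type='BN'), 64)
+ >>> assert name == 'bn' and isinstance(layer, nn.BatchNorm2d)
+ >>> name, layer = build_norm_layer(dict(type='GN', num_groups=8), 64,
+ ... postfix=2)
+ >>> assert name == 'gn2' and isinstance(layer, nn.GroupNorm)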
+ """
+ if not isinstance(cfg, dict):
+ raise TypeError('cfg must be a dict')
+ if 'type' not in cfg:
+ raise KeyError('the cfg dict must contain the key "type"')
+ cfg_ = cfg.copy()
+
+ layer_type = cfg_.pop('type')
+ if layer_type not in NORM_LAYERS:
+ raise KeyError(f'Unrecognized norm type {layer_type}')
+
+ norm_layer = NORM_LAYERS.get(layer_type)
+ abbr = infer_abbr(norm_layer)
+
+ assert isinstance(postfix, (int, str))
+ name = abbr + str(postfix)
+
+ requires_grad = cfg_.pop('requires_grad', True)
+ cfg_.setdefault('eps', 1e-5)
+ if layer_type != 'GN':
+ layer = norm_layer(num_features, **cfg_)
+ if layer_type == 'SyncBN' and hasattr(layer, '_specify_ddp_gpu_num'):
+ layer._specify_ddp_gpu_num(1)
+ else:
+ assert 'num_groups' in cfg_
+ layer = norm_layer(num_channels=num_features, **cfg_)
+
+ for param in layer.parameters():
+ param.requires_grad = requires_grad
+
+ return name, layer
+
+
+def is_norm(layer, exclude=None):
+ """Check if a layer is a normalization layer.
+
+ Args:
+ layer (nn.Module): The layer to be checked.
+ exclude (type | tuple[type]): Types to be excluded.
+
+ Returns:
+ bool: Whether the layer is a norm layer.
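+
+ Example:
+ A minimal sketch (illustrative layers only):
+
+ >>> import torch.nn as nn
+ >>> assert is_norm(nn.BatchNorm2d(8))
+ >>> assert not is_norm(nn.Conv2d(8, 8, 1))
+ >>> assert not is_norm(nn.BatchNorm2d(8), exclude=nn.BatchNorm2d)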
+ """
+ if exclude is not None:
+ if not isinstance(exclude, tuple):
+ exclude = (exclude, )
+ if not is_tuple_of(exclude, type):
+ raise TypeError(
+ f'"exclude" must be either None or type or a tuple of types, '
+ f'but got {type(exclude)}: {exclude}')
+
+ if exclude and isinstance(layer, exclude):
+ return False
+
+ all_norm_bases = (_BatchNorm, _InstanceNorm, nn.GroupNorm, nn.LayerNorm)
+ return isinstance(layer, all_norm_bases)
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/padding.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/padding.py
new file mode 100644
index 0000000000000000000000000000000000000000..72a844d78b25afbce3cd0b66cde152037e1585bd
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/padding.py
@@ -0,0 +1,49 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch.nn as nn
+
+from .registry import PADDING_LAYERS
+
+PADDING_LAYERS.register_module('zero', module=nn.ZeroPad2d)
+PADDING_LAYERS.register_module('reflect', module=nn.ReflectionPad2d)
+PADDING_LAYERS.register_module('replicate', module=nn.ReplicationPad2d)
+
+
+def build_padding_layer(cfg, *args, **kwargs):
+ """Build padding layer.
+
+ Args:
+ cfg (None or dict): The padding layer config, which should contain:
+ - type (str): Layer type.
+ - layer args: Args needed to instantiate a padding layer.
+
+ Returns:
+ nn.Module: Created padding layer.
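+
+ Example:
+ A minimal sketch, using the padding layers registered above:
+
+ >>> import torch
+ >>> pad = build_padding_layer(dict(type='reflect'), 1)
+ >>> x = torch.rand(1, 3, 8, 8)
+ >>> assert pad(x).shape == (1, 3, 10, 10)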
+ """
+ if not isinstance(cfg, dict):
+ raise TypeError('cfg must be a dict')
+ if 'type' not in cfg:
+ raise KeyError('the cfg dict must contain the key "type"')
+
+ cfg_ = cfg.copy()
+ padding_type = cfg_.pop('type')
+ if padding_type not in PADDING_LAYERS:
+ raise KeyError(f'Unrecognized padding type {padding_type}.')
+ else:
+ padding_layer = PADDING_LAYERS.get(padding_type)
+
+ layer = padding_layer(*args, **kwargs, **cfg_)
+
+ return layer
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/plugin.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/plugin.py
new file mode 100644
index 0000000000000000000000000000000000000000..26085fd6e83bf5d40d496f21a4eb98dc6e0129c1
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/plugin.py
@@ -0,0 +1,102 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import inspect
+import platform
+
+from .registry import PLUGIN_LAYERS
+
+if platform.system() == 'Windows':
+ import regex as re
+else:
+ import re
+
+
+def infer_abbr(class_type):
+ """Infer abbreviation from the class name.
+
+ This method will infer the abbreviation to map class types to
+ abbreviations.
+
+ Rule 1: If the class has the property "_abbr_", return the property.
+ Rule 2: Otherwise, the abbreviation falls back to snake case of class
+ name, e.g. the abbreviation of ``FancyBlock`` will be ``fancy_block``.
+
+ Args:
+ class_type (type): The norm layer type.
+
+ Returns:
+ str: The inferred abbreviation.
+ """
+
+ def camel2snack(word):
+ """Convert camel case word into snack case.
+
+ Modified from `inflection lib
+ `_.
+
+ Example::
+
+ >>> camel2snack("FancyBlock")
+ 'fancy_block'
+ """
+
+ word = re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1_\2', word)
+ word = re.sub(r'([a-z\d])([A-Z])', r'\1_\2', word)
+ word = word.replace('-', '_')
+ return word.lower()
+
+ if not inspect.isclass(class_type):
+ raise TypeError(
+ f'class_type must be a type, but got {type(class_type)}')
+ if hasattr(class_type, '_abbr_'):
+ return class_type._abbr_
+ else:
+ return camel2snack(class_type.__name__)
+
+
+def build_plugin_layer(cfg, postfix='', **kwargs):
+ """Build plugin layer.
+
+ Args:
+ cfg (None or dict): cfg should contain:
+
+ - type (str): identify plugin layer type.
+ - layer args: args needed to instantiate a plugin layer.
+ postfix (int, str): appended to the plugin abbreviation to
+ create a named layer. Default: ''.
+
+ Returns:
+ tuple[str, nn.Module]: The first one is the concatenation of
+ abbreviation and postfix. The second is the created plugin layer.
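+
+ Example:
+ A hypothetical sketch; registering ``nn.Identity`` here is only for
+ illustration and is not part of this module:
+
+ >>> import torch.nn as nn
+ >>> PLUGIN_LAYERS.register_module('Identity', module=nn.Identity)
+ >>> name, layer = build_plugin_layer(dict(type='Identity'), postfix=1)
+ >>> assert name == 'identity1' and isinstance(layer, nn.Identity)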
+ """
+ if not isinstance(cfg, dict):
+ raise TypeError('cfg must be a dict')
+ if 'type' not in cfg:
+ raise KeyError('the cfg dict must contain the key "type"')
+ cfg_ = cfg.copy()
+
+ layer_type = cfg_.pop('type')
+ if layer_type not in PLUGIN_LAYERS:
+ raise KeyError(f'Unrecognized plugin type {layer_type}')
+
+ plugin_layer = PLUGIN_LAYERS.get(layer_type)
+ abbr = infer_abbr(plugin_layer)
+
+ assert isinstance(postfix, (int, str))
+ name = abbr + str(postfix)
+
+ layer = plugin_layer(**kwargs, **cfg_)
+
+ return name, layer
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/registry.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/registry.py
new file mode 100644
index 0000000000000000000000000000000000000000..ea0d81266a5513eb37c5cd3dceda40efa1437132
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/registry.py
@@ -0,0 +1,29 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from mmcv.utils import Registry
+
+CONV_LAYERS = Registry('conv layer')
+NORM_LAYERS = Registry('norm layer')
+ACTIVATION_LAYERS = Registry('activation layer')
+PADDING_LAYERS = Registry('padding layer')
+UPSAMPLE_LAYERS = Registry('upsample layer')
+PLUGIN_LAYERS = Registry('plugin layer')
+
+DROPOUT_LAYERS = Registry('drop out layers')
+POSITIONAL_ENCODING = Registry('position encoding')
+ATTENTION = Registry('attention')
+FEEDFORWARD_NETWORK = Registry('feed-forward Network')
+TRANSFORMER_LAYER = Registry('transformerLayer')
+TRANSFORMER_LAYER_SEQUENCE = Registry('transformer-layers sequence')
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/scale.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/scale.py
new file mode 100644
index 0000000000000000000000000000000000000000..cc237a901454435479bb1ec53707b1b772790128
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/scale.py
@@ -0,0 +1,34 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+import torch.nn as nn
+
+
+class Scale(nn.Module):
+ """A learnable scale parameter.
+
+ This layer scales the input by a learnable factor. It multiplies the
+ input of any shape by a single learnable scale parameter.
+
+ Args:
+ scale (float): Initial value of scale factor. Default: 1.0
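+
+ Example:
+ A minimal sketch:
+
+ >>> import torch
+ >>> x = torch.rand(2, 3, 4, 4)
+ >>> scale = Scale(0.5)
+ >>> assert torch.allclose(scale(x), x * 0.5)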
+ """
+
+ def __init__(self, scale=1.0):
+ super(Scale, self).__init__()
+ self.scale = nn.Parameter(torch.tensor(scale, dtype=torch.float))
+
+ def forward(self, x):
+ return x * self.scale
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/swish.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/swish.py
new file mode 100644
index 0000000000000000000000000000000000000000..d4f0cff2f830582a65a2a03880ce9a3cc950b4b0
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/swish.py
@@ -0,0 +1,38 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+import torch.nn as nn
+
+from .registry import ACTIVATION_LAYERS
+
+
+@ACTIVATION_LAYERS.register_module()
+class Swish(nn.Module):
+ """Swish Module.
+
+ This module applies the swish function:
+
+ .. math::
+ Swish(x) = x * Sigmoid(x)
+
+ Returns:
+ Tensor: The output tensor.
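+
+ Example:
+ A minimal sketch:
+
+ >>> import torch
+ >>> act = Swish()
+ >>> x = torch.randn(4)
+ >>> assert torch.allclose(act(x), x * torch.sigmoid(x))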
+ """
+
+ def __init__(self):
+ super(Swish, self).__init__()
+
+ def forward(self, x):
+ return x * torch.sigmoid(x)
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/transformer.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/transformer.py
new file mode 100644
index 0000000000000000000000000000000000000000..f9d8b904edfbb0083c9ceff2eff00a8357328c5b
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/transformer.py
@@ -0,0 +1,956 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import copy
+import math
+import warnings
+from typing import Sequence
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from mmcv.cnn import (Linear, build_activation_layer, build_conv_layer,
+ build_norm_layer)
+from mmcv.runner.base_module import BaseModule, ModuleList, Sequential
+from mmcv.utils import (ConfigDict, build_from_cfg, deprecated_api_warning,
+ to_2tuple)
+from .drop import build_dropout
+from .registry import (ATTENTION, FEEDFORWARD_NETWORK, POSITIONAL_ENCODING,
+ TRANSFORMER_LAYER, TRANSFORMER_LAYER_SEQUENCE)
+
+# Avoid BC-breaking of importing MultiScaleDeformableAttention from this file
+try:
+ from mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention # noqa F401
+ warnings.warn(
+ ImportWarning(
+ '``MultiScaleDeformableAttention`` has been moved to '
+ '``mmcv.ops.multi_scale_deform_attn``, please change original path ' # noqa E501
+ '``from mmcv.cnn.bricks.transformer import MultiScaleDeformableAttention`` ' # noqa E501
+ 'to ``from mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention`` ' # noqa E501
+ ))
+
+except ImportError:
+ warnings.warn('Fail to import ``MultiScaleDeformableAttention`` from '
+ '``mmcv.ops.multi_scale_deform_attn``, '
+ 'You should install ``mmcv-full`` if you need this module. ')
+
+
+def build_positional_encoding(cfg, default_args=None):
+ """Builder for Position Encoding."""
+ return build_from_cfg(cfg, POSITIONAL_ENCODING, default_args)
+
+
+def build_attention(cfg, default_args=None):
+ """Builder for attention."""
+ return build_from_cfg(cfg, ATTENTION, default_args)
+
+
+def build_feedforward_network(cfg, default_args=None):
+ """Builder for feed-forward network (FFN)."""
+ return build_from_cfg(cfg, FEEDFORWARD_NETWORK, default_args)
+
+
+def build_transformer_layer(cfg, default_args=None):
+ """Builder for transformer layer."""
+ return build_from_cfg(cfg, TRANSFORMER_LAYER, default_args)
+
+
+def build_transformer_layer_sequence(cfg, default_args=None):
+ """Builder for transformer encoder and transformer decoder."""
+ return build_from_cfg(cfg, TRANSFORMER_LAYER_SEQUENCE, default_args)
+
+
+class AdaptivePadding(nn.Module):
+ """Applies padding adaptively to the input.
+
+ This module pads the input so that it is fully covered by the filter
+ you specify. It supports two modes, "same" and "corner". The
+ "same" mode is the same as the "SAME" padding mode in TensorFlow and
+ pads zeros around the input. The "corner" mode pads zeros to the
+ bottom right of the input.
+
+ Args:
+ kernel_size (int | tuple): Size of the kernel. Default: 1.
+ stride (int | tuple): Stride of the filter. Default: 1.
+ dilation (int | tuple): Spacing between kernel elements.
+ Default: 1.
+ padding (str): Either "same" or "corner". The "corner" mode
+ pads zeros to the bottom right, and the "same" mode pads
+ zeros around the input. Default: "corner".
+
+ Example:
+ >>> kernel_size = 16
+ >>> stride = 16
+ >>> dilation = 1
+ >>> input = torch.rand(1, 1, 15, 17)
+ >>> adap_pad = AdaptivePadding(
+ >>> kernel_size=kernel_size,
+ >>> stride=stride,
+ >>> dilation=dilation,
+ >>> padding="corner")
+ >>> out = adap_pad(input)
+ >>> assert (out.shape[2], out.shape[3]) == (16, 32)
+ >>> input = torch.rand(1, 1, 16, 17)
+ >>> out = adap_pad(input)
+ >>> assert (out.shape[2], out.shape[3]) == (16, 32)
+ """
+
+ def __init__(self, kernel_size=1, stride=1, dilation=1, padding='corner'):
+ super(AdaptivePadding, self).__init__()
+ assert padding in ('same', 'corner')
+
+ kernel_size = to_2tuple(kernel_size)
+ stride = to_2tuple(stride)
+ dilation = to_2tuple(dilation)
+
+ self.padding = padding
+ self.kernel_size = kernel_size
+ self.stride = stride
+ self.dilation = dilation
+
+ def get_pad_shape(self, input_shape):
+ """Calculate the padding size of input.
+
+ Args:
+ input_shape (:obj:`torch.Size`): arrange as (H, W).
+
+ Returns:
+ Tuple[int]: The padding size along the
+ original H and W directions
+ """
+ input_h, input_w = input_shape
+ kernel_h, kernel_w = self.kernel_size
+ stride_h, stride_w = self.stride
+ output_h = math.ceil(input_h / stride_h)
+ output_w = math.ceil(input_w / stride_w)
+ pad_h = max((output_h - 1) * stride_h +
+ (kernel_h - 1) * self.dilation[0] + 1 - input_h, 0)
+ pad_w = max((output_w - 1) * stride_w +
+ (kernel_w - 1) * self.dilation[1] + 1 - input_w, 0)
+ return pad_h, pad_w
+
+ def forward(self, x):
+ """Add padding to `x`
+
+ Args:
+ x (Tensor): Input tensor has shape (B, C, H, W).
+
+ Returns:
+ Tensor: The tensor with adaptive padding
+ """
+ pad_h, pad_w = self.get_pad_shape(x.size()[-2:])
+ if pad_h > 0 or pad_w > 0:
+ if self.padding == 'corner':
+ x = F.pad(x, [0, pad_w, 0, pad_h])
+ elif self.padding == 'same':
+ x = F.pad(x, [
+ pad_w // 2, pad_w - pad_w // 2, pad_h // 2,
+ pad_h - pad_h // 2
+ ])
+ return x
+
+
+class PatchEmbed(BaseModule):
+ """Image to Patch Embedding.
+
+ We use a conv layer to implement PatchEmbed.
+
+ Args:
+ in_channels (int): The num of input channels. Default: 3
+ embed_dims (int): The dimensions of embedding. Default: 768
+ conv_type (str): The type of convolution
+ to generate patch embedding. Default: "Conv2d".
+ kernel_size (int): The kernel_size of embedding conv. Default: 16.
+ stride (int): The slide stride of embedding conv.
+ Default: 16.
+ padding (int | tuple | string): The padding length of
+ embedding conv. When it is a string, it means the mode
+ of adaptive padding, support "same" and "corner" now.
+ Default: "corner".
+ dilation (int): The dilation rate of embedding conv. Default: 1.
+ bias (bool): Bias of embed conv. Default: True.
+ norm_cfg (dict, optional): Config dict for normalization layer.
+ Default: None.
+ input_size (int | tuple | None): The size of input, which will be
+ used to calculate the out size (`init_out_size`) at
+ initialization. Default: None.
+ init_cfg (`mmcv.ConfigDict`, optional): The Config for initialization.
+ Default: None.
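+
+ Example:
+ An illustrative sketch: a 224x224 image split into 16x16 patches
+ (sizes chosen only for demonstration):
+
+ >>> import torch
+ >>> patch_embed = PatchEmbed(in_channels=3, embed_dims=768,
+ ... kernel_size=16, stride=16)
+ >>> x = torch.rand(1, 3, 224, 224)
+ >>> out, out_size = patch_embed(x)
+ >>> assert out.shape == (1, 14 * 14, 768) and out_size == (14, 14)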
+ """
+
+ def __init__(self,
+ in_channels=3,
+ embed_dims=768,
+ conv_type='Conv2d',
+ kernel_size=16,
+ stride=16,
+ padding='corner',
+ dilation=1,
+ bias=True,
+ norm_cfg=None,
+ input_size=None,
+ init_cfg=None):
+ super(PatchEmbed, self).__init__(init_cfg=init_cfg)
+
+ self.embed_dims = embed_dims
+ if stride is None:
+ stride = kernel_size
+
+ kernel_size = to_2tuple(kernel_size)
+ stride = to_2tuple(stride)
+ dilation = to_2tuple(dilation)
+
+ if isinstance(padding, str):
+ self.adaptive_padding = AdaptivePadding(
+ kernel_size=kernel_size,
+ stride=stride,
+ dilation=dilation,
+ padding=padding)
+ # disable the padding of conv
+ padding = 0
+ else:
+ self.adaptive_padding = None
+ padding = to_2tuple(padding)
+
+ self.projection = build_conv_layer(
+ dict(type=conv_type),
+ in_channels=in_channels,
+ out_channels=embed_dims,
+ kernel_size=kernel_size,
+ stride=stride,
+ padding=padding,
+ dilation=dilation,
+ bias=bias)
+
+ if norm_cfg is not None:
+ self.norm = build_norm_layer(norm_cfg, embed_dims)[1]
+ else:
+ self.norm = None
+
+ if input_size:
+ input_size = to_2tuple(input_size)
+ # `init_out_size` would be used outside to
+ # calculate the num_patches
+ # e.g. when `use_abs_pos_embed` outside
+ self.init_input_size = input_size
+ if self.adaptive_padding:
+ pad_h, pad_w = self.adaptive_padding.get_pad_shape(input_size)
+ input_h, input_w = input_size
+ input_h = input_h + pad_h
+ input_w = input_w + pad_w
+ input_size = (input_h, input_w)
+
+ # https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html
+ h_out = (input_size[0] + 2 * padding[0] - dilation[0] *
+ (kernel_size[0] - 1) - 1) // stride[0] + 1
+ w_out = (input_size[1] + 2 * padding[1] - dilation[1] *
+ (kernel_size[1] - 1) - 1) // stride[1] + 1
+ self.init_out_size = (h_out, w_out)
+ else:
+ self.init_input_size = None
+ self.init_out_size = None
+
+ def forward(self, x):
+ """
+ Args:
+ x (Tensor): Has shape (B, C, H, W). In most case, C is 3.
+
+ Returns:
+ tuple: Contains merged results and its spatial shape.
+
+ - x (Tensor): Has shape (B, out_h * out_w, embed_dims)
+ - out_size (tuple[int]): Spatial shape of x, arrange as
+ (out_h, out_w).
+ """
+
+ if self.adaptive_padding:
+ x = self.adaptive_padding(x)
+
+ x = self.projection(x)
+ out_size = (x.shape[2], x.shape[3])
+ x = x.flatten(2).transpose(1, 2)
+ if self.norm is not None:
+ x = self.norm(x)
+ return x, out_size
+
+
+class PatchMerging(BaseModule):
+ """Merge patch feature map.
+
+ This layer groups the feature map by kernel_size, and applies norm and
+ linear layers to the grouped feature map (used in Swin Transformer).
+ Our implementation uses `nn.Unfold` to
+ merge patches, which is about 25% faster than the original
+ implementation. However, we need to modify pretrained
+ models for compatibility.
+
+ Args:
+ in_channels (int): The num of input channels.
+ out_channels (int): The num of output channels.
+ kernel_size (int | tuple, optional): the kernel size in the unfold
+ layer. Defaults to 2.
+ stride (int | tuple, optional): the stride of the sliding blocks in the
+ unfold layer. Default: None. (Would be set as `kernel_size`)
+ padding (int | tuple | string ): The padding length of
+ embedding conv. When it is a string, it means the mode
+ of adaptive padding, support "same" and "corner" now.
+ Default: "corner".
+ dilation (int | tuple, optional): dilation parameter in the unfold
+ layer. Default: 1.
+ bias (bool, optional): Whether to add bias in linear layer or not.
+ Defaults: False.
+ norm_cfg (dict, optional): Config dict for normalization layer.
+ Default: dict(type='LN').
+ init_cfg (dict, optional): The extra config for initialization.
+ Default: None.
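+
+ Example:
+ An illustrative sketch with Swin-like sizes (values assumed only
+ for demonstration):
+
+ >>> import torch
+ >>> merge = PatchMerging(in_channels=96, out_channels=192)
+ >>> x = torch.rand(1, 56 * 56, 96)
+ >>> out, out_size = merge(x, (56, 56))
+ >>> assert out.shape == (1, 28 * 28, 192) and out_size == (28, 28)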
+ """
+
+ def __init__(self,
+ in_channels,
+ out_channels,
+ kernel_size=2,
+ stride=None,
+ padding='corner',
+ dilation=1,
+ bias=False,
+ norm_cfg=dict(type='LN'),
+ init_cfg=None):
+ super().__init__(init_cfg=init_cfg)
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ if stride:
+ stride = stride
+ else:
+ stride = kernel_size
+
+ kernel_size = to_2tuple(kernel_size)
+ stride = to_2tuple(stride)
+ dilation = to_2tuple(dilation)
+
+ if isinstance(padding, str):
+ self.adaptive_padding = AdaptivePadding(
+ kernel_size=kernel_size,
+ stride=stride,
+ dilation=dilation,
+ padding=padding)
+ # disable the padding of unfold
+ padding = 0
+ else:
+ self.adaptive_padding = None
+
+ padding = to_2tuple(padding)
+ self.sampler = nn.Unfold(
+ kernel_size=kernel_size,
+ dilation=dilation,
+ padding=padding,
+ stride=stride)
+
+ sample_dim = kernel_size[0] * kernel_size[1] * in_channels
+
+ if norm_cfg is not None:
+ self.norm = build_norm_layer(norm_cfg, sample_dim)[1]
+ else:
+ self.norm = None
+
+ self.reduction = nn.Linear(sample_dim, out_channels, bias=bias)
+
+ def forward(self, x, input_size):
+ """
+ Args:
+ x (Tensor): Has shape (B, H*W, C_in).
+ input_size (tuple[int]): The spatial shape of x, arrange as (H, W).
+ Default: None.
+
+ Returns:
+ tuple: Contains merged results and its spatial shape.
+
+ - x (Tensor): Has shape (B, Merged_H * Merged_W, C_out)
+ - out_size (tuple[int]): Spatial shape of x, arrange as
+ (Merged_H, Merged_W).
+ """
+ B, L, C = x.shape
+ assert isinstance(input_size, Sequence), f'Expect ' \
+ f'input_size is ' \
+ f'`Sequence` ' \
+ f'but get {input_size}'
+
+ H, W = input_size
+ assert L == H * W, 'input feature has wrong size'
+
+ x = x.view(B, H, W, C).permute([0, 3, 1, 2]) # B, C, H, W
+
+ if self.adaptive_padding:
+ x = self.adaptive_padding(x)
+ H, W = x.shape[-2:]
+
+ # Use nn.Unfold to merge patch. About 25% faster than original method,
+ # but need to modify pretrained model for compatibility
+ # e.g. if kernel_size=2 and stride=2, x will have shape (B, 4*C, H/2*W/2)
+ x = self.sampler(x)
+
+ out_h = (H + 2 * self.sampler.padding[0] - self.sampler.dilation[0] *
+ (self.sampler.kernel_size[0] - 1) -
+ 1) // self.sampler.stride[0] + 1
+ out_w = (W + 2 * self.sampler.padding[1] - self.sampler.dilation[1] *
+ (self.sampler.kernel_size[1] - 1) -
+ 1) // self.sampler.stride[1] + 1
+
+ output_size = (out_h, out_w)
+ x = x.transpose(1, 2) # B, H/2*W/2, 4*C
+ x = self.norm(x) if self.norm else x
+ x = self.reduction(x)
+ return x, output_size
+
+
+@ATTENTION.register_module()
+class MultiheadAttention(BaseModule):
+ """A wrapper for ``torch.nn.MultiheadAttention``.
+
+ This module implements MultiheadAttention with identity connection,
+ and positional encoding is also passed as input.
+
+ Args:
+ embed_dims (int): The embedding dimension.
+ num_heads (int): Parallel attention heads.
+ attn_drop (float): A Dropout layer on attn_output_weights.
+ Default: 0.0.
+ proj_drop (float): A Dropout layer after `nn.MultiheadAttention`.
+ Default: 0.0.
+ dropout_layer (obj:`ConfigDict`): The dropout_layer used
+ when adding the shortcut.
+ init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
+ Default: None.
+ batch_first (bool): When it is True, Key, Query and Value are shape of
+ (batch, n, embed_dim), otherwise (n, batch, embed_dim).
+ Default to False.
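+
+ Example:
+ A minimal self-attention sketch (shapes chosen only for
+ illustration):
+
+ >>> import torch
+ >>> self_attn = MultiheadAttention(embed_dims=256, num_heads=8,
+ ... batch_first=True)
+ >>> x = torch.rand(2, 100, 256)
+ >>> out = self_attn(query=x)
+ >>> assert out.shape == (2, 100, 256)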
+ """
+
+ def __init__(self,
+ embed_dims,
+ num_heads,
+ attn_drop=0.,
+ proj_drop=0.,
+ dropout_layer=dict(type='Dropout', drop_prob=0.),
+ init_cfg=None,
+ batch_first=False,
+ **kwargs):
+ super(MultiheadAttention, self).__init__(init_cfg)
+ if 'dropout' in kwargs:
+ warnings.warn(
+ 'The argument `dropout` in MultiheadAttention '
+ 'has been deprecated; now you can separately '
+ 'set `attn_drop` (float), `proj_drop` (float), '
+ 'and `dropout_layer` (dict). ', DeprecationWarning)
+ attn_drop = kwargs['dropout']
+ dropout_layer['drop_prob'] = kwargs.pop('dropout')
+
+ self.embed_dims = embed_dims
+ self.num_heads = num_heads
+ self.batch_first = batch_first
+
+ self.attn = nn.MultiheadAttention(embed_dims, num_heads, attn_drop,
+ **kwargs)
+
+ self.proj_drop = nn.Dropout(proj_drop)
+ self.dropout_layer = build_dropout(
+ dropout_layer) if dropout_layer else nn.Identity()
+
+ @deprecated_api_warning({'residual': 'identity'},
+ cls_name='MultiheadAttention')
+ def forward(self,
+ query,
+ key=None,
+ value=None,
+ identity=None,
+ query_pos=None,
+ key_pos=None,
+ attn_mask=None,
+ key_padding_mask=None,
+ **kwargs):
+ """Forward function for `MultiheadAttention`.
+
+ **kwargs allow passing a more general data flow when combining
+ with other operations in `transformerlayer`.
+
+ Args:
+ query (Tensor): The input query with shape [num_queries, bs,
+ embed_dims] if self.batch_first is False, else
+ [bs, num_queries, embed_dims].
+ key (Tensor): The key tensor with shape [num_keys, bs,
+ embed_dims] if self.batch_first is False, else
+ [bs, num_keys, embed_dims] .
+ If None, the ``query`` will be used. Defaults to None.
+ value (Tensor): The value tensor with same shape as `key`.
+ Same in `nn.MultiheadAttention.forward`. Defaults to None.
+ If None, the `key` will be used.
+ identity (Tensor): This tensor, with the same shape as `query`,
+ will be used for the identity link.
+ If None, `query` will be used. Defaults to None.
+ query_pos (Tensor): The positional encoding for `query`, with
+ the same shape as `query`. If not None, it will
+ be added to `query` before the attention. Defaults to None.
+ key_pos (Tensor): The positional encoding for `key`, with the
+ same shape as `key`. Defaults to None. If not None, it will
+ be added to `key` before forward function. If None, and
+ `query_pos` has the same shape as `key`, then `query_pos`
+ will be used for `key_pos`. Defaults to None.
+ attn_mask (Tensor): ByteTensor mask with shape [num_queries,
+ num_keys]. Same in `nn.MultiheadAttention.forward`.
+ Defaults to None.
+ key_padding_mask (Tensor): ByteTensor with shape [bs, num_keys].
+ Defaults to None.
+
+ Returns:
+ Tensor: forwarded results with shape
+ [num_queries, bs, embed_dims]
+ if self.batch_first is False, else
+ [bs, num_queries, embed_dims].
+ """
+
+ if key is None:
+ key = query
+ if value is None:
+ value = key
+ if identity is None:
+ identity = query
+ if key_pos is None:
+ if query_pos is not None:
+ # use query_pos if key_pos is not available
+ if query_pos.shape == key.shape:
+ key_pos = query_pos
+ else:
+ warnings.warn(f'position encoding of key is '
+ f'missing in {self.__class__.__name__}.')
+ if query_pos is not None:
+ query = query + query_pos
+ if key_pos is not None:
+ key = key + key_pos
+
+ # Because the dataflow('key', 'query', 'value') of
+ # ``torch.nn.MultiheadAttention`` is (num_query, batch,
+ # embed_dims), We should adjust the shape of dataflow from
+ # batch_first (batch, num_query, embed_dims) to num_query_first
+ # (num_query ,batch, embed_dims), and recover ``attn_output``
+ # from num_query_first to batch_first.
+ if self.batch_first:
+ query = query.transpose(0, 1)
+ key = key.transpose(0, 1)
+ value = value.transpose(0, 1)
+
+ out = self.attn(
+ query=query,
+ key=key,
+ value=value,
+ attn_mask=attn_mask,
+ key_padding_mask=key_padding_mask)[0]
+
+ if self.batch_first:
+ out = out.transpose(0, 1)
+
+ return identity + self.dropout_layer(self.proj_drop(out))
+
+
+@FEEDFORWARD_NETWORK.register_module()
+class FFN(BaseModule):
+ """Implements feed-forward networks (FFNs) with identity connection.
+
+ Args:
+ embed_dims (int): The feature dimension. Same as
+ `MultiheadAttention`. Defaults: 256.
+ feedforward_channels (int): The hidden dimension of FFNs.
+ Defaults: 1024.
+ num_fcs (int, optional): The number of fully-connected layers in
+ FFNs. Default: 2.
+ act_cfg (dict, optional): The activation config for FFNs.
+ Default: dict(type='ReLU')
+ ffn_drop (float, optional): Probability of an element to be
+ zeroed in FFN. Default 0.0.
+ add_identity (bool, optional): Whether to add the
+ identity connection. Default: `True`.
+ dropout_layer (obj:`ConfigDict`): The dropout_layer used
+ when adding the shortcut.
+ init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
+ Default: None.
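+
+ Example:
+ A minimal sketch (shapes chosen only for illustration):
+
+ >>> import torch
+ >>> ffn = FFN(embed_dims=256, feedforward_channels=1024)
+ >>> x = torch.rand(2, 100, 256)
+ >>> assert ffn(x).shape == (2, 100, 256)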
+ """
+
+ @deprecated_api_warning(
+ {
+ 'dropout': 'ffn_drop',
+ 'add_residual': 'add_identity'
+ },
+ cls_name='FFN')
+ def __init__(self,
+ embed_dims=256,
+ feedforward_channels=1024,
+ num_fcs=2,
+ act_cfg=dict(type='ReLU', inplace=True),
+ ffn_drop=0.,
+ dropout_layer=None,
+ add_identity=True,
+ init_cfg=None,
+ **kwargs):
+ super(FFN, self).__init__(init_cfg)
+ assert num_fcs >= 2, 'num_fcs should be no less ' \
+ f'than 2. got {num_fcs}.'
+ self.embed_dims = embed_dims
+ self.feedforward_channels = feedforward_channels
+ self.num_fcs = num_fcs
+ self.act_cfg = act_cfg
+ self.activate = build_activation_layer(act_cfg)
+
+ layers = []
+ in_channels = embed_dims
+ for _ in range(num_fcs - 1):
+ layers.append(
+ Sequential(
+ Linear(in_channels, feedforward_channels), self.activate,
+ nn.Dropout(ffn_drop)))
+ in_channels = feedforward_channels
+ layers.append(Linear(feedforward_channels, embed_dims))
+ layers.append(nn.Dropout(ffn_drop))
+ self.layers = Sequential(*layers)
+ self.dropout_layer = build_dropout(
+ dropout_layer) if dropout_layer else torch.nn.Identity()
+ self.add_identity = add_identity
+
+ @deprecated_api_warning({'residual': 'identity'}, cls_name='FFN')
+ def forward(self, x, identity=None):
+ """Forward function for `FFN`.
+
+ If `identity` is None, `x` will be used as the identity and added
+ to the output tensor.
+ """
+ out = self.layers(x)
+ if not self.add_identity:
+ return self.dropout_layer(out)
+ if identity is None:
+ identity = x
+ return identity + self.dropout_layer(out)
+
+
+@TRANSFORMER_LAYER.register_module()
+class BaseTransformerLayer(BaseModule):
+ """Base `TransformerLayer` for vision transformer.
+
+ It can be built from `mmcv.ConfigDict` and supports more flexible
+ customization, for example, using any number of `FFN` or `LN` layers
+ and different kinds of `attention` by specifying a list of `ConfigDict`
+ named `attn_cfgs`. It is worth mentioning that it supports `prenorm`
+ when you specify `norm` as the first element of `operation_order`.
+ More details about `prenorm` can be found in `On Layer Normalization
+ in the Transformer Architecture`_ .
+
+ Args:
+ attn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )):
+ Configs for `self_attention` or `cross_attention` modules,
+ The order of the configs in the list should be consistent with
+ corresponding attentions in operation_order.
+ If it is a dict, all of the attention modules in operation_order
+ will be built with this config. Default: None.
+ ffn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )):
+ Configs for FFN, The order of the configs in the list should be
+ consistent with corresponding ffn in operation_order.
+ If it is a dict, all of the attention modules in operation_order
+ will be built with this config.
+ operation_order (tuple[str]): The execution order of operation
+ in transformer. Such as ('self_attn', 'norm', 'ffn', 'norm').
+ Supports `prenorm` when you specify the first element as `norm`.
+ Default: None.
+ norm_cfg (dict): Config dict for normalization layer.
+ Default: dict(type='LN').
+ init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
+ Default: None.
+ batch_first (bool): Key, Query and Value are shape
+ of (batch, n, embed_dim)
+ or (n, batch, embed_dim). Default to False.
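+
+ Example:
+ An illustrative encoder-style layer (config values assumed only for
+ demonstration):
+
+ >>> import torch
+ >>> layer = BaseTransformerLayer(
+ ... attn_cfgs=dict(
+ ... type='MultiheadAttention', embed_dims=256, num_heads=8),
+ ... operation_order=('self_attn', 'norm', 'ffn', 'norm'))
+ >>> query = torch.rand(100, 2, 256)
+ >>> assert layer(query).shape == (100, 2, 256)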
+ """
+
+ def __init__(self,
+ attn_cfgs=None,
+ ffn_cfgs=dict(
+ type='FFN',
+ embed_dims=256,
+ feedforward_channels=1024,
+ num_fcs=2,
+ ffn_drop=0.,
+ act_cfg=dict(type='ReLU', inplace=True),
+ ),
+ operation_order=None,
+ norm_cfg=dict(type='LN'),
+ init_cfg=None,
+ batch_first=False,
+ **kwargs):
+
+ deprecated_args = dict(
+ feedforward_channels='feedforward_channels',
+ ffn_dropout='ffn_drop',
+ ffn_num_fcs='num_fcs')
+ for ori_name, new_name in deprecated_args.items():
+ if ori_name in kwargs:
+ warnings.warn(
+ f'The argument `{ori_name}` in BaseTransformerLayer '
+ f'has been deprecated; now you should set `{new_name}` '
+ f'and other FFN related arguments '
+ f'in a dict named `ffn_cfgs`. ', DeprecationWarning)
+ ffn_cfgs[new_name] = kwargs[ori_name]
+
+ super(BaseTransformerLayer, self).__init__(init_cfg)
+
+ self.batch_first = batch_first
+
+ assert set(operation_order) & set(
+ ['self_attn', 'norm', 'ffn', 'cross_attn']) == \
+ set(operation_order), f'The operation_order of' \
+ f' {self.__class__.__name__} should ' \
+ f'only contain operation types from ' \
+ f"{['self_attn', 'norm', 'ffn', 'cross_attn']}"
+
+ num_attn = operation_order.count('self_attn') + operation_order.count(
+ 'cross_attn')
+ if isinstance(attn_cfgs, dict):
+ attn_cfgs = [copy.deepcopy(attn_cfgs) for _ in range(num_attn)]
+ else:
+ assert num_attn == len(attn_cfgs), f'The length ' \
+ f'of attn_cfgs {len(attn_cfgs)} is ' \
+ f'not consistent with the number of attentions ' \
+ f'in operation_order {operation_order}.'
+
+ self.num_attn = num_attn
+ self.operation_order = operation_order
+ self.norm_cfg = norm_cfg
+ self.pre_norm = operation_order[0] == 'norm'
+ self.attentions = ModuleList()
+
+ index = 0
+ for operation_name in operation_order:
+ if operation_name in ['self_attn', 'cross_attn']:
+ if 'batch_first' in attn_cfgs[index]:
+ assert self.batch_first == attn_cfgs[index]['batch_first']
+ else:
+ attn_cfgs[index]['batch_first'] = self.batch_first
+ attention = build_attention(attn_cfgs[index])
+ # Some custom attentions used as `self_attn`
+ # or `cross_attn` can have different behavior.
+ attention.operation_name = operation_name
+ self.attentions.append(attention)
+ index += 1
+
+ self.embed_dims = self.attentions[0].embed_dims
+
+ self.ffns = ModuleList()
+ num_ffns = operation_order.count('ffn')
+ if isinstance(ffn_cfgs, dict):
+ ffn_cfgs = ConfigDict(ffn_cfgs)
+ if isinstance(ffn_cfgs, dict):
+ ffn_cfgs = [copy.deepcopy(ffn_cfgs) for _ in range(num_ffns)]
+ assert len(ffn_cfgs) == num_ffns
+ for ffn_index in range(num_ffns):
+ if 'embed_dims' not in ffn_cfgs[ffn_index]:
+ ffn_cfgs[ffn_index]['embed_dims'] = self.embed_dims
+ else:
+ assert ffn_cfgs[ffn_index]['embed_dims'] == self.embed_dims
+ self.ffns.append(
+ build_feedforward_network(ffn_cfgs[ffn_index],
+ dict(type='FFN')))
+
+ self.norms = ModuleList()
+ num_norms = operation_order.count('norm')
+ for _ in range(num_norms):
+ self.norms.append(build_norm_layer(norm_cfg, self.embed_dims)[1])
+
+ def forward(self,
+ query,
+ key=None,
+ value=None,
+ query_pos=None,
+ key_pos=None,
+ attn_masks=None,
+ query_key_padding_mask=None,
+ key_padding_mask=None,
+ **kwargs):
+ """Forward function for `TransformerDecoderLayer`.
+
+ **kwargs contains some specific arguments of attentions.
+
+ Args:
+ query (Tensor): The input query with shape
+ [num_queries, bs, embed_dims] if
+ self.batch_first is False, else
+ [bs, num_queries, embed_dims].
+ key (Tensor): The key tensor with shape [num_keys, bs,
+ embed_dims] if self.batch_first is False, else
+ [bs, num_keys, embed_dims] .
+ value (Tensor): The value tensor with same shape as `key`.
+ query_pos (Tensor): The positional encoding for `query`.
+ Default: None.
+ key_pos (Tensor): The positional encoding for `key`.
+ Default: None.
+ attn_masks (List[Tensor] | None): 2D Tensor used in
+ calculation of corresponding attention. The length of
+ it should equal to the number of `attention` in
+ `operation_order`. Default: None.
+ query_key_padding_mask (Tensor): ByteTensor for `query`, with
+ shape [bs, num_queries]. Only used in `self_attn` layer.
+ Defaults to None.
+ key_padding_mask (Tensor): ByteTensor for `key`, with
+ shape [bs, num_keys]. Default: None.
+
+ Returns:
+ Tensor: forwarded results with shape [num_queries, bs, embed_dims].
+ """
+
+ norm_index = 0
+ attn_index = 0
+ ffn_index = 0
+ identity = query
+ if attn_masks is None:
+ attn_masks = [None for _ in range(self.num_attn)]
+ elif isinstance(attn_masks, torch.Tensor):
+ attn_masks = [
+ copy.deepcopy(attn_masks) for _ in range(self.num_attn)
+ ]
+ warnings.warn(f'Use same attn_mask in all attentions in '
+ f'{self.__class__.__name__} ')
+ else:
+ assert len(attn_masks) == self.num_attn, f'The length of ' \
+ f'attn_masks {len(attn_masks)} must be equal ' \
+ f'to the number of attention in ' \
+ f'operation_order {self.num_attn}'
+
+ for layer in self.operation_order:
+ if layer == 'self_attn':
+ temp_key = temp_value = query
+ query = self.attentions[attn_index](
+ query,
+ temp_key,
+ temp_value,
+ identity if self.pre_norm else None,
+ query_pos=query_pos,
+ key_pos=query_pos,
+ attn_mask=attn_masks[attn_index],
+ key_padding_mask=query_key_padding_mask,
+ **kwargs)
+ attn_index += 1
+ identity = query
+
+ elif layer == 'norm':
+ query = self.norms[norm_index](query)
+ norm_index += 1
+
+ elif layer == 'cross_attn':
+ query = self.attentions[attn_index](
+ query,
+ key,
+ value,
+ identity if self.pre_norm else None,
+ query_pos=query_pos,
+ key_pos=key_pos,
+ attn_mask=attn_masks[attn_index],
+ key_padding_mask=key_padding_mask,
+ **kwargs)
+ attn_index += 1
+ identity = query
+
+ elif layer == 'ffn':
+ query = self.ffns[ffn_index](
+ query, identity if self.pre_norm else None)
+ ffn_index += 1
+
+ return query
+
+
+@TRANSFORMER_LAYER_SEQUENCE.register_module()
+class TransformerLayerSequence(BaseModule):
+ """Base class for TransformerEncoder and TransformerDecoder in vision
+ transformer.
+
+ As base-class of Encoder and Decoder in vision transformer.
+ Support customization such as specifying different kind
+ of `transformer_layer` in `transformer_coder`.
+
+ Args:
+ transformerlayers (list[obj:`mmcv.ConfigDict`] |
+ obj:`mmcv.ConfigDict`): Config of the transformer layers
+ in TransformerCoder. If it is obj:`mmcv.ConfigDict`,
+ it would be repeated `num_layers` times to build a
+ list[`mmcv.ConfigDict`]. Default: None.
+ num_layers (int): The number of `TransformerLayer`. Default: None.
+ init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
+ Default: None.
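+
+ Example:
+ An illustrative two-layer encoder (config values assumed only for
+ demonstration):
+
+ >>> import torch
+ >>> encoder = TransformerLayerSequence(
+ ... transformerlayers=dict(
+ ... type='BaseTransformerLayer',
+ ... attn_cfgs=dict(
+ ... type='MultiheadAttention', embed_dims=256, num_heads=8),
+ ... operation_order=('self_attn', 'norm', 'ffn', 'norm')),
+ ... num_layers=2)
+ >>> query = torch.rand(100, 2, 256)
+ >>> assert encoder(query, None, None).shape == (100, 2, 256)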
+ """
+
+ def __init__(self, transformerlayers=None, num_layers=None, init_cfg=None):
+ super(TransformerLayerSequence, self).__init__(init_cfg)
+ if isinstance(transformerlayers, dict):
+ transformerlayers = [
+ copy.deepcopy(transformerlayers) for _ in range(num_layers)
+ ]
+ else:
+ assert isinstance(transformerlayers, list) and \
+ len(transformerlayers) == num_layers
+ self.num_layers = num_layers
+ self.layers = ModuleList()
+ for i in range(num_layers):
+ self.layers.append(build_transformer_layer(transformerlayers[i]))
+ self.embed_dims = self.layers[0].embed_dims
+ self.pre_norm = self.layers[0].pre_norm
+
+ def forward(self,
+ query,
+ key,
+ value,
+ query_pos=None,
+ key_pos=None,
+ attn_masks=None,
+ query_key_padding_mask=None,
+ key_padding_mask=None,
+ **kwargs):
+ """Forward function for `TransformerCoder`.
+
+ Args:
+ query (Tensor): Input query with shape
+ `(num_queries, bs, embed_dims)`.
+ key (Tensor): The key tensor with shape
+ `(num_keys, bs, embed_dims)`.
+ value (Tensor): The value tensor with shape
+ `(num_keys, bs, embed_dims)`.
+ query_pos (Tensor): The positional encoding for `query`.
+ Default: None.
+ key_pos (Tensor): The positional encoding for `key`.
+ Default: None.
+ attn_masks (List[Tensor], optional): Each element is 2D Tensor
+ which is used in calculation of corresponding attention in
+ operation_order. Default: None.
+ query_key_padding_mask (Tensor): ByteTensor for `query`, with
+ shape [bs, num_queries]. Only used in self-attention.
+ Default: None.
+ key_padding_mask (Tensor): ByteTensor for `key`, with
+ shape [bs, num_keys]. Default: None.
+
+ Returns:
+ Tensor: results with shape [num_queries, bs, embed_dims].
+ """
+ for layer in self.layers:
+ query = layer(
+ query,
+ key,
+ value,
+ query_pos=query_pos,
+ key_pos=key_pos,
+ attn_masks=attn_masks,
+ query_key_padding_mask=query_key_padding_mask,
+ key_padding_mask=key_padding_mask,
+ **kwargs)
+ return query
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/upsample.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/upsample.py
new file mode 100644
index 0000000000000000000000000000000000000000..081d22c86a511d0372195e3d40207624bf3cb6c3
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/upsample.py
@@ -0,0 +1,97 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch.nn as nn
+import torch.nn.functional as F
+
+from ..utils import xavier_init
+from .registry import UPSAMPLE_LAYERS
+
+UPSAMPLE_LAYERS.register_module('nearest', module=nn.Upsample)
+UPSAMPLE_LAYERS.register_module('bilinear', module=nn.Upsample)
+
+
+@UPSAMPLE_LAYERS.register_module(name='pixel_shuffle')
+class PixelShufflePack(nn.Module):
+ """Pixel Shuffle upsample layer.
+
+ This module packs `F.pixel_shuffle()` and a nn.Conv2d module together to
+ achieve a simple upsampling with pixel shuffle.
+
+ Args:
+ in_channels (int): Number of input channels.
+ out_channels (int): Number of output channels.
+ scale_factor (int): Upsample ratio.
+ upsample_kernel (int): Kernel size of the conv layer to expand the
+ channels.
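+
+ Example:
+ A minimal sketch (sizes assumed only for illustration):
+
+ >>> import torch
+ >>> up = PixelShufflePack(64, 32, scale_factor=2, upsample_kernel=3)
+ >>> x = torch.rand(1, 64, 8, 8)
+ >>> assert up(x).shape == (1, 32, 16, 16)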
+ """
+
+ def __init__(self, in_channels, out_channels, scale_factor,
+ upsample_kernel):
+ super(PixelShufflePack, self).__init__()
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.scale_factor = scale_factor
+ self.upsample_kernel = upsample_kernel
+ self.upsample_conv = nn.Conv2d(
+ self.in_channels,
+ self.out_channels * scale_factor * scale_factor,
+ self.upsample_kernel,
+ padding=(self.upsample_kernel - 1) // 2)
+ self.init_weights()
+
+ def init_weights(self):
+ xavier_init(self.upsample_conv, distribution='uniform')
+
+ def forward(self, x):
+ x = self.upsample_conv(x)
+ x = F.pixel_shuffle(x, self.scale_factor)
+ return x
+
+
+def build_upsample_layer(cfg, *args, **kwargs):
+ """Build upsample layer.
+
+ Args:
+ cfg (dict): The upsample layer config, which should contain:
+
+ - type (str): Layer type.
+ - scale_factor (int): Upsample ratio, which is not applicable to
+ deconv.
+ - layer args: Args needed to instantiate an upsample layer.
+ args (argument list): Arguments passed to the ``__init__``
+ method of the corresponding upsample layer.
+ kwargs (keyword arguments): Keyword arguments passed to the
+ ``__init__`` method of the corresponding upsample layer.
+
+ Returns:
+ nn.Module: Created upsample layer.
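+
+ Example:
+ A minimal sketch, using the ``bilinear`` entry registered above:
+
+ >>> import torch
+ >>> up = build_upsample_layer(dict(type='bilinear', scale_factor=2))
+ >>> x = torch.rand(1, 3, 8, 8)
+ >>> assert up(x).shape == (1, 3, 16, 16)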
+ """
+ if not isinstance(cfg, dict):
+ raise TypeError(f'cfg must be a dict, but got {type(cfg)}')
+ if 'type' not in cfg:
+ raise KeyError(
+ f'the cfg dict must contain the key "type", but got {cfg}')
+ cfg_ = cfg.copy()
+
+ layer_type = cfg_.pop('type')
+ if layer_type not in UPSAMPLE_LAYERS:
+ raise KeyError(f'Unrecognized upsample type {layer_type}')
+ else:
+ upsample = UPSAMPLE_LAYERS.get(layer_type)
+
+ if upsample is nn.Upsample:
+ cfg_['mode'] = layer_type
+ layer = upsample(*args, **kwargs, **cfg_)
+ return layer
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/wrappers.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/wrappers.py
new file mode 100644
index 0000000000000000000000000000000000000000..c8df7dc51d4941dbe460add9d10464bc7ecb9331
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/bricks/wrappers.py
@@ -0,0 +1,193 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+r"""Modified from https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/wrappers.py # noqa: E501
+
+Wrap some nn modules to support empty tensor input. Currently, these wrappers
+are mainly used in mask heads like fcn_mask_head and maskiou_heads since mask
+heads are trained on only positive RoIs.
+"""
+import math
+
+import torch
+import torch.nn as nn
+from torch.nn.modules.utils import _pair, _triple
+
+from .registry import CONV_LAYERS, UPSAMPLE_LAYERS
+
+if torch.__version__ == 'parrots':
+ TORCH_VERSION = torch.__version__
+else:
+ # torch.__version__ could be 1.3.1+cu92, we only need the first two
+ # for comparison
+ TORCH_VERSION = tuple(int(x) for x in torch.__version__.split('.')[:2])
+
+
+def obsolete_torch_version(torch_version, version_threshold):
+ return torch_version == 'parrots' or torch_version <= version_threshold
+
+
+class NewEmptyTensorOp(torch.autograd.Function):
+
+ @staticmethod
+ def forward(ctx, x, new_shape):
+ ctx.shape = x.shape
+ return x.new_empty(new_shape)
+
+ @staticmethod
+ def backward(ctx, grad):
+ shape = ctx.shape
+ return NewEmptyTensorOp.apply(grad, shape), None
+
+
+@CONV_LAYERS.register_module('Conv', force=True)
+class Conv2d(nn.Conv2d):
+
+ def forward(self, x):
+ if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)):
+ out_shape = [x.shape[0], self.out_channels]
+ for i, k, p, s, d in zip(x.shape[-2:], self.kernel_size,
+ self.padding, self.stride, self.dilation):
+ o = (i + 2 * p - (d * (k - 1) + 1)) // s + 1
+ out_shape.append(o)
+ empty = NewEmptyTensorOp.apply(x, out_shape)
+ if self.training:
+ # produce dummy gradient to avoid DDP warning.
+ dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0
+ return empty + dummy
+ else:
+ return empty
+
+ return super().forward(x)
+
+
+@CONV_LAYERS.register_module('Conv3d', force=True)
+class Conv3d(nn.Conv3d):
+
+ def forward(self, x):
+ if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)):
+ out_shape = [x.shape[0], self.out_channels]
+ for i, k, p, s, d in zip(x.shape[-3:], self.kernel_size,
+ self.padding, self.stride, self.dilation):
+ o = (i + 2 * p - (d * (k - 1) + 1)) // s + 1
+ out_shape.append(o)
+ empty = NewEmptyTensorOp.apply(x, out_shape)
+ if self.training:
+ # produce dummy gradient to avoid DDP warning.
+ dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0
+ return empty + dummy
+ else:
+ return empty
+
+ return super().forward(x)
+
+
+@CONV_LAYERS.register_module()
+@CONV_LAYERS.register_module('deconv')
+@UPSAMPLE_LAYERS.register_module('deconv', force=True)
+class ConvTranspose2d(nn.ConvTranspose2d):
+
+ def forward(self, x):
+ if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)):
+ out_shape = [x.shape[0], self.out_channels]
+ for i, k, p, s, d, op in zip(x.shape[-2:], self.kernel_size,
+ self.padding, self.stride,
+ self.dilation, self.output_padding):
+ out_shape.append((i - 1) * s - 2 * p + (d * (k - 1) + 1) + op)
+ empty = NewEmptyTensorOp.apply(x, out_shape)
+ if self.training:
+ # produce dummy gradient to avoid DDP warning.
+ dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0
+ return empty + dummy
+ else:
+ return empty
+
+ return super().forward(x)
+
+
+@CONV_LAYERS.register_module()
+@CONV_LAYERS.register_module('deconv3d')
+@UPSAMPLE_LAYERS.register_module('deconv3d', force=True)
+class ConvTranspose3d(nn.ConvTranspose3d):
+
+ def forward(self, x):
+ if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)):
+ out_shape = [x.shape[0], self.out_channels]
+ for i, k, p, s, d, op in zip(x.shape[-3:], self.kernel_size,
+ self.padding, self.stride,
+ self.dilation, self.output_padding):
+ out_shape.append((i - 1) * s - 2 * p + (d * (k - 1) + 1) + op)
+ empty = NewEmptyTensorOp.apply(x, out_shape)
+ if self.training:
+ # produce dummy gradient to avoid DDP warning.
+ dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0
+ return empty + dummy
+ else:
+ return empty
+
+ return super().forward(x)
+
+
+class MaxPool2d(nn.MaxPool2d):
+
+ def forward(self, x):
+ # PyTorch 1.9 does not support empty tensor inference yet
+ if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 9)):
+ out_shape = list(x.shape[:2])
+ for i, k, p, s, d in zip(x.shape[-2:], _pair(self.kernel_size),
+ _pair(self.padding), _pair(self.stride),
+ _pair(self.dilation)):
+ o = (i + 2 * p - (d * (k - 1) + 1)) / s + 1
+ o = math.ceil(o) if self.ceil_mode else math.floor(o)
+ out_shape.append(o)
+ empty = NewEmptyTensorOp.apply(x, out_shape)
+ return empty
+
+ return super().forward(x)
+
+
+class MaxPool3d(nn.MaxPool3d):
+
+ def forward(self, x):
+ # PyTorch 1.9 does not support empty tensor inference yet
+ if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 9)):
+ out_shape = list(x.shape[:2])
+ for i, k, p, s, d in zip(x.shape[-3:], _triple(self.kernel_size),
+ _triple(self.padding),
+ _triple(self.stride),
+ _triple(self.dilation)):
+ o = (i + 2 * p - (d * (k - 1) + 1)) / s + 1
+ o = math.ceil(o) if self.ceil_mode else math.floor(o)
+ out_shape.append(o)
+ empty = NewEmptyTensorOp.apply(x, out_shape)
+ return empty
+
+ return super().forward(x)
+
+
+class Linear(torch.nn.Linear):
+
+ def forward(self, x):
+ # empty tensor forward of Linear layer is supported in PyTorch 1.6
+ if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 5)):
+ out_shape = [x.shape[0], self.out_features]
+ empty = NewEmptyTensorOp.apply(x, out_shape)
+ if self.training:
+ # produce dummy gradient to avoid DDP warning.
+ dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0
+ return empty + dummy
+ else:
+ return empty
+
+ return super().forward(x)
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/builder.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..07d37b06f64b7aba94f5315b01261af50e8ea4a9
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/builder.py
@@ -0,0 +1,43 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from ..runner import Sequential
+from ..utils import Registry, build_from_cfg
+
+
+def build_model_from_cfg(cfg, registry, default_args=None):
+ """Build a PyTorch model from config dict(s). Different from
+ ``build_from_cfg``, if cfg is a list, a ``nn.Sequential`` will be built.
+
+ Args:
+ cfg (dict, list[dict]): The config of modules; it is either a config
+ dict or a list of config dicts. If cfg is a list,
+ the built modules will be wrapped with ``nn.Sequential``.
+ registry (:obj:`Registry`): A registry the module belongs to.
+ default_args (dict, optional): Default arguments to build the module.
+ Defaults to None.
+
+ Returns:
+ nn.Module: A built nn module.
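+
+ Example:
+ A hypothetical sketch; the ``TOY`` registry and its ``ReLU`` entry
+ are illustrative and not part of this module:
+
+ >>> import torch.nn as nn
+ >>> TOY = Registry('toy')
+ >>> TOY.register_module('ReLU', module=nn.ReLU)
+ >>> single = build_model_from_cfg(dict(type='ReLU'), TOY)
+ >>> stacked = build_model_from_cfg([dict(type='ReLU')] * 2, TOY)
+ >>> assert isinstance(single, nn.ReLU)
+ >>> assert isinstance(stacked, Sequential) and len(stacked) == 2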
+ """
+ if isinstance(cfg, list):
+ modules = [
+ build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg
+ ]
+ return Sequential(*modules)
+ else:
+ return build_from_cfg(cfg, registry, default_args)
+
+
+MODELS = Registry('model', build_func=build_model_from_cfg)
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/resnet.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/resnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..584c1718d33070e7dede8defe9369106d6652b96
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/resnet.py
@@ -0,0 +1,329 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+
+import torch.nn as nn
+import torch.utils.checkpoint as cp
+
+from .utils import constant_init, kaiming_init
+
+
+def conv3x3(in_planes, out_planes, stride=1, dilation=1):
+ """3x3 convolution with padding."""
+ return nn.Conv2d(
+ in_planes,
+ out_planes,
+ kernel_size=3,
+ stride=stride,
+ padding=dilation,
+ dilation=dilation,
+ bias=False)
+
+
+class BasicBlock(nn.Module):
+ expansion = 1
+
+ def __init__(self,
+ inplanes,
+ planes,
+ stride=1,
+ dilation=1,
+ downsample=None,
+ style='pytorch',
+ with_cp=False):
+ super(BasicBlock, self).__init__()
+ assert style in ['pytorch', 'caffe']
+ self.conv1 = conv3x3(inplanes, planes, stride, dilation)
+ self.bn1 = nn.BatchNorm2d(planes)
+ self.relu = nn.ReLU(inplace=True)
+ self.conv2 = conv3x3(planes, planes)
+ self.bn2 = nn.BatchNorm2d(planes)
+ self.downsample = downsample
+ self.stride = stride
+ self.dilation = dilation
+ assert not with_cp
+
+ def forward(self, x):
+ residual = x
+
+ out = self.conv1(x)
+ out = self.bn1(out)
+ out = self.relu(out)
+
+ out = self.conv2(out)
+ out = self.bn2(out)
+
+ if self.downsample is not None:
+ residual = self.downsample(x)
+
+ out += residual
+ out = self.relu(out)
+
+ return out
+
+
+class Bottleneck(nn.Module):
+ expansion = 4
+
+ def __init__(self,
+ inplanes,
+ planes,
+ stride=1,
+ dilation=1,
+ downsample=None,
+ style='pytorch',
+ with_cp=False):
+ """Bottleneck block.
+
+ If style is "pytorch", the stride-two layer is the 3x3 conv layer, if
+ it is "caffe", the stride-two layer is the first 1x1 conv layer.
+ """
+ super(Bottleneck, self).__init__()
+ assert style in ['pytorch', 'caffe']
+ if style == 'pytorch':
+ conv1_stride = 1
+ conv2_stride = stride
+ else:
+ conv1_stride = stride
+ conv2_stride = 1
+ self.conv1 = nn.Conv2d(
+ inplanes, planes, kernel_size=1, stride=conv1_stride, bias=False)
+ self.conv2 = nn.Conv2d(
+ planes,
+ planes,
+ kernel_size=3,
+ stride=conv2_stride,
+ padding=dilation,
+ dilation=dilation,
+ bias=False)
+
+ self.bn1 = nn.BatchNorm2d(planes)
+ self.bn2 = nn.BatchNorm2d(planes)
+ self.conv3 = nn.Conv2d(
+ planes, planes * self.expansion, kernel_size=1, bias=False)
+ self.bn3 = nn.BatchNorm2d(planes * self.expansion)
+ self.relu = nn.ReLU(inplace=True)
+ self.downsample = downsample
+ self.stride = stride
+ self.dilation = dilation
+ self.with_cp = with_cp
+
+ def forward(self, x):
+
+ def _inner_forward(x):
+ residual = x
+
+ out = self.conv1(x)
+ out = self.bn1(out)
+ out = self.relu(out)
+
+ out = self.conv2(out)
+ out = self.bn2(out)
+ out = self.relu(out)
+
+ out = self.conv3(out)
+ out = self.bn3(out)
+
+ if self.downsample is not None:
+ residual = self.downsample(x)
+
+ out += residual
+
+ return out
+
+ if self.with_cp and x.requires_grad:
+ out = cp.checkpoint(_inner_forward, x)
+ else:
+ out = _inner_forward(x)
+
+ out = self.relu(out)
+
+ return out
+
+
+def make_res_layer(block,
+ inplanes,
+ planes,
+ blocks,
+ stride=1,
+ dilation=1,
+ style='pytorch',
+ with_cp=False):
+ downsample = None
+ if stride != 1 or inplanes != planes * block.expansion:
+ downsample = nn.Sequential(
+ nn.Conv2d(
+ inplanes,
+ planes * block.expansion,
+ kernel_size=1,
+ stride=stride,
+ bias=False),
+ nn.BatchNorm2d(planes * block.expansion),
+ )
+
+ layers = []
+ layers.append(
+ block(
+ inplanes,
+ planes,
+ stride,
+ dilation,
+ downsample,
+ style=style,
+ with_cp=with_cp))
+ inplanes = planes * block.expansion
+ for _ in range(1, blocks):
+ layers.append(
+ block(inplanes, planes, 1, dilation, style=style, with_cp=with_cp))
+
+ return nn.Sequential(*layers)
+
+
+class ResNet(nn.Module):
+ """ResNet backbone.
+
+ Args:
+ depth (int): Depth of resnet, from {18, 34, 50, 101, 152}.
+ num_stages (int): Resnet stages, normally 4.
+ strides (Sequence[int]): Strides of the first block of each stage.
+ dilations (Sequence[int]): Dilation of each stage.
+ out_indices (Sequence[int]): Output from which stages.
+ style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
+ layer is the 3x3 conv layer, otherwise the stride-two layer is
+ the first 1x1 conv layer.
+ frozen_stages (int): Stages to be frozen (all param fixed). -1 means
+ not freezing any parameters.
+ bn_eval (bool): Whether to set BN layers as eval mode, namely, freeze
+ running stats (mean and var).
+ bn_frozen (bool): Whether to freeze weight and bias of BN layers.
+ with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+ memory while slowing down the training speed.
+ """
+
+ arch_settings = {
+ 18: (BasicBlock, (2, 2, 2, 2)),
+ 34: (BasicBlock, (3, 4, 6, 3)),
+ 50: (Bottleneck, (3, 4, 6, 3)),
+ 101: (Bottleneck, (3, 4, 23, 3)),
+ 152: (Bottleneck, (3, 8, 36, 3))
+ }
+
+ def __init__(self,
+ depth,
+ num_stages=4,
+ strides=(1, 2, 2, 2),
+ dilations=(1, 1, 1, 1),
+ out_indices=(0, 1, 2, 3),
+ style='pytorch',
+ frozen_stages=-1,
+ bn_eval=True,
+ bn_frozen=False,
+ with_cp=False):
+ super(ResNet, self).__init__()
+ if depth not in self.arch_settings:
+ raise KeyError(f'invalid depth {depth} for resnet')
+ assert num_stages >= 1 and num_stages <= 4
+ block, stage_blocks = self.arch_settings[depth]
+ stage_blocks = stage_blocks[:num_stages]
+ assert len(strides) == len(dilations) == num_stages
+ assert max(out_indices) < num_stages
+
+ self.out_indices = out_indices
+ self.style = style
+ self.frozen_stages = frozen_stages
+ self.bn_eval = bn_eval
+ self.bn_frozen = bn_frozen
+ self.with_cp = with_cp
+
+ self.inplanes = 64
+ self.conv1 = nn.Conv2d(
+ 3, 64, kernel_size=7, stride=2, padding=3, bias=False)
+ self.bn1 = nn.BatchNorm2d(64)
+ self.relu = nn.ReLU(inplace=True)
+ self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+
+ self.res_layers = []
+ for i, num_blocks in enumerate(stage_blocks):
+ stride = strides[i]
+ dilation = dilations[i]
+ planes = 64 * 2**i
+ res_layer = make_res_layer(
+ block,
+ self.inplanes,
+ planes,
+ num_blocks,
+ stride=stride,
+ dilation=dilation,
+ style=self.style,
+ with_cp=with_cp)
+ self.inplanes = planes * block.expansion
+ layer_name = f'layer{i + 1}'
+ self.add_module(layer_name, res_layer)
+ self.res_layers.append(layer_name)
+
+ self.feat_dim = block.expansion * 64 * 2**(len(stage_blocks) - 1)
+
+ def init_weights(self, pretrained=None):
+ if isinstance(pretrained, str):
+ logger = logging.getLogger()
+ from ..runner import load_checkpoint
+ load_checkpoint(self, pretrained, strict=False, logger=logger)
+ elif pretrained is None:
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ kaiming_init(m)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
+ else:
+ raise TypeError('pretrained must be a str or None')
+
+ def forward(self, x):
+ x = self.conv1(x)
+ x = self.bn1(x)
+ x = self.relu(x)
+ x = self.maxpool(x)
+ outs = []
+ for i, layer_name in enumerate(self.res_layers):
+ res_layer = getattr(self, layer_name)
+ x = res_layer(x)
+ if i in self.out_indices:
+ outs.append(x)
+ if len(outs) == 1:
+ return outs[0]
+ else:
+ return tuple(outs)
+
+ def train(self, mode=True):
+ super(ResNet, self).train(mode)
+ if self.bn_eval:
+ for m in self.modules():
+ if isinstance(m, nn.BatchNorm2d):
+ m.eval()
+ if self.bn_frozen:
+ for params in m.parameters():
+ params.requires_grad = False
+ if mode and self.frozen_stages >= 0:
+ for param in self.conv1.parameters():
+ param.requires_grad = False
+ for param in self.bn1.parameters():
+ param.requires_grad = False
+ self.bn1.eval()
+ self.bn1.weight.requires_grad = False
+ self.bn1.bias.requires_grad = False
+ for i in range(1, self.frozen_stages + 1):
+ mod = getattr(self, f'layer{i}')
+ mod.eval()
+ for param in mod.parameters():
+ param.requires_grad = False
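+
+
+# Usage sketch (editor's illustration): a ResNet-50 backbone that returns the
+# four stage feature maps; shapes assume a 3x224x224 input.
+#
+#   import torch
+#   backbone = ResNet(depth=50, out_indices=(0, 1, 2, 3))
+#   backbone.init_weights()
+#   c2, c3, c4, c5 = backbone(torch.randn(1, 3, 224, 224))
+#   # channels 256/512/1024/2048 at strides 4/8/16/32 respectively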
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/utils/__init__.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..cd99a47b99bc1fd0337f3d03bd8666eb19fd197a
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/utils/__init__.py
@@ -0,0 +1,32 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .flops_counter import get_model_complexity_info
+from .fuse_conv_bn import fuse_conv_bn
+from .sync_bn import revert_sync_batchnorm
+from .weight_init import (INITIALIZERS, Caffe2XavierInit, ConstantInit,
+ KaimingInit, NormalInit, PretrainedInit,
+ TruncNormalInit, UniformInit, XavierInit,
+ bias_init_with_prob, caffe2_xavier_init,
+ constant_init, initialize, kaiming_init, normal_init,
+ trunc_normal_init, uniform_init, xavier_init)
+
+__all__ = [
+ 'get_model_complexity_info', 'bias_init_with_prob', 'caffe2_xavier_init',
+ 'constant_init', 'kaiming_init', 'normal_init', 'trunc_normal_init',
+ 'uniform_init', 'xavier_init', 'fuse_conv_bn', 'initialize',
+ 'INITIALIZERS', 'ConstantInit', 'XavierInit', 'NormalInit',
+ 'TruncNormalInit', 'UniformInit', 'KaimingInit', 'PretrainedInit',
+ 'Caffe2XavierInit', 'revert_sync_batchnorm'
+]
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/utils/flops_counter.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/utils/flops_counter.py
new file mode 100644
index 0000000000000000000000000000000000000000..fda698ed9acfbcbcb4fb42397fbcf7ea00733914
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/utils/flops_counter.py
@@ -0,0 +1,589 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import warnings
+from functools import partial
+
+import numpy as np
+import torch
+import torch.nn as nn
+
+import mmcv
+
+
+def get_model_complexity_info(model,
+ input_shape,
+ print_per_layer_stat=True,
+ as_strings=True,
+ input_constructor=None,
+ flush=False,
+ ost=sys.stdout):
+ """Get complexity information of a model.
+
+ This method can calculate FLOPs and parameter counts of a model with
+ corresponding input shape. It can also print complexity information for
+ each layer in a model.
+
+ Supported layers are listed as below:
+ - Convolutions: ``nn.Conv1d``, ``nn.Conv2d``, ``nn.Conv3d``.
+ - Activations: ``nn.ReLU``, ``nn.PReLU``, ``nn.ELU``,
+ ``nn.LeakyReLU``, ``nn.ReLU6``.
+ - Poolings: ``nn.MaxPool1d``, ``nn.MaxPool2d``, ``nn.MaxPool3d``,
+ ``nn.AvgPool1d``, ``nn.AvgPool2d``, ``nn.AvgPool3d``,
+ ``nn.AdaptiveMaxPool1d``, ``nn.AdaptiveMaxPool2d``,
+ ``nn.AdaptiveMaxPool3d``, ``nn.AdaptiveAvgPool1d``,
+ ``nn.AdaptiveAvgPool2d``, ``nn.AdaptiveAvgPool3d``.
+ - BatchNorms: ``nn.BatchNorm1d``, ``nn.BatchNorm2d``,
+ ``nn.BatchNorm3d``, ``nn.GroupNorm``, ``nn.InstanceNorm1d``,
+ ``InstanceNorm2d``, ``InstanceNorm3d``, ``nn.LayerNorm``.
+ - Linear: ``nn.Linear``.
+ - Deconvolution: ``nn.ConvTranspose2d``.
+ - Upsample: ``nn.Upsample``.
+
+ Args:
+ model (nn.Module): The model for complexity calculation.
+ input_shape (tuple): Input shape used for calculation.
+ print_per_layer_stat (bool): Whether to print complexity information
+ for each layer in a model. Default: True.
+ as_strings (bool): Output FLOPs and params counts in a string form.
+ Default: True.
+ input_constructor (None | callable): If specified, it takes a callable
+ method that generates the input. Otherwise, a random tensor with
+ the given input shape is generated to calculate FLOPs. Default: None.
+ flush (bool): same as that in :func:`print`. Default: False.
+ ost (stream): same as ``file`` param in :func:`print`.
+ Default: sys.stdout.
+
+ Returns:
+ tuple[float | str]: If ``as_strings`` is set to True, it will return
+ FLOPs and parameter counts in a string format. Otherwise, it will
+ return them as float numbers.
+ """
+ assert type(input_shape) is tuple
+ assert len(input_shape) >= 1
+ assert isinstance(model, nn.Module)
+ flops_model = add_flops_counting_methods(model)
+ flops_model.eval()
+ flops_model.start_flops_count()
+ if input_constructor:
+ input = input_constructor(input_shape)
+ _ = flops_model(**input)
+ else:
+ try:
+ batch = torch.ones(()).new_empty(
+ (1, *input_shape),
+ dtype=next(flops_model.parameters()).dtype,
+ device=next(flops_model.parameters()).device)
+ except StopIteration:
+ # Avoid StopIteration for models which have no parameters,
+ # like `nn.Relu()`, `nn.AvgPool2d`, etc.
+ batch = torch.ones(()).new_empty((1, *input_shape))
+
+ _ = flops_model(batch)
+
+ flops_count, params_count = flops_model.compute_average_flops_cost()
+ if print_per_layer_stat:
+ print_model_with_flops(
+ flops_model, flops_count, params_count, ost=ost, flush=flush)
+ flops_model.stop_flops_count()
+
+ if as_strings:
+ return flops_to_string(flops_count), params_to_string(params_count)
+
+ return flops_count, params_count
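+
+
+# Usage sketch (editor's illustration, assuming torchvision is available):
+#
+#   import torchvision
+#   model = torchvision.models.resnet18()
+#   flops, params = get_model_complexity_info(
+#       model, (3, 224, 224), print_per_layer_stat=False)
+#   # e.g. flops ~ '1.82 GFLOPs', params ~ '11.69 M'
+#   # (one multiply-add is counted as one FLOP, see flops_to_string below)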
+
+
+def flops_to_string(flops, units='GFLOPs', precision=2):
+ """Convert FLOPs number into a string.
+
+ Note that here one multiply-add is counted as one FLOP.
+
+ Args:
+ flops (float): FLOPs number to be converted.
+ units (str | None): Converted FLOPs units. Options are None, 'GFLOPs',
+ 'MFLOPs', 'KFLOPs', 'FLOPs'. If set to None, it will automatically
+ choose the most suitable unit for FLOPs. Default: 'GFLOPs'.
+ precision (int): Digit number after the decimal point. Default: 2.
+
+ Returns:
+ str: The converted FLOPs number with units.
+
+ Examples:
+ >>> flops_to_string(1e9)
+ '1.0 GFLOPs'
+ >>> flops_to_string(2e5, 'MFLOPs')
+ '0.2 MFLOPs'
+ >>> flops_to_string(3e-9, None)
+ '3e-09 FLOPs'
+ """
+ if units is None:
+ if flops // 10**9 > 0:
+ return str(round(flops / 10.**9, precision)) + ' GFLOPs'
+ elif flops // 10**6 > 0:
+ return str(round(flops / 10.**6, precision)) + ' MFLOPs'
+ elif flops // 10**3 > 0:
+ return str(round(flops / 10.**3, precision)) + ' KFLOPs'
+ else:
+ return str(flops) + ' FLOPs'
+ else:
+ if units == 'GFLOPs':
+ return str(round(flops / 10.**9, precision)) + ' ' + units
+ elif units == 'MFLOPs':
+ return str(round(flops / 10.**6, precision)) + ' ' + units
+ elif units == 'KFLOPs':
+ return str(round(flops / 10.**3, precision)) + ' ' + units
+ else:
+ return str(flops) + ' FLOPs'
+
+
+def params_to_string(num_params, units=None, precision=2):
+ """Convert parameter number into a string.
+
+ Args:
+ num_params (float): Parameter number to be converted.
+ units (str | None): Converted parameter units. Options are None, 'M',
+ 'K' and ''. If set to None, it will automatically choose the most
+ suitable unit for the parameter number. Default: None.
+ precision (int): Digit number after the decimal point. Default: 2.
+
+ Returns:
+ str: The converted parameter number with units.
+
+ Examples:
+ >>> params_to_string(1e9)
+ '1000.0 M'
+ >>> params_to_string(2e5)
+ '200.0 k'
+ >>> params_to_string(3e-9)
+ '3e-09'
+ """
+ if units is None:
+ if num_params // 10**6 > 0:
+ return str(round(num_params / 10**6, precision)) + ' M'
+ elif num_params // 10**3:
+ return str(round(num_params / 10**3, precision)) + ' k'
+ else:
+ return str(num_params)
+ else:
+ if units == 'M':
+ return str(round(num_params / 10.**6, precision)) + ' ' + units
+ elif units == 'K':
+ return str(round(num_params / 10.**3, precision)) + ' ' + units
+ else:
+ return str(num_params)
+
+
+def print_model_with_flops(model,
+ total_flops,
+ total_params,
+ units='GFLOPs',
+ precision=3,
+ ost=sys.stdout,
+ flush=False):
+ """Print a model with FLOPs for each layer.
+
+ Args:
+ model (nn.Module): The model to be printed.
+ total_flops (float): Total FLOPs of the model.
+ total_params (float): Total parameter counts of the model.
+ units (str | None): Converted FLOPs units. Default: 'GFLOPs'.
+ precision (int): Digit number after the decimal point. Default: 3.
+ ost (stream): same as `file` param in :func:`print`.
+ Default: sys.stdout.
+ flush (bool): same as that in :func:`print`. Default: False.
+
+ Example:
+ >>> class ExampleModel(nn.Module):
+
+ >>> def __init__(self):
+ >>> super().__init__()
+ >>> self.conv1 = nn.Conv2d(3, 8, 3)
+ >>> self.conv2 = nn.Conv2d(8, 256, 3)
+ >>> self.conv3 = nn.Conv2d(256, 8, 3)
+ >>> self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
+ >>> self.flatten = nn.Flatten()
+ >>> self.fc = nn.Linear(8, 1)
+
+ >>> def forward(self, x):
+ >>> x = self.conv1(x)
+ >>> x = self.conv2(x)
+ >>> x = self.conv3(x)
+ >>> x = self.avg_pool(x)
+ >>> x = self.flatten(x)
+ >>> x = self.fc(x)
+ >>> return x
+
+ >>> model = ExampleModel()
+ >>> x = (3, 16, 16)
+ To print the complexity information for each layer, you can use
+ >>> get_model_complexity_info(model, x)
+ or directly use
+ >>> print_model_with_flops(model, 4579784.0, 37361)
+ ExampleModel(
+ 0.037 M, 100.000% Params, 0.005 GFLOPs, 100.000% FLOPs,
+ (conv1): Conv2d(0.0 M, 0.600% Params, 0.0 GFLOPs, 0.959% FLOPs, 3, 8, kernel_size=(3, 3), stride=(1, 1)) # noqa: E501
+ (conv2): Conv2d(0.019 M, 50.020% Params, 0.003 GFLOPs, 58.760% FLOPs, 8, 256, kernel_size=(3, 3), stride=(1, 1))
+ (conv3): Conv2d(0.018 M, 49.356% Params, 0.002 GFLOPs, 40.264% FLOPs, 256, 8, kernel_size=(3, 3), stride=(1, 1))
+ (avg_pool): AdaptiveAvgPool2d(0.0 M, 0.000% Params, 0.0 GFLOPs, 0.017% FLOPs, output_size=(1, 1))
+ (flatten): Flatten(0.0 M, 0.000% Params, 0.0 GFLOPs, 0.000% FLOPs, )
+ (fc): Linear(0.0 M, 0.024% Params, 0.0 GFLOPs, 0.000% FLOPs, in_features=8, out_features=1, bias=True)
+ )
+ """
+
+ def accumulate_params(self):
+ if is_supported_instance(self):
+ return self.__params__
+ else:
+ sum = 0
+ for m in self.children():
+ sum += m.accumulate_params()
+ return sum
+
+ def accumulate_flops(self):
+ if is_supported_instance(self):
+ return self.__flops__ / model.__batch_counter__
+ else:
+ sum = 0
+ for m in self.children():
+ sum += m.accumulate_flops()
+ return sum
+
+ def flops_repr(self):
+ accumulated_num_params = self.accumulate_params()
+ accumulated_flops_cost = self.accumulate_flops()
+ return ', '.join([
+ params_to_string(
+ accumulated_num_params, units='M', precision=precision),
+ '{:.3%} Params'.format(accumulated_num_params / total_params),
+ flops_to_string(
+ accumulated_flops_cost, units=units, precision=precision),
+ '{:.3%} FLOPs'.format(accumulated_flops_cost / total_flops),
+ self.original_extra_repr()
+ ])
+
+ def add_extra_repr(m):
+ m.accumulate_flops = accumulate_flops.__get__(m)
+ m.accumulate_params = accumulate_params.__get__(m)
+ flops_extra_repr = flops_repr.__get__(m)
+ if m.extra_repr != flops_extra_repr:
+ m.original_extra_repr = m.extra_repr
+ m.extra_repr = flops_extra_repr
+ assert m.extra_repr != m.original_extra_repr
+
+ def del_extra_repr(m):
+ if hasattr(m, 'original_extra_repr'):
+ m.extra_repr = m.original_extra_repr
+ del m.original_extra_repr
+ if hasattr(m, 'accumulate_flops'):
+ del m.accumulate_flops
+
+ model.apply(add_extra_repr)
+ print(model, file=ost, flush=flush)
+ model.apply(del_extra_repr)
+
+
+def get_model_parameters_number(model):
+ """Calculate parameter number of a model.
+
+ Args:
+ model (nn.module): The model for parameter number calculation.
+
+ Returns:
+ float: Parameter number of the model.
+ """
+ num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
+ return num_params
+
+
+def add_flops_counting_methods(net_main_module):
+ # adding additional methods to the existing module object,
+ # this is done this way so that each function has access to self object
+ net_main_module.start_flops_count = start_flops_count.__get__(
+ net_main_module)
+ net_main_module.stop_flops_count = stop_flops_count.__get__(
+ net_main_module)
+ net_main_module.reset_flops_count = reset_flops_count.__get__(
+ net_main_module)
+ net_main_module.compute_average_flops_cost = compute_average_flops_cost.__get__( # noqa: E501
+ net_main_module)
+
+ net_main_module.reset_flops_count()
+
+ return net_main_module
+
+
+def compute_average_flops_cost(self):
+ """Compute average FLOPs cost.
+
+ A method to compute average FLOPs cost, which will be available after
+ `add_flops_counting_methods()` is called on a desired net object.
+
+ Returns:
+ float: Current mean flops consumption per image.
+ """
+ batches_count = self.__batch_counter__
+ flops_sum = 0
+ for module in self.modules():
+ if is_supported_instance(module):
+ flops_sum += module.__flops__
+ params_sum = get_model_parameters_number(self)
+ return flops_sum / batches_count, params_sum
+
+
+def start_flops_count(self):
+ """Activate the computation of mean flops consumption per image.
+
+ A method to activate the computation of mean flops consumption per image,
+ which will be available after ``add_flops_counting_methods()`` is called on
+ a desired net object. It should be called before running the network.
+ """
+ add_batch_counter_hook_function(self)
+
+ def add_flops_counter_hook_function(module):
+ if is_supported_instance(module):
+ if hasattr(module, '__flops_handle__'):
+ return
+
+ else:
+ handle = module.register_forward_hook(
+ get_modules_mapping()[type(module)])
+
+ module.__flops_handle__ = handle
+
+ self.apply(partial(add_flops_counter_hook_function))
+
+
+def stop_flops_count(self):
+ """Stop computing the mean flops consumption per image.
+
+ A method to stop computing the mean flops consumption per image, which will
+ be available after ``add_flops_counting_methods()`` is called on a desired
+ net object. It can be called to pause the computation at any time.
+ """
+ remove_batch_counter_hook_function(self)
+ self.apply(remove_flops_counter_hook_function)
+
+
+def reset_flops_count(self):
+ """Reset statistics computed so far.
+
+ A method to reset the computed statistics, which will be available after
+ `add_flops_counting_methods()` is called on a desired net object.
+ """
+ add_batch_counter_variables_or_reset(self)
+ self.apply(add_flops_counter_variable_or_reset)
+
+
+# ---- Internal functions
+def empty_flops_counter_hook(module, input, output):
+ module.__flops__ += 0
+
+
+def upsample_flops_counter_hook(module, input, output):
+ output_size = output[0]
+ batch_size = output_size.shape[0]
+ output_elements_count = batch_size
+ for val in output_size.shape[1:]:
+ output_elements_count *= val
+ module.__flops__ += int(output_elements_count)
+
+
+def relu_flops_counter_hook(module, input, output):
+ active_elements_count = output.numel()
+ module.__flops__ += int(active_elements_count)
+
+
+def linear_flops_counter_hook(module, input, output):
+ input = input[0]
+ output_last_dim = output.shape[
+ -1] # pytorch checks dimensions, so here we don't care much
+ module.__flops__ += int(np.prod(input.shape) * output_last_dim)
+
+
+def pool_flops_counter_hook(module, input, output):
+ input = input[0]
+ module.__flops__ += int(np.prod(input.shape))
+
+
+def norm_flops_counter_hook(module, input, output):
+ input = input[0]
+
+ batch_flops = np.prod(input.shape)
+ if (getattr(module, 'affine', False)
+ or getattr(module, 'elementwise_affine', False)):
+ batch_flops *= 2
+ module.__flops__ += int(batch_flops)
+
+
+def deconv_flops_counter_hook(conv_module, input, output):
+ # Can have multiple inputs, getting the first one
+ input = input[0]
+
+ batch_size = input.shape[0]
+ input_height, input_width = input.shape[2:]
+
+ kernel_height, kernel_width = conv_module.kernel_size
+ in_channels = conv_module.in_channels
+ out_channels = conv_module.out_channels
+ groups = conv_module.groups
+
+ filters_per_channel = out_channels // groups
+ conv_per_position_flops = (
+ kernel_height * kernel_width * in_channels * filters_per_channel)
+
+ active_elements_count = batch_size * input_height * input_width
+ overall_conv_flops = conv_per_position_flops * active_elements_count
+ bias_flops = 0
+ if conv_module.bias is not None:
+ output_height, output_width = output.shape[2:]
+ bias_flops = out_channels * batch_size * output_height * output_width
+ overall_flops = overall_conv_flops + bias_flops
+
+ conv_module.__flops__ += int(overall_flops)
+
+
+def conv_flops_counter_hook(conv_module, input, output):
+ # Can have multiple inputs, getting the first one
+ input = input[0]
+
+ batch_size = input.shape[0]
+ output_dims = list(output.shape[2:])
+
+ kernel_dims = list(conv_module.kernel_size)
+ in_channels = conv_module.in_channels
+ out_channels = conv_module.out_channels
+ groups = conv_module.groups
+
+ filters_per_channel = out_channels // groups
+ conv_per_position_flops = int(
+ np.prod(kernel_dims)) * in_channels * filters_per_channel
+
+ active_elements_count = batch_size * int(np.prod(output_dims))
+
+ overall_conv_flops = conv_per_position_flops * active_elements_count
+
+ bias_flops = 0
+
+ if conv_module.bias is not None:
+
+ bias_flops = out_channels * active_elements_count
+
+ overall_flops = overall_conv_flops + bias_flops
+
+ conv_module.__flops__ += int(overall_flops)
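+
+
+# Worked example (editor's note): for ``nn.Conv2d(3, 8, 3)`` on a 1x3x16x16
+# input the output is 1x8x14x14, so
+#   conv_per_position_flops = 3*3 * 3 * 8 = 216
+#   active_elements_count   = 1 * 14*14  = 196
+#   overall_conv_flops      = 216 * 196  = 42336
+#   bias_flops              = 8 * 196    = 1568   (bias enabled by default)
+# and the hook adds 43904 to ``__flops__`` for this layer.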
+
+
+def batch_counter_hook(module, input, output):
+ batch_size = 1
+ if len(input) > 0:
+ # Can have multiple inputs, getting the first one
+ input = input[0]
+ batch_size = len(input)
+ else:
+ warnings.warn('No positional inputs found for a module, '
+ 'assuming batch size is 1.')
+ module.__batch_counter__ += batch_size
+
+
+def add_batch_counter_variables_or_reset(module):
+
+ module.__batch_counter__ = 0
+
+
+def add_batch_counter_hook_function(module):
+ if hasattr(module, '__batch_counter_handle__'):
+ return
+
+ handle = module.register_forward_hook(batch_counter_hook)
+ module.__batch_counter_handle__ = handle
+
+
+def remove_batch_counter_hook_function(module):
+ if hasattr(module, '__batch_counter_handle__'):
+ module.__batch_counter_handle__.remove()
+ del module.__batch_counter_handle__
+
+
+def add_flops_counter_variable_or_reset(module):
+ if is_supported_instance(module):
+ if hasattr(module, '__flops__') or hasattr(module, '__params__'):
+ warnings.warn('variables __flops__ or __params__ are already '
+ 'defined for the module' + type(module).__name__ +
+ ' ptflops can affect your code!')
+ module.__flops__ = 0
+ module.__params__ = get_model_parameters_number(module)
+
+
+def is_supported_instance(module):
+ if type(module) in get_modules_mapping():
+ return True
+ return False
+
+
+def remove_flops_counter_hook_function(module):
+ if is_supported_instance(module):
+ if hasattr(module, '__flops_handle__'):
+ module.__flops_handle__.remove()
+ del module.__flops_handle__
+
+
+def get_modules_mapping():
+ return {
+ # convolutions
+ nn.Conv1d: conv_flops_counter_hook,
+ nn.Conv2d: conv_flops_counter_hook,
+ mmcv.cnn.bricks.Conv2d: conv_flops_counter_hook,
+ nn.Conv3d: conv_flops_counter_hook,
+ mmcv.cnn.bricks.Conv3d: conv_flops_counter_hook,
+ # activations
+ nn.ReLU: relu_flops_counter_hook,
+ nn.PReLU: relu_flops_counter_hook,
+ nn.ELU: relu_flops_counter_hook,
+ nn.LeakyReLU: relu_flops_counter_hook,
+ nn.ReLU6: relu_flops_counter_hook,
+ # poolings
+ nn.MaxPool1d: pool_flops_counter_hook,
+ nn.AvgPool1d: pool_flops_counter_hook,
+ nn.AvgPool2d: pool_flops_counter_hook,
+ nn.MaxPool2d: pool_flops_counter_hook,
+ mmcv.cnn.bricks.MaxPool2d: pool_flops_counter_hook,
+ nn.MaxPool3d: pool_flops_counter_hook,
+ mmcv.cnn.bricks.MaxPool3d: pool_flops_counter_hook,
+ nn.AvgPool3d: pool_flops_counter_hook,
+ nn.AdaptiveMaxPool1d: pool_flops_counter_hook,
+ nn.AdaptiveAvgPool1d: pool_flops_counter_hook,
+ nn.AdaptiveMaxPool2d: pool_flops_counter_hook,
+ nn.AdaptiveAvgPool2d: pool_flops_counter_hook,
+ nn.AdaptiveMaxPool3d: pool_flops_counter_hook,
+ nn.AdaptiveAvgPool3d: pool_flops_counter_hook,
+ # normalizations
+ nn.BatchNorm1d: norm_flops_counter_hook,
+ nn.BatchNorm2d: norm_flops_counter_hook,
+ nn.BatchNorm3d: norm_flops_counter_hook,
+ nn.GroupNorm: norm_flops_counter_hook,
+ nn.InstanceNorm1d: norm_flops_counter_hook,
+ nn.InstanceNorm2d: norm_flops_counter_hook,
+ nn.InstanceNorm3d: norm_flops_counter_hook,
+ nn.LayerNorm: norm_flops_counter_hook,
+ # FC
+ nn.Linear: linear_flops_counter_hook,
+ mmcv.cnn.bricks.Linear: linear_flops_counter_hook,
+ # Upscale
+ nn.Upsample: upsample_flops_counter_hook,
+ # Deconvolution
+ nn.ConvTranspose2d: deconv_flops_counter_hook,
+ mmcv.cnn.bricks.ConvTranspose2d: deconv_flops_counter_hook,
+ }
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/utils/fuse_conv_bn.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/utils/fuse_conv_bn.py
new file mode 100644
index 0000000000000000000000000000000000000000..fa39d248bf40d093c598535c072e2cb3b2578976
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/utils/fuse_conv_bn.py
@@ -0,0 +1,72 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+import torch.nn as nn
+
+
+def _fuse_conv_bn(conv, bn):
+ """Fuse conv and bn into one module.
+
+ Args:
+ conv (nn.Module): Conv to be fused.
+ bn (nn.Module): BN to be fused.
+
+ Returns:
+ nn.Module: Fused module.
+ """
+ conv_w = conv.weight
+ conv_b = conv.bias if conv.bias is not None else torch.zeros_like(
+ bn.running_mean)
+
+ factor = bn.weight / torch.sqrt(bn.running_var + bn.eps)
+ conv.weight = nn.Parameter(conv_w *
+ factor.reshape([conv.out_channels, 1, 1, 1]))
+ conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias)
+ return conv
+
+
+def fuse_conv_bn(module):
+ """Recursively fuse conv and bn in a module.
+
+ During inference, the function of batch norm layers is turned off and
+ only the per-channel mean and var are used, which makes it possible to
+ fuse them into the preceding conv layers to save computation and
+ simplify the network structure.
+
+ Args:
+ module (nn.Module): Module to be fused.
+
+ Returns:
+ nn.Module: Fused module.
+ """
+ last_conv = None
+ last_conv_name = None
+
+ for name, child in module.named_children():
+ if isinstance(child,
+ (nn.modules.batchnorm._BatchNorm, nn.SyncBatchNorm)):
+ if last_conv is None: # only fuse BN that is after Conv
+ continue
+ fused_conv = _fuse_conv_bn(last_conv, child)
+ module._modules[last_conv_name] = fused_conv
+ # To reduce changes, set BN as Identity instead of deleting it.
+ module._modules[name] = nn.Identity()
+ last_conv = None
+ elif isinstance(child, nn.Conv2d):
+ last_conv = child
+ last_conv_name = name
+ else:
+ fuse_conv_bn(child)
+ return module
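+
+
+# Usage sketch (editor's illustration): in eval mode the fused module should
+# match the original output up to floating point error.
+#
+#   net = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8)).eval()
+#   x = torch.randn(1, 3, 16, 16)
+#   ref = net(x)
+#   fused = fuse_conv_bn(net)
+#   assert torch.allclose(ref, fused(x), atol=1e-5)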
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/utils/sync_bn.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/utils/sync_bn.py
new file mode 100644
index 0000000000000000000000000000000000000000..7cd54b89a146169bb67667b97354c5de8d2e8eaf
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/utils/sync_bn.py
@@ -0,0 +1,73 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+
+import mmcv
+
+
+class _BatchNormXd(torch.nn.modules.batchnorm._BatchNorm):
+ """A general BatchNorm layer without input dimension check.
+
+ Reproduced from @kapily's work:
+ (https://github.com/pytorch/pytorch/issues/41081#issuecomment-783961547)
+ The only difference between BatchNorm1d, BatchNorm2d, BatchNorm3d, etc.
+ is `_check_input_dim`, which is designed for tensor sanity checks.
+ The check has been bypassed in this class for the convenience of converting
+ SyncBatchNorm.
+ """
+
+ def _check_input_dim(self, input):
+ return
+
+
+def revert_sync_batchnorm(module):
+ """Helper function to convert all `SyncBatchNorm` (SyncBN) and
+ `mmcv.ops.sync_bn.SyncBatchNorm` (MMSyncBN) layers in the model to
+ `BatchNormXd` layers.
+
+ Adapted from @kapily's work:
+ (https://github.com/pytorch/pytorch/issues/41081#issuecomment-783961547)
+
+ Args:
+ module (nn.Module): The module containing `SyncBatchNorm` layers.
+
+ Returns:
+ module_output: The converted module with `BatchNormXd` layers.
+ """
+ module_output = module
+ module_checklist = [torch.nn.modules.batchnorm.SyncBatchNorm]
+ if hasattr(mmcv, 'ops'):
+ module_checklist.append(mmcv.ops.SyncBatchNorm)
+ if isinstance(module, tuple(module_checklist)):
+ module_output = _BatchNormXd(module.num_features, module.eps,
+ module.momentum, module.affine,
+ module.track_running_stats)
+ if module.affine:
+ # no_grad() may not be needed here but
+ # just to be consistent with `convert_sync_batchnorm()`
+ with torch.no_grad():
+ module_output.weight = module.weight
+ module_output.bias = module.bias
+ module_output.running_mean = module.running_mean
+ module_output.running_var = module.running_var
+ module_output.num_batches_tracked = module.num_batches_tracked
+ module_output.training = module.training
+ # qconfig exists in quantized models
+ if hasattr(module, 'qconfig'):
+ module_output.qconfig = module.qconfig
+ for name, child in module.named_children():
+ module_output.add_module(name, revert_sync_batchnorm(child))
+ del module
+ return module_output
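+
+
+# Usage sketch (editor's illustration): convert SyncBN layers so the model can
+# run without an initialized process group, e.g. on a single device.
+#
+#   import torch.nn as nn
+#   model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.SyncBatchNorm(8))
+#   model = revert_sync_batchnorm(model)
+#   # the SyncBatchNorm is now a _BatchNormXd carrying the same statistics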
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/utils/weight_init.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/utils/weight_init.py
new file mode 100644
index 0000000000000000000000000000000000000000..51077db352fc3ed9de5f9373c3236da5b9645544
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/utils/weight_init.py
@@ -0,0 +1,698 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import copy
+import math
+import warnings
+
+import numpy as np
+import torch
+import torch.nn as nn
+from torch import Tensor
+
+from mmcv.utils import Registry, build_from_cfg, get_logger, print_log
+
+INITIALIZERS = Registry('initializer')
+
+
+def update_init_info(module, init_info):
+ """Update the `_params_init_info` in the module if the value of parameters
+ are changed.
+
+ Args:
+ module (obj:`nn.Module`): The module of PyTorch with a user-defined
+ attribute `_params_init_info` which records the initialization
+ information.
+ init_info (str): The string that describes the initialization.
+ """
+ assert hasattr(
+ module,
+ '_params_init_info'), f'Can not find `_params_init_info` in {module}'
+ for name, param in module.named_parameters():
+
+ assert param in module._params_init_info, (
+ f'Find a new :obj:`Parameter` '
+ f'named `{name}` during executing the '
+ f'`init_weights` of '
+ f'`{module.__class__.__name__}`. '
+ f'Please do not add or '
+ f'replace parameters during executing '
+ f'the `init_weights`. ')
+
+ # The parameter has been changed during executing the
+ # `init_weights` of module
+ mean_value = param.data.mean()
+ if module._params_init_info[param]['tmp_mean_value'] != mean_value:
+ module._params_init_info[param]['init_info'] = init_info
+ module._params_init_info[param]['tmp_mean_value'] = mean_value
+
+
+def constant_init(module, val, bias=0):
+ if hasattr(module, 'weight') and module.weight is not None:
+ nn.init.constant_(module.weight, val)
+ if hasattr(module, 'bias') and module.bias is not None:
+ nn.init.constant_(module.bias, bias)
+
+
+def xavier_init(module, gain=1, bias=0, distribution='normal'):
+ assert distribution in ['uniform', 'normal']
+ if hasattr(module, 'weight') and module.weight is not None:
+ if distribution == 'uniform':
+ nn.init.xavier_uniform_(module.weight, gain=gain)
+ else:
+ nn.init.xavier_normal_(module.weight, gain=gain)
+ if hasattr(module, 'bias') and module.bias is not None:
+ nn.init.constant_(module.bias, bias)
+
+
+def normal_init(module, mean=0, std=1, bias=0):
+ if hasattr(module, 'weight') and module.weight is not None:
+ nn.init.normal_(module.weight, mean, std)
+ if hasattr(module, 'bias') and module.bias is not None:
+ nn.init.constant_(module.bias, bias)
+
+
+def trunc_normal_init(module: nn.Module,
+ mean: float = 0,
+ std: float = 1,
+ a: float = -2,
+ b: float = 2,
+ bias: float = 0) -> None:
+ if hasattr(module, 'weight') and module.weight is not None:
+ trunc_normal_(module.weight, mean, std, a, b) # type: ignore
+ if hasattr(module, 'bias') and module.bias is not None:
+ nn.init.constant_(module.bias, bias) # type: ignore
+
+
+def uniform_init(module, a=0, b=1, bias=0):
+ if hasattr(module, 'weight') and module.weight is not None:
+ nn.init.uniform_(module.weight, a, b)
+ if hasattr(module, 'bias') and module.bias is not None:
+ nn.init.constant_(module.bias, bias)
+
+
+def kaiming_init(module,
+ a=0,
+ mode='fan_out',
+ nonlinearity='relu',
+ bias=0,
+ distribution='normal'):
+ assert distribution in ['uniform', 'normal']
+ if hasattr(module, 'weight') and module.weight is not None:
+ if distribution == 'uniform':
+ nn.init.kaiming_uniform_(
+ module.weight, a=a, mode=mode, nonlinearity=nonlinearity)
+ else:
+ nn.init.kaiming_normal_(
+ module.weight, a=a, mode=mode, nonlinearity=nonlinearity)
+ if hasattr(module, 'bias') and module.bias is not None:
+ nn.init.constant_(module.bias, bias)
+
+
+def caffe2_xavier_init(module, bias=0):
+ # `XavierFill` in Caffe2 corresponds to `kaiming_uniform_` in PyTorch
+ # Acknowledgment to FAIR's internal code
+ kaiming_init(
+ module,
+ a=1,
+ mode='fan_in',
+ nonlinearity='leaky_relu',
+ bias=bias,
+ distribution='uniform')
+
+
+def bias_init_with_prob(prior_prob):
+ """initialize conv/fc bias value according to a given probability value."""
+ bias_init = float(-np.log((1 - prior_prob) / prior_prob))
+ return bias_init
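+
+
+# Worked example (editor's note): with prior_prob=0.01 the returned bias is
+# -log((1 - 0.01) / 0.01) = -log(99) ~= -4.595, so a sigmoid over the
+# initialized logits starts out predicting roughly 1% positives.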
+
+
+def _get_bases_name(m):
+ return [b.__name__ for b in m.__class__.__bases__]
+
+
+class BaseInit(object):
+
+ def __init__(self, *, bias=0, bias_prob=None, layer=None):
+ self.wholemodule = False
+ if not isinstance(bias, (int, float)):
+ raise TypeError(f'bias must be a number, but got a {type(bias)}')
+
+ if bias_prob is not None:
+ if not isinstance(bias_prob, float):
+ raise TypeError(f'bias_prob type must be float, \
+ but got {type(bias_prob)}')
+
+ if layer is not None:
+ if not isinstance(layer, (str, list)):
+ raise TypeError(f'layer must be a str or a list of str, \
+ but got a {type(layer)}')
+ else:
+ layer = []
+
+ if bias_prob is not None:
+ self.bias = bias_init_with_prob(bias_prob)
+ else:
+ self.bias = bias
+ self.layer = [layer] if isinstance(layer, str) else layer
+
+ def _get_init_info(self):
+ info = f'{self.__class__.__name__}, bias={self.bias}'
+ return info
+
+
+@INITIALIZERS.register_module(name='Constant')
+class ConstantInit(BaseInit):
+ """Initialize module parameters with constant values.
+
+ Args:
+ val (int | float): the value to fill the weights in the module with
+ bias (int | float): the value to fill the bias. Defaults to 0.
+ bias_prob (float, optional): the probability for bias initialization.
+ Defaults to None.
+ layer (str | list[str], optional): the layer will be initialized.
+ Defaults to None.
+ """
+
+ def __init__(self, val, **kwargs):
+ super().__init__(**kwargs)
+ self.val = val
+
+ def __call__(self, module):
+
+ def init(m):
+ if self.wholemodule:
+ constant_init(m, self.val, self.bias)
+ else:
+ layername = m.__class__.__name__
+ basesname = _get_bases_name(m)
+ if len(set(self.layer) & set([layername] + basesname)):
+ constant_init(m, self.val, self.bias)
+
+ module.apply(init)
+ if hasattr(module, '_params_init_info'):
+ update_init_info(module, init_info=self._get_init_info())
+
+ def _get_init_info(self):
+ info = f'{self.__class__.__name__}: val={self.val}, bias={self.bias}'
+ return info
+
+
+@INITIALIZERS.register_module(name='Xavier')
+class XavierInit(BaseInit):
+ r"""Initialize module parameters with values according to the method
+ described in `Understanding the difficulty of training deep feedforward
+ neural networks - Glorot, X. & Bengio, Y. (2010).
+ <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
+
+ Args:
+ gain (int | float): an optional scaling factor. Defaults to 1.
+ bias (int | float): the value to fill the bias. Defaults to 0.
+ bias_prob (float, optional): the probability for bias initialization.
+ Defaults to None.
+ distribution (str): distribution either be ``'normal'``
+ or ``'uniform'``. Defaults to ``'normal'``.
+ layer (str | list[str], optional): the layer will be initialized.
+ Defaults to None.
+ """
+
+ def __init__(self, gain=1, distribution='normal', **kwargs):
+ super().__init__(**kwargs)
+ self.gain = gain
+ self.distribution = distribution
+
+ def __call__(self, module):
+
+ def init(m):
+ if self.wholemodule:
+ xavier_init(m, self.gain, self.bias, self.distribution)
+ else:
+ layername = m.__class__.__name__
+ basesname = _get_bases_name(m)
+ if len(set(self.layer) & set([layername] + basesname)):
+ xavier_init(m, self.gain, self.bias, self.distribution)
+
+ module.apply(init)
+ if hasattr(module, '_params_init_info'):
+ update_init_info(module, init_info=self._get_init_info())
+
+ def _get_init_info(self):
+ info = f'{self.__class__.__name__}: gain={self.gain}, ' \
+ f'distribution={self.distribution}, bias={self.bias}'
+ return info
+
+
+@INITIALIZERS.register_module(name='Normal')
+class NormalInit(BaseInit):
+ r"""Initialize module parameters with the values drawn from the normal
+ distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`.
+
+ Args:
+ mean (int | float):the mean of the normal distribution. Defaults to 0.
+ std (int | float): the standard deviation of the normal distribution.
+ Defaults to 1.
+ bias (int | float): the value to fill the bias. Defaults to 0.
+ bias_prob (float, optional): the probability for bias initialization.
+ Defaults to None.
+ layer (str | list[str], optional): the layer will be initialized.
+ Defaults to None.
+
+ """
+
+ def __init__(self, mean=0, std=1, **kwargs):
+ super().__init__(**kwargs)
+ self.mean = mean
+ self.std = std
+
+ def __call__(self, module):
+
+ def init(m):
+ if self.wholemodule:
+ normal_init(m, self.mean, self.std, self.bias)
+ else:
+ layername = m.__class__.__name__
+ basesname = _get_bases_name(m)
+ if len(set(self.layer) & set([layername] + basesname)):
+ normal_init(m, self.mean, self.std, self.bias)
+
+ module.apply(init)
+ if hasattr(module, '_params_init_info'):
+ update_init_info(module, init_info=self._get_init_info())
+
+ def _get_init_info(self):
+ info = f'{self.__class__.__name__}: mean={self.mean},' \
+ f' std={self.std}, bias={self.bias}'
+ return info
+
+
+@INITIALIZERS.register_module(name='TruncNormal')
+class TruncNormalInit(BaseInit):
+ r"""Initialize module parameters with the values drawn from the normal
+ distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` with values
+ outside :math:`[a, b]`.
+
+ Args:
+ mean (float): the mean of the normal distribution. Defaults to 0.
+ std (float): the standard deviation of the normal distribution.
+ Defaults to 1.
+ a (float): The minimum cutoff value.
+ b (float): The maximum cutoff value.
+ bias (float): the value to fill the bias. Defaults to 0.
+ bias_prob (float, optional): the probability for bias initialization.
+ Defaults to None.
+ layer (str | list[str], optional): the layer will be initialized.
+ Defaults to None.
+
+ """
+
+ def __init__(self,
+ mean: float = 0,
+ std: float = 1,
+ a: float = -2,
+ b: float = 2,
+ **kwargs) -> None:
+ super().__init__(**kwargs)
+ self.mean = mean
+ self.std = std
+ self.a = a
+ self.b = b
+
+ def __call__(self, module: nn.Module) -> None:
+
+ def init(m):
+ if self.wholemodule:
+ trunc_normal_init(m, self.mean, self.std, self.a, self.b,
+ self.bias)
+ else:
+ layername = m.__class__.__name__
+ basesname = _get_bases_name(m)
+ if len(set(self.layer) & set([layername] + basesname)):
+ trunc_normal_init(m, self.mean, self.std, self.a, self.b,
+ self.bias)
+
+ module.apply(init)
+ if hasattr(module, '_params_init_info'):
+ update_init_info(module, init_info=self._get_init_info())
+
+ def _get_init_info(self):
+ info = f'{self.__class__.__name__}: a={self.a}, b={self.b},' \
+ f' mean={self.mean}, std={self.std}, bias={self.bias}'
+ return info
+
+
+@INITIALIZERS.register_module(name='Uniform')
+class UniformInit(BaseInit):
+ r"""Initialize module parameters with values drawn from the uniform
+ distribution :math:`\mathcal{U}(a, b)`.
+
+ Args:
+ a (int | float): the lower bound of the uniform distribution.
+ Defaults to 0.
+ b (int | float): the upper bound of the uniform distribution.
+ Defaults to 1.
+ bias (int | float): the value to fill the bias. Defaults to 0.
+ bias_prob (float, optional): the probability for bias initialization.
+ Defaults to None.
+ layer (str | list[str], optional): the layer will be initialized.
+ Defaults to None.
+ """
+
+ def __init__(self, a=0, b=1, **kwargs):
+ super().__init__(**kwargs)
+ self.a = a
+ self.b = b
+
+ def __call__(self, module):
+
+ def init(m):
+ if self.wholemodule:
+ uniform_init(m, self.a, self.b, self.bias)
+ else:
+ layername = m.__class__.__name__
+ basesname = _get_bases_name(m)
+ if len(set(self.layer) & set([layername] + basesname)):
+ uniform_init(m, self.a, self.b, self.bias)
+
+ module.apply(init)
+ if hasattr(module, '_params_init_info'):
+ update_init_info(module, init_info=self._get_init_info())
+
+ def _get_init_info(self):
+ info = f'{self.__class__.__name__}: a={self.a},' \
+ f' b={self.b}, bias={self.bias}'
+ return info
+
+
+@INITIALIZERS.register_module(name='Kaiming')
+class KaimingInit(BaseInit):
+ r"""Initialize module parameters with the values according to the method
+ described in `Delving deep into rectifiers: Surpassing human-level
+ performance on ImageNet classification - He, K. et al. (2015).
+ <https://www.cv-foundation.org/openaccess/content_iccv_2015/papers/He_Delving_Deep_into_ICCV_2015_paper.pdf>`_
+
+ Args:
+ a (int | float): the negative slope of the rectifier used after this
+ layer (only used with ``'leaky_relu'``). Defaults to 0.
+ mode (str): either ``'fan_in'`` or ``'fan_out'``. Choosing
+ ``'fan_in'`` preserves the magnitude of the variance of the weights
+ in the forward pass. Choosing ``'fan_out'`` preserves the
+ magnitudes in the backwards pass. Defaults to ``'fan_out'``.
+ nonlinearity (str): the non-linear function (`nn.functional` name),
+ recommended to use only with ``'relu'`` or ``'leaky_relu'`` .
+ Defaults to 'relu'.
+ bias (int | float): the value to fill the bias. Defaults to 0.
+ bias_prob (float, optional): the probability for bias initialization.
+ Defaults to None.
+ distribution (str): distribution either be ``'normal'`` or
+ ``'uniform'``. Defaults to ``'normal'``.
+ layer (str | list[str], optional): the layer will be initialized.
+ Defaults to None.
+ """
+
+ def __init__(self,
+ a=0,
+ mode='fan_out',
+ nonlinearity='relu',
+ distribution='normal',
+ **kwargs):
+ super().__init__(**kwargs)
+ self.a = a
+ self.mode = mode
+ self.nonlinearity = nonlinearity
+ self.distribution = distribution
+
+ def __call__(self, module):
+
+ def init(m):
+ if self.wholemodule:
+ kaiming_init(m, self.a, self.mode, self.nonlinearity,
+ self.bias, self.distribution)
+ else:
+ layername = m.__class__.__name__
+ basesname = _get_bases_name(m)
+ if len(set(self.layer) & set([layername] + basesname)):
+ kaiming_init(m, self.a, self.mode, self.nonlinearity,
+ self.bias, self.distribution)
+
+ module.apply(init)
+ if hasattr(module, '_params_init_info'):
+ update_init_info(module, init_info=self._get_init_info())
+
+ def _get_init_info(self):
+ info = f'{self.__class__.__name__}: a={self.a}, mode={self.mode}, ' \
+ f'nonlinearity={self.nonlinearity}, ' \
+ f'distribution ={self.distribution}, bias={self.bias}'
+ return info
+
+
+@INITIALIZERS.register_module(name='Caffe2Xavier')
+class Caffe2XavierInit(KaimingInit):
+ # `XavierFill` in Caffe2 corresponds to `kaiming_uniform_` in PyTorch
+ # Acknowledgment to FAIR's internal code
+ def __init__(self, **kwargs):
+ super().__init__(
+ a=1,
+ mode='fan_in',
+ nonlinearity='leaky_relu',
+ distribution='uniform',
+ **kwargs)
+
+ def __call__(self, module):
+ super().__call__(module)
+
+
+@INITIALIZERS.register_module(name='Pretrained')
+class PretrainedInit(object):
+ """Initialize module by loading a pretrained model.
+
+ Args:
+ checkpoint (str): the checkpoint file of the pretrained model to
+ be loaded.
+ prefix (str, optional): the prefix of a sub-module in the pretrained
+ model. It is for loading a part of the pretrained model to
+ initialize. For example, if we would like to only load the
+ backbone of a detector model, we can set ``prefix='backbone.'``.
+ Defaults to None.
+ map_location (str): map tensors into proper locations.
+ """
+
+ def __init__(self, checkpoint, prefix=None, map_location=None):
+ self.checkpoint = checkpoint
+ self.prefix = prefix
+ self.map_location = map_location
+
+ def __call__(self, module):
+ from mmcv.runner import (_load_checkpoint_with_prefix, load_checkpoint,
+ load_state_dict)
+ logger = get_logger('mmcv')
+ if self.prefix is None:
+ print_log(f'load model from: {self.checkpoint}', logger=logger)
+ load_checkpoint(
+ module,
+ self.checkpoint,
+ map_location=self.map_location,
+ strict=False,
+ logger=logger)
+ else:
+ print_log(
+ f'load {self.prefix} in model from: {self.checkpoint}',
+ logger=logger)
+ state_dict = _load_checkpoint_with_prefix(
+ self.prefix, self.checkpoint, map_location=self.map_location)
+ load_state_dict(module, state_dict, strict=False, logger=logger)
+
+ if hasattr(module, '_params_init_info'):
+ update_init_info(module, init_info=self._get_init_info())
+
+ def _get_init_info(self):
+ info = f'{self.__class__.__name__}: load from {self.checkpoint}'
+ return info
+
+
+def _initialize(module, cfg, wholemodule=False):
+ func = build_from_cfg(cfg, INITIALIZERS)
+ # wholemodule flag is for override mode, there is no layer key in override
+ # and initializer will give init values for the whole module with the name
+ # in override.
+ func.wholemodule = wholemodule
+ func(module)
+
+
+def _initialize_override(module, override, cfg):
+ if not isinstance(override, (dict, list)):
+ raise TypeError(f'override must be a dict or a list of dict, \
+ but got {type(override)}')
+
+ override = [override] if isinstance(override, dict) else override
+
+ for override_ in override:
+
+ cp_override = copy.deepcopy(override_)
+ name = cp_override.pop('name', None)
+ if name is None:
+ raise ValueError('`override` must contain the key "name",'
+ f'but got {cp_override}')
+ # if override only has name key, it means use args in init_cfg
+ if not cp_override:
+ cp_override.update(cfg)
+ # if override has name key and other args except type key, it will
+ # raise error
+ elif 'type' not in cp_override.keys():
+ raise ValueError(
+ f'`override` need "type" key, but got {cp_override}')
+
+ if hasattr(module, name):
+ _initialize(getattr(module, name), cp_override, wholemodule=True)
+ else:
+ raise RuntimeError(f'module did not have attribute {name}, '
+ f'but init_cfg is {cp_override}.')
+
+
+def initialize(module, init_cfg):
+ r"""Initialize a module.
+
+ Args:
+ module (``torch.nn.Module``): the module will be initialized.
+ init_cfg (dict | list[dict]): initialization configuration dict to
+ define initializer. OpenMMLab has implemented 6 initializers
+ including ``Constant``, ``Xavier``, ``Normal``, ``Uniform``,
+ ``Kaiming``, and ``Pretrained``.
+
+ Example:
+ >>> module = nn.Linear(2, 3, bias=True)
+ >>> init_cfg = dict(type='Constant', layer='Linear', val=1, bias=2)
+ >>> initialize(module, init_cfg)
+
+ >>> module = nn.Sequential(nn.Conv1d(3, 1, 3), nn.Linear(1,2))
+ >>> # define key ``'layer'`` for initializing layer with different
+ >>> # configuration
+ >>> init_cfg = [dict(type='Constant', layer='Conv1d', val=1),
+ dict(type='Constant', layer='Linear', val=2)]
+ >>> initialize(module, init_cfg)
+
+ >>> # define key``'override'`` to initialize some specific part in
+ >>> # module
+ >>> class FooNet(nn.Module):
+ >>> def __init__(self):
+ >>> super().__init__()
+ >>> self.feat = nn.Conv2d(3, 16, 3)
+ >>> self.reg = nn.Conv2d(16, 10, 3)
+ >>> self.cls = nn.Conv2d(16, 5, 3)
+ >>> model = FooNet()
+ >>> init_cfg = dict(type='Constant', val=1, bias=2, layer='Conv2d',
+ >>> override=dict(type='Constant', name='reg', val=3, bias=4))
+ >>> initialize(model, init_cfg)
+
+ >>> model = ResNet(depth=50)
+ >>> # Initialize weights with the pretrained model.
+ >>> init_cfg = dict(type='Pretrained',
+ checkpoint='torchvision://resnet50')
+ >>> initialize(model, init_cfg)
+
+ >>> # Initialize weights of a sub-module with the specific part of
+ >>> # a pretrained model by using "prefix".
+ >>> url = 'http://download.openmmlab.com/mmdetection/v2.0/retinanet/'\
+ >>> 'retinanet_r50_fpn_1x_coco/'\
+ >>> 'retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth'
+ >>> init_cfg = dict(type='Pretrained',
+ checkpoint=url, prefix='backbone.')
+ """
+ if not isinstance(init_cfg, (dict, list)):
+ raise TypeError(f'init_cfg must be a dict or a list of dict, \
+ but got {type(init_cfg)}')
+
+ if isinstance(init_cfg, dict):
+ init_cfg = [init_cfg]
+
+ for cfg in init_cfg:
+ # should deeply copy the original config because cfg may be used by
+ # other modules, e.g., one init_cfg shared by multiple bottleneck
+ # blocks, the expected cfg will be changed after pop and will change
+ # the initialization behavior of other modules
+ cp_cfg = copy.deepcopy(cfg)
+ override = cp_cfg.pop('override', None)
+ _initialize(module, cp_cfg)
+
+ if override is not None:
+ cp_cfg.pop('layer', None)
+ _initialize_override(module, override, cp_cfg)
+ else:
+ # All attributes in module have same initialization.
+ pass
+
+
+def _no_grad_trunc_normal_(tensor: Tensor, mean: float, std: float, a: float,
+ b: float) -> Tensor:
+ # Method based on
+ # https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf
+ # Modified from
+ # https://github.com/pytorch/pytorch/blob/master/torch/nn/init.py
+ def norm_cdf(x):
+ # Computes standard normal cumulative distribution function
+ return (1. + math.erf(x / math.sqrt(2.))) / 2.
+
+ if (mean < a - 2 * std) or (mean > b + 2 * std):
+ warnings.warn(
+ 'mean is more than 2 std from [a, b] in nn.init.trunc_normal_. '
+ 'The distribution of values may be incorrect.',
+ stacklevel=2)
+
+ with torch.no_grad():
+ # Values are generated by using a truncated uniform distribution and
+ # then using the inverse CDF for the normal distribution.
+ # Get upper and lower cdf values
+ lower = norm_cdf((a - mean) / std)
+ upper = norm_cdf((b - mean) / std)
+
+ # Uniformly fill tensor with values from [lower, upper], then translate
+ # to [2lower-1, 2upper-1].
+ tensor.uniform_(2 * lower - 1, 2 * upper - 1)
+
+ # Use inverse cdf transform for normal distribution to get truncated
+ # standard normal
+ tensor.erfinv_()
+
+ # Transform to proper mean, std
+ tensor.mul_(std * math.sqrt(2.))
+ tensor.add_(mean)
+
+ # Clamp to ensure it's in the proper range
+ tensor.clamp_(min=a, max=b)
+ return tensor
+
+
+def trunc_normal_(tensor: Tensor,
+ mean: float = 0.,
+ std: float = 1.,
+ a: float = -2.,
+ b: float = 2.) -> Tensor:
+ r"""Fills the input Tensor with values drawn from a truncated
+ normal distribution. The values are effectively drawn from the
+ normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`
+ with values outside :math:`[a, b]` redrawn until they are within
+ the bounds. The method used for generating the random values works
+ best when :math:`a \leq \text{mean} \leq b`.
+
+ Modified from
+ https://github.com/pytorch/pytorch/blob/master/torch/nn/init.py
+
+ Args:
+ tensor (``torch.Tensor``): an n-dimensional `torch.Tensor`.
+ mean (float): the mean of the normal distribution.
+ std (float): the standard deviation of the normal distribution.
+ a (float): the minimum cutoff value.
+ b (float): the maximum cutoff value.
+ """
+ return _no_grad_trunc_normal_(tensor, mean, std, a, b)
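+
+
+# Usage sketch (editor's illustration): the ViT-style weight initialization
+# this helper is typically used for.
+#
+#   w = torch.empty(768, 768)
+#   trunc_normal_(w, std=.02)   # N(0, 0.02^2) samples, clamped to [-2, 2]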
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/vgg.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/vgg.py
new file mode 100644
index 0000000000000000000000000000000000000000..55ee30d247e4a062eac0506ec67a11c41f16c38c
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/cnn/vgg.py
@@ -0,0 +1,188 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+
+import torch.nn as nn
+
+from .utils import constant_init, kaiming_init, normal_init
+
+
+def conv3x3(in_planes, out_planes, dilation=1):
+ """3x3 convolution with padding."""
+ return nn.Conv2d(
+ in_planes,
+ out_planes,
+ kernel_size=3,
+ padding=dilation,
+ dilation=dilation)
+
+
+def make_vgg_layer(inplanes,
+ planes,
+ num_blocks,
+ dilation=1,
+ with_bn=False,
+ ceil_mode=False):
+ layers = []
+ for _ in range(num_blocks):
+ layers.append(conv3x3(inplanes, planes, dilation))
+ if with_bn:
+ layers.append(nn.BatchNorm2d(planes))
+ layers.append(nn.ReLU(inplace=True))
+ inplanes = planes
+ layers.append(nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=ceil_mode))
+
+ return layers
+
+
+class VGG(nn.Module):
+ """VGG backbone.
+
+ Args:
+ depth (int): Depth of vgg, from {11, 13, 16, 19}.
+ with_bn (bool): Use BatchNorm or not.
+ num_classes (int): number of classes for classification.
+ num_stages (int): VGG stages, normally 5.
+ dilations (Sequence[int]): Dilation of each stage.
+ out_indices (Sequence[int]): Output from which stages.
+ frozen_stages (int): Stages to be frozen (all param fixed). -1 means
+ not freezing any parameters.
+ bn_eval (bool): Whether to set BN layers as eval mode, namely, freeze
+ running stats (mean and var).
+ bn_frozen (bool): Whether to freeze weight and bias of BN layers.
+ """
+
+ arch_settings = {
+ 11: (1, 1, 2, 2, 2),
+ 13: (2, 2, 2, 2, 2),
+ 16: (2, 2, 3, 3, 3),
+ 19: (2, 2, 4, 4, 4)
+ }
+
+ def __init__(self,
+ depth,
+ with_bn=False,
+ num_classes=-1,
+ num_stages=5,
+ dilations=(1, 1, 1, 1, 1),
+ out_indices=(0, 1, 2, 3, 4),
+ frozen_stages=-1,
+ bn_eval=True,
+ bn_frozen=False,
+ ceil_mode=False,
+ with_last_pool=True):
+ super(VGG, self).__init__()
+ if depth not in self.arch_settings:
+ raise KeyError(f'invalid depth {depth} for vgg')
+ assert num_stages >= 1 and num_stages <= 5
+ stage_blocks = self.arch_settings[depth]
+ self.stage_blocks = stage_blocks[:num_stages]
+ assert len(dilations) == num_stages
+ assert max(out_indices) <= num_stages
+
+ self.num_classes = num_classes
+ self.out_indices = out_indices
+ self.frozen_stages = frozen_stages
+ self.bn_eval = bn_eval
+ self.bn_frozen = bn_frozen
+
+ self.inplanes = 3
+ start_idx = 0
+ vgg_layers = []
+ self.range_sub_modules = []
+ for i, num_blocks in enumerate(self.stage_blocks):
+ num_modules = num_blocks * (2 + with_bn) + 1
+ end_idx = start_idx + num_modules
+ dilation = dilations[i]
+ planes = 64 * 2**i if i < 4 else 512
+ vgg_layer = make_vgg_layer(
+ self.inplanes,
+ planes,
+ num_blocks,
+ dilation=dilation,
+ with_bn=with_bn,
+ ceil_mode=ceil_mode)
+ vgg_layers.extend(vgg_layer)
+ self.inplanes = planes
+ self.range_sub_modules.append([start_idx, end_idx])
+ start_idx = end_idx
+ if not with_last_pool:
+ vgg_layers.pop(-1)
+ self.range_sub_modules[-1][1] -= 1
+ self.module_name = 'features'
+ self.add_module(self.module_name, nn.Sequential(*vgg_layers))
+
+ if self.num_classes > 0:
+ self.classifier = nn.Sequential(
+ nn.Linear(512 * 7 * 7, 4096),
+ nn.ReLU(True),
+ nn.Dropout(),
+ nn.Linear(4096, 4096),
+ nn.ReLU(True),
+ nn.Dropout(),
+ nn.Linear(4096, num_classes),
+ )
+
+ def init_weights(self, pretrained=None):
+ if isinstance(pretrained, str):
+ logger = logging.getLogger()
+ from ..runner import load_checkpoint
+ load_checkpoint(self, pretrained, strict=False, logger=logger)
+ elif pretrained is None:
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ kaiming_init(m)
+ elif isinstance(m, nn.BatchNorm2d):
+ constant_init(m, 1)
+ elif isinstance(m, nn.Linear):
+ normal_init(m, std=0.01)
+ else:
+ raise TypeError('pretrained must be a str or None')
+
+ def forward(self, x):
+ outs = []
+ vgg_layers = getattr(self, self.module_name)
+ for i in range(len(self.stage_blocks)):
+ for j in range(*self.range_sub_modules[i]):
+ vgg_layer = vgg_layers[j]
+ x = vgg_layer(x)
+ if i in self.out_indices:
+ outs.append(x)
+ if self.num_classes > 0:
+ x = x.view(x.size(0), -1)
+ x = self.classifier(x)
+ outs.append(x)
+ if len(outs) == 1:
+ return outs[0]
+ else:
+ return tuple(outs)
+
+ def train(self, mode=True):
+ super(VGG, self).train(mode)
+ if self.bn_eval:
+ for m in self.modules():
+ if isinstance(m, nn.BatchNorm2d):
+ m.eval()
+ if self.bn_frozen:
+ for params in m.parameters():
+ params.requires_grad = False
+ vgg_layers = getattr(self, self.module_name)
+ if mode and self.frozen_stages >= 0:
+ for i in range(self.frozen_stages):
+ for j in range(*self.range_sub_modules[i]):
+ mod = vgg_layers[j]
+ mod.eval()
+ for param in mod.parameters():
+ param.requires_grad = False
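A rough usage sketch for the VGG backbone defined above (illustrative only, not part of the patch; it assumes the ``VGG`` class is in scope):

    import torch

    # VGG-16 without a classification head (num_classes defaults to -1),
    # returning the feature map of every stage
    model = VGG(depth=16, out_indices=(0, 1, 2, 3, 4))
    model.init_weights()            # random init since pretrained is None
    model.eval()

    x = torch.randn(1, 3, 224, 224)
    with torch.no_grad():
        feats = model(x)            # tuple of 5 stage outputs
    print([f.shape for f in feats])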
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/engine/__init__.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/engine/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..0730e959a9298d679a3097736431e51094bcf408
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/engine/__init__.py
@@ -0,0 +1,21 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .test import (collect_results_cpu, collect_results_gpu, multi_gpu_test,
+ single_gpu_test)
+
+__all__ = [
+ 'collect_results_cpu', 'collect_results_gpu', 'multi_gpu_test',
+ 'single_gpu_test'
+]
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/engine/test.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/engine/test.py
new file mode 100644
index 0000000000000000000000000000000000000000..2eaf9fb4715011ce7687e6c4aa71dc099a1a3d0e
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/engine/test.py
@@ -0,0 +1,215 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os.path as osp
+import pickle
+import shutil
+import tempfile
+import time
+
+import torch
+import torch.distributed as dist
+
+import mmcv
+from mmcv.runner import get_dist_info
+
+
+def single_gpu_test(model, data_loader):
+ """Test model with a single gpu.
+
+    This method tests the model with a single gpu and displays a test
+    progress bar.
+
+ Args:
+ model (nn.Module): Model to be tested.
+ data_loader (nn.Dataloader): Pytorch data loader.
+
+ Returns:
+ list: The prediction results.
+ """
+ model.eval()
+ results = []
+ dataset = data_loader.dataset
+ prog_bar = mmcv.ProgressBar(len(dataset))
+ for data in data_loader:
+ with torch.no_grad():
+ result = model(return_loss=False, **data)
+ results.extend(result)
+
+        # Assume result has the same length as batch_size
+ # refer to https://github.com/open-mmlab/mmcv/issues/985
+ batch_size = len(result)
+ for _ in range(batch_size):
+ prog_bar.update()
+ return results
+
+
+def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False):
+ """Test model with multiple gpus.
+
+    This method tests the model with multiple gpus and collects the results
+    under two different modes: gpu and cpu. By setting ``gpu_collect=True``,
+    it encodes results to gpu tensors and uses gpu communication for result
+    collection. In cpu mode it saves the results from different gpus to
+    ``tmpdir`` and collects them with the rank 0 worker.
+
+ Args:
+ model (nn.Module): Model to be tested.
+ data_loader (nn.Dataloader): Pytorch data loader.
+ tmpdir (str): Path of directory to save the temporary results from
+ different gpus under cpu mode.
+ gpu_collect (bool): Option to use either gpu or cpu to collect results.
+
+ Returns:
+ list: The prediction results.
+ """
+ model.eval()
+ results = []
+ dataset = data_loader.dataset
+ rank, world_size = get_dist_info()
+ if rank == 0:
+ prog_bar = mmcv.ProgressBar(len(dataset))
+    time.sleep(2)  # This line can prevent a deadlock problem in some cases.
+ for i, data in enumerate(data_loader):
+ with torch.no_grad():
+ result = model(return_loss=False, **data)
+ results.extend(result)
+
+ if rank == 0:
+ batch_size = len(result)
+ batch_size_all = batch_size * world_size
+ if batch_size_all + prog_bar.completed > len(dataset):
+ batch_size_all = len(dataset) - prog_bar.completed
+ for _ in range(batch_size_all):
+ prog_bar.update()
+
+ # collect results from all ranks
+ if gpu_collect:
+ results = collect_results_gpu(results, len(dataset))
+ else:
+ results = collect_results_cpu(results, len(dataset), tmpdir)
+ return results
+
+
+def collect_results_cpu(result_part, size, tmpdir=None):
+ """Collect results under cpu mode.
+
+    In cpu mode, this function will save the results from different gpus to
+ ``tmpdir`` and collect them by the rank 0 worker.
+
+ Args:
+ result_part (list): Result list containing result parts
+ to be collected.
+ size (int): Size of the results, commonly equal to length of
+ the results.
+ tmpdir (str | None): temporal directory for collected results to
+ store. If set to None, it will create a random temporal directory
+ for it.
+
+ Returns:
+ list: The collected results.
+ """
+ rank, world_size = get_dist_info()
+ # create a tmp dir if it is not specified
+ if tmpdir is None:
+ MAX_LEN = 512
+ # 32 is whitespace
+ dir_tensor = torch.full((MAX_LEN, ),
+ 32,
+ dtype=torch.uint8,
+ device='cuda')
+ if rank == 0:
+ mmcv.mkdir_or_exist('.dist_test')
+ tmpdir = tempfile.mkdtemp(dir='.dist_test')
+ tmpdir = torch.tensor(
+ bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')
+ dir_tensor[:len(tmpdir)] = tmpdir
+ dist.broadcast(dir_tensor, 0)
+ tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
+ else:
+ mmcv.mkdir_or_exist(tmpdir)
+ # dump the part result to the dir
+ mmcv.dump(result_part, osp.join(tmpdir, f'part_{rank}.pkl'))
+ dist.barrier()
+ # collect all parts
+ if rank != 0:
+ return None
+ else:
+ # load results of all parts from tmp dir
+ part_list = []
+ for i in range(world_size):
+ part_file = osp.join(tmpdir, f'part_{i}.pkl')
+ part_result = mmcv.load(part_file)
+ # When data is severely insufficient, an empty part_result
+            # on a certain gpu could make the overall outputs empty.
+ if part_result:
+ part_list.append(part_result)
+ # sort the results
+ ordered_results = []
+ for res in zip(*part_list):
+ ordered_results.extend(list(res))
+ # the dataloader may pad some samples
+ ordered_results = ordered_results[:size]
+ # remove tmp dir
+ shutil.rmtree(tmpdir)
+ return ordered_results
+
+
+def collect_results_gpu(result_part, size):
+ """Collect results under gpu mode.
+
+    In gpu mode, this function will encode results to gpu tensors and use gpu
+ communication for results collection.
+
+ Args:
+ result_part (list): Result list containing result parts
+ to be collected.
+ size (int): Size of the results, commonly equal to length of
+ the results.
+
+ Returns:
+ list: The collected results.
+ """
+ rank, world_size = get_dist_info()
+ # dump result part to tensor with pickle
+ part_tensor = torch.tensor(
+ bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda')
+ # gather all result part tensor shape
+ shape_tensor = torch.tensor(part_tensor.shape, device='cuda')
+ shape_list = [shape_tensor.clone() for _ in range(world_size)]
+ dist.all_gather(shape_list, shape_tensor)
+ # padding result part tensor to max length
+ shape_max = torch.tensor(shape_list).max()
+ part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda')
+ part_send[:shape_tensor[0]] = part_tensor
+ part_recv_list = [
+ part_tensor.new_zeros(shape_max) for _ in range(world_size)
+ ]
+ # gather all result part
+ dist.all_gather(part_recv_list, part_send)
+
+ if rank == 0:
+ part_list = []
+ for recv, shape in zip(part_recv_list, shape_list):
+ part_result = pickle.loads(recv[:shape[0]].cpu().numpy().tobytes())
+ # When data is severely insufficient, an empty part_result
+            # on a certain gpu could make the overall outputs empty.
+ if part_result:
+ part_list.append(part_result)
+ # sort the results
+ ordered_results = []
+ for res in zip(*part_list):
+ ordered_results.extend(list(res))
+ # the dataloader may pad some samples
+ ordered_results = ordered_results[:size]
+ return ordered_results
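A sketch of how the helpers above are typically combined in an evaluation script (illustrative only, not part of the patch; ``model``, ``data_loader`` and the distributed launcher setup are assumed to exist elsewhere):

    from mmcv.runner import get_dist_info

    rank, world_size = get_dist_info()
    if world_size > 1:
        # gpu_collect=True gathers results via gpu tensors (collect_results_gpu);
        # gpu_collect=False writes pickle parts to tmpdir (collect_results_cpu)
        results = multi_gpu_test(model, data_loader, tmpdir='.eval_tmp',
                                 gpu_collect=False)
    else:
        results = single_gpu_test(model, data_loader)

    if rank == 0:
        print(f'collected {len(results)} predictions')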
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/fileio/__init__.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/fileio/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..21a683db00d98219e9df878cb3511aa53181c5d1
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/fileio/__init__.py
@@ -0,0 +1,24 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .file_client import BaseStorageBackend, FileClient
+from .handlers import BaseFileHandler, JsonHandler, PickleHandler, YamlHandler
+from .io import dump, load, register_handler
+from .parse import dict_from_file, list_from_file
+
+__all__ = [
+ 'BaseStorageBackend', 'FileClient', 'load', 'dump', 'register_handler',
+ 'BaseFileHandler', 'JsonHandler', 'PickleHandler', 'YamlHandler',
+ 'list_from_file', 'dict_from_file'
+]
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/fileio/file_client.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/fileio/file_client.py
new file mode 100644
index 0000000000000000000000000000000000000000..fd7036be73144b1163241a00a58d5d297ca980ad
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/fileio/file_client.py
@@ -0,0 +1,1162 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import inspect
+import os
+import os.path as osp
+import re
+import tempfile
+import warnings
+from abc import ABCMeta, abstractmethod
+from contextlib import contextmanager
+from pathlib import Path
+from typing import Iterable, Iterator, Optional, Tuple, Union
+from urllib.request import urlopen
+
+import mmcv
+from mmcv.utils.misc import has_method
+from mmcv.utils.path import is_filepath
+
+
+class BaseStorageBackend(metaclass=ABCMeta):
+ """Abstract class of storage backends.
+
+    All backends need to implement two APIs: ``get()`` and ``get_text()``.
+    ``get()`` reads the file as a byte stream and ``get_text()`` reads the
+    file as text.
+ """
+
+ # a flag to indicate whether the backend can create a symlink for a file
+ _allow_symlink = False
+
+ @property
+ def name(self):
+ return self.__class__.__name__
+
+ @property
+ def allow_symlink(self):
+ return self._allow_symlink
+
+ @abstractmethod
+ def get(self, filepath):
+ pass
+
+ @abstractmethod
+ def get_text(self, filepath):
+ pass
+
+
+class CephBackend(BaseStorageBackend):
+ """Ceph storage backend (for internal use).
+
+ Args:
+ path_mapping (dict|None): path mapping dict from local path to Petrel
+ path. When ``path_mapping={'src': 'dst'}``, ``src`` in ``filepath``
+ will be replaced by ``dst``. Default: None.
+
+ .. warning::
+ :class:`mmcv.fileio.file_client.CephBackend` will be deprecated,
+ please use :class:`mmcv.fileio.file_client.PetrelBackend` instead.
+ """
+
+ def __init__(self, path_mapping=None):
+ try:
+ import ceph
+ except ImportError:
+ raise ImportError('Please install ceph to enable CephBackend.')
+
+ warnings.warn(
+ 'CephBackend will be deprecated, please use PetrelBackend instead',
+ DeprecationWarning)
+ self._client = ceph.S3Client()
+ assert isinstance(path_mapping, dict) or path_mapping is None
+ self.path_mapping = path_mapping
+
+ def get(self, filepath):
+ filepath = str(filepath)
+ if self.path_mapping is not None:
+ for k, v in self.path_mapping.items():
+ filepath = filepath.replace(k, v)
+ value = self._client.Get(filepath)
+ value_buf = memoryview(value)
+ return value_buf
+
+ def get_text(self, filepath, encoding=None):
+ raise NotImplementedError
+
+
+class PetrelBackend(BaseStorageBackend):
+ """Petrel storage backend (for internal use).
+
+ PetrelBackend supports reading and writing data to multiple clusters.
+ If the file path contains the cluster name, PetrelBackend will read data
+ from specified cluster or write data to it. Otherwise, PetrelBackend will
+ access the default cluster.
+
+ Args:
+ path_mapping (dict, optional): Path mapping dict from local path to
+ Petrel path. When ``path_mapping={'src': 'dst'}``, ``src`` in
+ ``filepath`` will be replaced by ``dst``. Default: None.
+ enable_mc (bool, optional): Whether to enable memcached support.
+ Default: True.
+
+ Examples:
+ >>> filepath1 = 's3://path/of/file'
+ >>> filepath2 = 'cluster-name:s3://path/of/file'
+ >>> client = PetrelBackend()
+ >>> client.get(filepath1) # get data from default cluster
+ >>> client.get(filepath2) # get data from 'cluster-name' cluster
+ """
+
+ def __init__(self,
+ path_mapping: Optional[dict] = None,
+ enable_mc: bool = True):
+ try:
+ from petrel_client import client
+ except ImportError:
+ raise ImportError('Please install petrel_client to enable '
+ 'PetrelBackend.')
+
+ self._client = client.Client(enable_mc=enable_mc)
+ assert isinstance(path_mapping, dict) or path_mapping is None
+ self.path_mapping = path_mapping
+
+ def _map_path(self, filepath: Union[str, Path]) -> str:
+ """Map ``filepath`` to a string path whose prefix will be replaced by
+ :attr:`self.path_mapping`.
+
+ Args:
+ filepath (str): Path to be mapped.
+ """
+ filepath = str(filepath)
+ if self.path_mapping is not None:
+ for k, v in self.path_mapping.items():
+ filepath = filepath.replace(k, v)
+ return filepath
+
+ def _format_path(self, filepath: str) -> str:
+ """Convert a ``filepath`` to standard format of petrel oss.
+
+ If the ``filepath`` is concatenated by ``os.path.join``, in a Windows
+ environment, the ``filepath`` will be the format of
+ 's3://bucket_name\\image.jpg'. By invoking :meth:`_format_path`, the
+ above ``filepath`` will be converted to 's3://bucket_name/image.jpg'.
+
+ Args:
+ filepath (str): Path to be formatted.
+ """
+ return re.sub(r'\\+', '/', filepath)
+
+ def get(self, filepath: Union[str, Path]) -> memoryview:
+ """Read data from a given ``filepath`` with 'rb' mode.
+
+ Args:
+ filepath (str or Path): Path to read data.
+
+ Returns:
+ memoryview: A memory view of expected bytes object to avoid
+ copying. The memoryview object can be converted to bytes by
+ ``value_buf.tobytes()``.
+ """
+ filepath = self._map_path(filepath)
+ filepath = self._format_path(filepath)
+ value = self._client.Get(filepath)
+ value_buf = memoryview(value)
+ return value_buf
+
+ def get_text(self,
+ filepath: Union[str, Path],
+ encoding: str = 'utf-8') -> str:
+ """Read data from a given ``filepath`` with 'r' mode.
+
+ Args:
+ filepath (str or Path): Path to read data.
+ encoding (str): The encoding format used to open the ``filepath``.
+ Default: 'utf-8'.
+
+ Returns:
+ str: Expected text reading from ``filepath``.
+ """
+ return str(self.get(filepath), encoding=encoding)
+
+ def put(self, obj: bytes, filepath: Union[str, Path]) -> None:
+ """Save data to a given ``filepath``.
+
+ Args:
+ obj (bytes): Data to be saved.
+ filepath (str or Path): Path to write data.
+ """
+ filepath = self._map_path(filepath)
+ filepath = self._format_path(filepath)
+ self._client.put(filepath, obj)
+
+ def put_text(self,
+ obj: str,
+ filepath: Union[str, Path],
+ encoding: str = 'utf-8') -> None:
+ """Save data to a given ``filepath``.
+
+ Args:
+ obj (str): Data to be written.
+ filepath (str or Path): Path to write data.
+ encoding (str): The encoding format used to encode the ``obj``.
+ Default: 'utf-8'.
+ """
+ self.put(bytes(obj, encoding=encoding), filepath)
+
+ def remove(self, filepath: Union[str, Path]) -> None:
+ """Remove a file.
+
+ Args:
+ filepath (str or Path): Path to be removed.
+ """
+ if not has_method(self._client, 'delete'):
+ raise NotImplementedError(
+ ('Current version of Petrel Python SDK has not supported '
+ 'the `delete` method, please use a higher version or dev'
+ ' branch instead.'))
+
+ filepath = self._map_path(filepath)
+ filepath = self._format_path(filepath)
+ self._client.delete(filepath)
+
+ def exists(self, filepath: Union[str, Path]) -> bool:
+ """Check whether a file path exists.
+
+ Args:
+ filepath (str or Path): Path to be checked whether exists.
+
+ Returns:
+ bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise.
+ """
+ if not (has_method(self._client, 'contains')
+ and has_method(self._client, 'isdir')):
+ raise NotImplementedError(
+ ('Current version of Petrel Python SDK has not supported '
+ 'the `contains` and `isdir` methods, please use a higher'
+ 'version or dev branch instead.'))
+
+ filepath = self._map_path(filepath)
+ filepath = self._format_path(filepath)
+ return self._client.contains(filepath) or self._client.isdir(filepath)
+
+ def isdir(self, filepath: Union[str, Path]) -> bool:
+ """Check whether a file path is a directory.
+
+ Args:
+ filepath (str or Path): Path to be checked whether it is a
+ directory.
+
+ Returns:
+ bool: Return ``True`` if ``filepath`` points to a directory,
+ ``False`` otherwise.
+ """
+ if not has_method(self._client, 'isdir'):
+ raise NotImplementedError(
+ ('Current version of Petrel Python SDK has not supported '
+ 'the `isdir` method, please use a higher version or dev'
+ ' branch instead.'))
+
+ filepath = self._map_path(filepath)
+ filepath = self._format_path(filepath)
+ return self._client.isdir(filepath)
+
+ def isfile(self, filepath: Union[str, Path]) -> bool:
+ """Check whether a file path is a file.
+
+ Args:
+ filepath (str or Path): Path to be checked whether it is a file.
+
+ Returns:
+ bool: Return ``True`` if ``filepath`` points to a file, ``False``
+ otherwise.
+ """
+ if not has_method(self._client, 'contains'):
+ raise NotImplementedError(
+ ('Current version of Petrel Python SDK has not supported '
+ 'the `contains` method, please use a higher version or '
+ 'dev branch instead.'))
+
+ filepath = self._map_path(filepath)
+ filepath = self._format_path(filepath)
+ return self._client.contains(filepath)
+
+ def join_path(self, filepath: Union[str, Path],
+ *filepaths: Union[str, Path]) -> str:
+ """Concatenate all file paths.
+
+ Args:
+ filepath (str or Path): Path to be concatenated.
+
+ Returns:
+ str: The result after concatenation.
+ """
+ filepath = self._format_path(self._map_path(filepath))
+ if filepath.endswith('/'):
+ filepath = filepath[:-1]
+ formatted_paths = [filepath]
+ for path in filepaths:
+ formatted_paths.append(self._format_path(self._map_path(path)))
+ return '/'.join(formatted_paths)
+
+ @contextmanager
+ def get_local_path(self, filepath: Union[str, Path]) -> Iterable[str]:
+ """Download a file from ``filepath`` and return a temporary path.
+
+        ``get_local_path`` is decorated by :meth:`contextlib.contextmanager`.
+        It can be called with a ``with`` statement, and when exiting from the
+        ``with`` statement, the temporary path will be released.
+
+ Args:
+ filepath (str | Path): Download a file from ``filepath``.
+
+ Examples:
+ >>> client = PetrelBackend()
+            >>> # After exiting from the ``with`` clause,
+ >>> # the path will be removed
+ >>> with client.get_local_path('s3://path/of/your/file') as path:
+ ... # do something here
+
+ Yields:
+ Iterable[str]: Only yield one temporary path.
+ """
+ filepath = self._map_path(filepath)
+ filepath = self._format_path(filepath)
+ assert self.isfile(filepath)
+ try:
+ f = tempfile.NamedTemporaryFile(delete=False)
+ f.write(self.get(filepath))
+ f.close()
+ yield f.name
+ finally:
+ os.remove(f.name)
+
+ def list_dir_or_file(self,
+ dir_path: Union[str, Path],
+ list_dir: bool = True,
+ list_file: bool = True,
+ suffix: Optional[Union[str, Tuple[str]]] = None,
+ recursive: bool = False) -> Iterator[str]:
+ """Scan a directory to find the interested directories or files in
+ arbitrary order.
+
+ Note:
+ Petrel has no concept of directories but it simulates the directory
+ hierarchy in the filesystem through public prefixes. In addition,
+ if the returned path ends with '/', it means the path is a public
+ prefix which is a logical directory.
+
+ Note:
+ :meth:`list_dir_or_file` returns the path relative to ``dir_path``.
+            In addition, the returned directory path will not contain the
+            trailing '/', which is consistent with other backends.
+
+ Args:
+ dir_path (str | Path): Path of the directory.
+ list_dir (bool): List the directories. Default: True.
+ list_file (bool): List the path of files. Default: True.
+ suffix (str or tuple[str], optional): File suffix
+ that we are interested in. Default: None.
+ recursive (bool): If set to True, recursively scan the
+ directory. Default: False.
+
+ Yields:
+ Iterable[str]: A relative path to ``dir_path``.
+ """
+ if not has_method(self._client, 'list'):
+ raise NotImplementedError(
+ ('Current version of Petrel Python SDK has not supported '
+ 'the `list` method, please use a higher version or dev'
+ ' branch instead.'))
+
+ dir_path = self._map_path(dir_path)
+ dir_path = self._format_path(dir_path)
+ if list_dir and suffix is not None:
+ raise TypeError(
+ '`list_dir` should be False when `suffix` is not None')
+
+ if (suffix is not None) and not isinstance(suffix, (str, tuple)):
+ raise TypeError('`suffix` must be a string or tuple of strings')
+
+ # Petrel's simulated directory hierarchy assumes that directory paths
+ # should end with `/`
+ if not dir_path.endswith('/'):
+ dir_path += '/'
+
+ root = dir_path
+
+ def _list_dir_or_file(dir_path, list_dir, list_file, suffix,
+ recursive):
+ for path in self._client.list(dir_path):
+ # the `self.isdir` is not used here to determine whether path
+ # is a directory, because `self.isdir` relies on
+ # `self._client.list`
+ if path.endswith('/'): # a directory path
+ next_dir_path = self.join_path(dir_path, path)
+ if list_dir:
+ # get the relative path and exclude the last
+ # character '/'
+ rel_dir = next_dir_path[len(root):-1]
+ yield rel_dir
+ if recursive:
+ yield from _list_dir_or_file(next_dir_path, list_dir,
+ list_file, suffix,
+ recursive)
+ else: # a file path
+ absolute_path = self.join_path(dir_path, path)
+ rel_path = absolute_path[len(root):]
+ if (suffix is None
+ or rel_path.endswith(suffix)) and list_file:
+ yield rel_path
+
+ return _list_dir_or_file(dir_path, list_dir, list_file, suffix,
+ recursive)
+
+
+class MemcachedBackend(BaseStorageBackend):
+ """Memcached storage backend.
+
+ Attributes:
+ server_list_cfg (str): Config file for memcached server list.
+ client_cfg (str): Config file for memcached client.
+ sys_path (str | None): Additional path to be appended to `sys.path`.
+ Default: None.
+ """
+
+ def __init__(self, server_list_cfg, client_cfg, sys_path=None):
+ if sys_path is not None:
+ import sys
+ sys.path.append(sys_path)
+ try:
+ import mc
+ except ImportError:
+ raise ImportError(
+ 'Please install memcached to enable MemcachedBackend.')
+
+ self.server_list_cfg = server_list_cfg
+ self.client_cfg = client_cfg
+ self._client = mc.MemcachedClient.GetInstance(self.server_list_cfg,
+ self.client_cfg)
+        # mc.pyvector serves as a pointer to a memory cache
+ self._mc_buffer = mc.pyvector()
+
+ def get(self, filepath):
+ filepath = str(filepath)
+ import mc
+ self._client.Get(filepath, self._mc_buffer)
+ value_buf = mc.ConvertBuffer(self._mc_buffer)
+ return value_buf
+
+ def get_text(self, filepath, encoding=None):
+ raise NotImplementedError
+
+
+class LmdbBackend(BaseStorageBackend):
+ """Lmdb storage backend.
+
+ Args:
+ db_path (str): Lmdb database path.
+ readonly (bool, optional): Lmdb environment parameter. If True,
+ disallow any write operations. Default: True.
+ lock (bool, optional): Lmdb environment parameter. If False, when
+ concurrent access occurs, do not lock the database. Default: False.
+ readahead (bool, optional): Lmdb environment parameter. If False,
+ disable the OS filesystem readahead mechanism, which may improve
+ random read performance when a database is larger than RAM.
+ Default: False.
+
+ Attributes:
+ db_path (str): Lmdb database path.
+ """
+
+ def __init__(self,
+ db_path,
+ readonly=True,
+ lock=False,
+ readahead=False,
+ **kwargs):
+ try:
+ import lmdb
+ except ImportError:
+ raise ImportError('Please install lmdb to enable LmdbBackend.')
+
+ self.db_path = str(db_path)
+ self._client = lmdb.open(
+ self.db_path,
+ readonly=readonly,
+ lock=lock,
+ readahead=readahead,
+ **kwargs)
+
+ def get(self, filepath):
+ """Get values according to the filepath.
+
+ Args:
+ filepath (str | obj:`Path`): Here, filepath is the lmdb key.
+ """
+ filepath = str(filepath)
+ with self._client.begin(write=False) as txn:
+ value_buf = txn.get(filepath.encode('ascii'))
+ return value_buf
+
+ def get_text(self, filepath, encoding=None):
+ raise NotImplementedError
+
+
+class HardDiskBackend(BaseStorageBackend):
+ """Raw hard disks storage backend."""
+
+ _allow_symlink = True
+
+ def get(self, filepath: Union[str, Path]) -> bytes:
+ """Read data from a given ``filepath`` with 'rb' mode.
+
+ Args:
+ filepath (str or Path): Path to read data.
+
+ Returns:
+ bytes: Expected bytes object.
+ """
+ with open(filepath, 'rb') as f:
+ value_buf = f.read()
+ return value_buf
+
+ def get_text(self,
+ filepath: Union[str, Path],
+ encoding: str = 'utf-8') -> str:
+ """Read data from a given ``filepath`` with 'r' mode.
+
+ Args:
+ filepath (str or Path): Path to read data.
+ encoding (str): The encoding format used to open the ``filepath``.
+ Default: 'utf-8'.
+
+ Returns:
+ str: Expected text reading from ``filepath``.
+ """
+ with open(filepath, 'r', encoding=encoding) as f:
+ value_buf = f.read()
+ return value_buf
+
+ def put(self, obj: bytes, filepath: Union[str, Path]) -> None:
+ """Write data to a given ``filepath`` with 'wb' mode.
+
+ Note:
+ ``put`` will create a directory if the directory of ``filepath``
+ does not exist.
+
+ Args:
+ obj (bytes): Data to be written.
+ filepath (str or Path): Path to write data.
+ """
+ mmcv.mkdir_or_exist(osp.dirname(filepath))
+ with open(filepath, 'wb') as f:
+ f.write(obj)
+
+ def put_text(self,
+ obj: str,
+ filepath: Union[str, Path],
+ encoding: str = 'utf-8') -> None:
+ """Write data to a given ``filepath`` with 'w' mode.
+
+ Note:
+ ``put_text`` will create a directory if the directory of
+ ``filepath`` does not exist.
+
+ Args:
+ obj (str): Data to be written.
+ filepath (str or Path): Path to write data.
+ encoding (str): The encoding format used to open the ``filepath``.
+ Default: 'utf-8'.
+ """
+ mmcv.mkdir_or_exist(osp.dirname(filepath))
+ with open(filepath, 'w', encoding=encoding) as f:
+ f.write(obj)
+
+ def remove(self, filepath: Union[str, Path]) -> None:
+ """Remove a file.
+
+ Args:
+ filepath (str or Path): Path to be removed.
+ """
+ os.remove(filepath)
+
+ def exists(self, filepath: Union[str, Path]) -> bool:
+ """Check whether a file path exists.
+
+ Args:
+ filepath (str or Path): Path to be checked whether exists.
+
+ Returns:
+ bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise.
+ """
+ return osp.exists(filepath)
+
+ def isdir(self, filepath: Union[str, Path]) -> bool:
+ """Check whether a file path is a directory.
+
+ Args:
+ filepath (str or Path): Path to be checked whether it is a
+ directory.
+
+ Returns:
+ bool: Return ``True`` if ``filepath`` points to a directory,
+ ``False`` otherwise.
+ """
+ return osp.isdir(filepath)
+
+ def isfile(self, filepath: Union[str, Path]) -> bool:
+ """Check whether a file path is a file.
+
+ Args:
+ filepath (str or Path): Path to be checked whether it is a file.
+
+ Returns:
+ bool: Return ``True`` if ``filepath`` points to a file, ``False``
+ otherwise.
+ """
+ return osp.isfile(filepath)
+
+ def join_path(self, filepath: Union[str, Path],
+ *filepaths: Union[str, Path]) -> str:
+ """Concatenate all file paths.
+
+ Join one or more filepath components intelligently. The return value
+ is the concatenation of filepath and any members of *filepaths.
+
+ Args:
+ filepath (str or Path): Path to be concatenated.
+
+ Returns:
+ str: The result of concatenation.
+ """
+ return osp.join(filepath, *filepaths)
+
+ @contextmanager
+ def get_local_path(
+ self, filepath: Union[str, Path]) -> Iterable[Union[str, Path]]:
+ """Only for unified API and do nothing."""
+ yield filepath
+
+ def list_dir_or_file(self,
+ dir_path: Union[str, Path],
+ list_dir: bool = True,
+ list_file: bool = True,
+ suffix: Optional[Union[str, Tuple[str]]] = None,
+ recursive: bool = False) -> Iterator[str]:
+ """Scan a directory to find the interested directories or files in
+ arbitrary order.
+
+ Note:
+ :meth:`list_dir_or_file` returns the path relative to ``dir_path``.
+
+ Args:
+ dir_path (str | Path): Path of the directory.
+ list_dir (bool): List the directories. Default: True.
+ list_file (bool): List the path of files. Default: True.
+ suffix (str or tuple[str], optional): File suffix
+ that we are interested in. Default: None.
+ recursive (bool): If set to True, recursively scan the
+ directory. Default: False.
+
+ Yields:
+ Iterable[str]: A relative path to ``dir_path``.
+ """
+ if list_dir and suffix is not None:
+ raise TypeError('`suffix` should be None when `list_dir` is True')
+
+ if (suffix is not None) and not isinstance(suffix, (str, tuple)):
+ raise TypeError('`suffix` must be a string or tuple of strings')
+
+ root = dir_path
+
+ def _list_dir_or_file(dir_path, list_dir, list_file, suffix,
+ recursive):
+ for entry in os.scandir(dir_path):
+ if not entry.name.startswith('.') and entry.is_file():
+ rel_path = osp.relpath(entry.path, root)
+ if (suffix is None
+ or rel_path.endswith(suffix)) and list_file:
+ yield rel_path
+ elif osp.isdir(entry.path):
+ if list_dir:
+ rel_dir = osp.relpath(entry.path, root)
+ yield rel_dir
+ if recursive:
+ yield from _list_dir_or_file(entry.path, list_dir,
+ list_file, suffix,
+ recursive)
+
+ return _list_dir_or_file(dir_path, list_dir, list_file, suffix,
+ recursive)
+
+
+class HTTPBackend(BaseStorageBackend):
+ """HTTP and HTTPS storage bachend."""
+
+ def get(self, filepath):
+ value_buf = urlopen(filepath).read()
+ return value_buf
+
+ def get_text(self, filepath, encoding='utf-8'):
+ value_buf = urlopen(filepath).read()
+ return value_buf.decode(encoding)
+
+ @contextmanager
+ def get_local_path(self, filepath: str) -> Iterable[str]:
+ """Download a file from ``filepath``.
+
+        ``get_local_path`` is decorated by :meth:`contextlib.contextmanager`.
+        It can be called with a ``with`` statement, and when exiting from the
+        ``with`` statement, the temporary path will be released.
+
+ Args:
+ filepath (str): Download a file from ``filepath``.
+
+ Examples:
+ >>> client = HTTPBackend()
+            >>> # After exiting from the ``with`` clause,
+ >>> # the path will be removed
+ >>> with client.get_local_path('http://path/of/your/file') as path:
+ ... # do something here
+ """
+ try:
+ f = tempfile.NamedTemporaryFile(delete=False)
+ f.write(self.get(filepath))
+ f.close()
+ yield f.name
+ finally:
+ os.remove(f.name)
+
+
+class FileClient:
+ """A general file client to access files in different backends.
+
+    The client loads a file or text from a specified backend given its path
+    and returns the content as binary data or text. There are two ways to
+    choose a backend: by backend name or by path prefix. If both are set,
+    ``backend`` takes priority and determines the storage backend; if both
+    are ``None``, the disk backend is chosen. Note that other backend
+    accessors can also be registered with a given name, prefixes, and
+    backend class. In addition, the singleton pattern is used to avoid
+    repeated object creation: if the arguments are the same, the same object
+    will be returned.
+
+ Args:
+ backend (str, optional): The storage backend type. Options are "disk",
+ "ceph", "memcached", "lmdb", "http" and "petrel". Default: None.
+ prefix (str, optional): The prefix of the registered storage backend.
+ Options are "s3", "http", "https". Default: None.
+
+ Examples:
+ >>> # only set backend
+ >>> file_client = FileClient(backend='petrel')
+ >>> # only set prefix
+ >>> file_client = FileClient(prefix='s3')
+ >>> # set both backend and prefix but use backend to choose client
+ >>> file_client = FileClient(backend='petrel', prefix='s3')
+ >>> # if the arguments are the same, the same object is returned
+ >>> file_client1 = FileClient(backend='petrel')
+ >>> file_client1 is file_client
+ True
+
+ Attributes:
+ client (:obj:`BaseStorageBackend`): The backend object.
+ """
+
+ _backends = {
+ 'disk': HardDiskBackend,
+ 'ceph': CephBackend,
+ 'memcached': MemcachedBackend,
+ 'lmdb': LmdbBackend,
+ 'petrel': PetrelBackend,
+ 'http': HTTPBackend,
+ }
+    # This collection records the overridden backends. When a backend appears
+    # in the collection, the singleton pattern is disabled for that backend,
+    # because otherwise the returned object would be the backend from before
+    # the override.
+ _overridden_backends = set()
+ _prefix_to_backends = {
+ 's3': PetrelBackend,
+ 'http': HTTPBackend,
+ 'https': HTTPBackend,
+ }
+ _overridden_prefixes = set()
+
+ _instances = {}
+
+ def __new__(cls, backend=None, prefix=None, **kwargs):
+ if backend is None and prefix is None:
+ backend = 'disk'
+ if backend is not None and backend not in cls._backends:
+ raise ValueError(
+ f'Backend {backend} is not supported. Currently supported ones'
+ f' are {list(cls._backends.keys())}')
+ if prefix is not None and prefix not in cls._prefix_to_backends:
+ raise ValueError(
+ f'prefix {prefix} is not supported. Currently supported ones '
+ f'are {list(cls._prefix_to_backends.keys())}')
+
+ # concatenate the arguments to a unique key for determining whether
+ # objects with the same arguments were created
+ arg_key = f'{backend}:{prefix}'
+ for key, value in kwargs.items():
+ arg_key += f':{key}:{value}'
+
+ # if a backend was overridden, it will create a new object
+ if (arg_key in cls._instances
+ and backend not in cls._overridden_backends
+ and prefix not in cls._overridden_prefixes):
+ _instance = cls._instances[arg_key]
+ else:
+ # create a new object and put it to _instance
+ _instance = super().__new__(cls)
+ if backend is not None:
+ _instance.client = cls._backends[backend](**kwargs)
+ else:
+ _instance.client = cls._prefix_to_backends[prefix](**kwargs)
+
+ cls._instances[arg_key] = _instance
+
+ return _instance
+
+ @property
+ def name(self):
+ return self.client.name
+
+ @property
+ def allow_symlink(self):
+ return self.client.allow_symlink
+
+ @staticmethod
+ def parse_uri_prefix(uri: Union[str, Path]) -> Optional[str]:
+ """Parse the prefix of a uri.
+
+ Args:
+ uri (str | Path): Uri to be parsed that contains the file prefix.
+
+ Examples:
+ >>> FileClient.parse_uri_prefix('s3://path/of/your/file')
+ 's3'
+
+ Returns:
+ str | None: Return the prefix of uri if the uri contains '://' else
+ ``None``.
+ """
+ assert is_filepath(uri)
+ uri = str(uri)
+ if '://' not in uri:
+ return None
+ else:
+ prefix, _ = uri.split('://')
+            # In the case of PetrelBackend, the prefix may contain the cluster
+            # name, e.g. clusterName:s3
+ if ':' in prefix:
+ _, prefix = prefix.split(':')
+ return prefix
+
+ @classmethod
+ def infer_client(cls,
+ file_client_args: Optional[dict] = None,
+ uri: Optional[Union[str, Path]] = None) -> 'FileClient':
+ """Infer a suitable file client based on the URI and arguments.
+
+ Args:
+ file_client_args (dict, optional): Arguments to instantiate a
+ FileClient. Default: None.
+ uri (str | Path, optional): Uri to be parsed that contains the file
+ prefix. Default: None.
+
+ Examples:
+ >>> uri = 's3://path/of/your/file'
+ >>> file_client = FileClient.infer_client(uri=uri)
+ >>> file_client_args = {'backend': 'petrel'}
+ >>> file_client = FileClient.infer_client(file_client_args)
+
+ Returns:
+ FileClient: Instantiated FileClient object.
+ """
+ assert file_client_args is not None or uri is not None
+ if file_client_args is None:
+ file_prefix = cls.parse_uri_prefix(uri) # type: ignore
+ return cls(prefix=file_prefix)
+ else:
+ return cls(**file_client_args)
+
+ @classmethod
+ def _register_backend(cls, name, backend, force=False, prefixes=None):
+ if not isinstance(name, str):
+ raise TypeError('the backend name should be a string, '
+ f'but got {type(name)}')
+ if not inspect.isclass(backend):
+ raise TypeError(
+ f'backend should be a class but got {type(backend)}')
+ if not issubclass(backend, BaseStorageBackend):
+ raise TypeError(
+ f'backend {backend} is not a subclass of BaseStorageBackend')
+ if not force and name in cls._backends:
+ raise KeyError(
+ f'{name} is already registered as a storage backend, '
+ 'add "force=True" if you want to override it')
+
+ if name in cls._backends and force:
+ cls._overridden_backends.add(name)
+ cls._backends[name] = backend
+
+ if prefixes is not None:
+ if isinstance(prefixes, str):
+ prefixes = [prefixes]
+ else:
+ assert isinstance(prefixes, (list, tuple))
+ for prefix in prefixes:
+ if prefix not in cls._prefix_to_backends:
+ cls._prefix_to_backends[prefix] = backend
+ elif (prefix in cls._prefix_to_backends) and force:
+ cls._overridden_prefixes.add(prefix)
+ cls._prefix_to_backends[prefix] = backend
+ else:
+ raise KeyError(
+ f'{prefix} is already registered as a storage backend,'
+ ' add "force=True" if you want to override it')
+
+ @classmethod
+ def register_backend(cls, name, backend=None, force=False, prefixes=None):
+ """Register a backend to FileClient.
+
+ This method can be used as a normal class method or a decorator.
+
+ .. code-block:: python
+
+ class NewBackend(BaseStorageBackend):
+
+ def get(self, filepath):
+ return filepath
+
+ def get_text(self, filepath):
+ return filepath
+
+ FileClient.register_backend('new', NewBackend)
+
+ or
+
+ .. code-block:: python
+
+ @FileClient.register_backend('new')
+ class NewBackend(BaseStorageBackend):
+
+ def get(self, filepath):
+ return filepath
+
+ def get_text(self, filepath):
+ return filepath
+
+ Args:
+ name (str): The name of the registered backend.
+ backend (class, optional): The backend class to be registered,
+ which must be a subclass of :class:`BaseStorageBackend`.
+ When this method is used as a decorator, backend is None.
+ Defaults to None.
+ force (bool, optional): Whether to override the backend if the name
+ has already been registered. Defaults to False.
+ prefixes (str or list[str] or tuple[str], optional): The prefixes
+ of the registered storage backend. Default: None.
+ `New in version 1.3.15.`
+ """
+ if backend is not None:
+ cls._register_backend(
+ name, backend, force=force, prefixes=prefixes)
+ return
+
+ def _register(backend_cls):
+ cls._register_backend(
+ name, backend_cls, force=force, prefixes=prefixes)
+ return backend_cls
+
+ return _register
+
+ def get(self, filepath: Union[str, Path]) -> Union[bytes, memoryview]:
+ """Read data from a given ``filepath`` with 'rb' mode.
+
+ Note:
+ There are two types of return values for ``get``, one is ``bytes``
+ and the other is ``memoryview``. The advantage of using memoryview
+ is that you can avoid copying, and if you want to convert it to
+ ``bytes``, you can use ``.tobytes()``.
+
+ Args:
+ filepath (str or Path): Path to read data.
+
+ Returns:
+ bytes | memoryview: Expected bytes object or a memory view of the
+ bytes object.
+ """
+ return self.client.get(filepath)
+
+ def get_text(self, filepath: Union[str, Path], encoding='utf-8') -> str:
+ """Read data from a given ``filepath`` with 'r' mode.
+
+ Args:
+ filepath (str or Path): Path to read data.
+ encoding (str): The encoding format used to open the ``filepath``.
+ Default: 'utf-8'.
+
+ Returns:
+ str: Expected text reading from ``filepath``.
+ """
+ return self.client.get_text(filepath, encoding)
+
+ def put(self, obj: bytes, filepath: Union[str, Path]) -> None:
+ """Write data to a given ``filepath`` with 'wb' mode.
+
+ Note:
+ ``put`` should create a directory if the directory of ``filepath``
+ does not exist.
+
+ Args:
+ obj (bytes): Data to be written.
+ filepath (str or Path): Path to write data.
+ """
+ self.client.put(obj, filepath)
+
+ def put_text(self, obj: str, filepath: Union[str, Path]) -> None:
+ """Write data to a given ``filepath`` with 'w' mode.
+
+ Note:
+ ``put_text`` should create a directory if the directory of
+ ``filepath`` does not exist.
+
+ Args:
+ obj (str): Data to be written.
+ filepath (str or Path): Path to write data.
+ """
+ self.client.put_text(obj, filepath)
+
+ def remove(self, filepath: Union[str, Path]) -> None:
+ """Remove a file.
+
+ Args:
+ filepath (str, Path): Path to be removed.
+ """
+ self.client.remove(filepath)
+
+ def exists(self, filepath: Union[str, Path]) -> bool:
+ """Check whether a file path exists.
+
+ Args:
+ filepath (str or Path): Path to be checked whether exists.
+
+ Returns:
+ bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise.
+ """
+ return self.client.exists(filepath)
+
+ def isdir(self, filepath: Union[str, Path]) -> bool:
+ """Check whether a file path is a directory.
+
+ Args:
+ filepath (str or Path): Path to be checked whether it is a
+ directory.
+
+ Returns:
+ bool: Return ``True`` if ``filepath`` points to a directory,
+ ``False`` otherwise.
+ """
+ return self.client.isdir(filepath)
+
+ def isfile(self, filepath: Union[str, Path]) -> bool:
+ """Check whether a file path is a file.
+
+ Args:
+ filepath (str or Path): Path to be checked whether it is a file.
+
+ Returns:
+ bool: Return ``True`` if ``filepath`` points to a file, ``False``
+ otherwise.
+ """
+ return self.client.isfile(filepath)
+
+ def join_path(self, filepath: Union[str, Path],
+ *filepaths: Union[str, Path]) -> str:
+ """Concatenate all file paths.
+
+ Join one or more filepath components intelligently. The return value
+ is the concatenation of filepath and any members of *filepaths.
+
+ Args:
+ filepath (str or Path): Path to be concatenated.
+
+ Returns:
+ str: The result of concatenation.
+ """
+ return self.client.join_path(filepath, *filepaths)
+
+ @contextmanager
+ def get_local_path(self, filepath: Union[str, Path]) -> Iterable[str]:
+ """Download data from ``filepath`` and write the data to local path.
+
+        ``get_local_path`` is decorated by :meth:`contextlib.contextmanager`.
+        It can be called with a ``with`` statement, and when exiting from the
+        ``with`` statement, the temporary path will be released.
+
+ Note:
+ If the ``filepath`` is a local path, just return itself.
+
+ .. warning::
+ ``get_local_path`` is an experimental interface that may change in
+ the future.
+
+ Args:
+ filepath (str or Path): Path to be read data.
+
+ Examples:
+ >>> file_client = FileClient(prefix='s3')
+ >>> with file_client.get_local_path('s3://bucket/abc.jpg') as path:
+ ... # do something here
+
+ Yields:
+ Iterable[str]: Only yield one path.
+ """
+ with self.client.get_local_path(str(filepath)) as local_path:
+ yield local_path
+
+ def list_dir_or_file(self,
+ dir_path: Union[str, Path],
+ list_dir: bool = True,
+ list_file: bool = True,
+ suffix: Optional[Union[str, Tuple[str]]] = None,
+ recursive: bool = False) -> Iterator[str]:
+ """Scan a directory to find the interested directories or files in
+ arbitrary order.
+
+ Note:
+ :meth:`list_dir_or_file` returns the path relative to ``dir_path``.
+
+ Args:
+ dir_path (str | Path): Path of the directory.
+ list_dir (bool): List the directories. Default: True.
+ list_file (bool): List the path of files. Default: True.
+ suffix (str or tuple[str], optional): File suffix
+ that we are interested in. Default: None.
+ recursive (bool): If set to True, recursively scan the
+ directory. Default: False.
+
+ Yields:
+ Iterable[str]: A relative path to ``dir_path``.
+ """
+ yield from self.client.list_dir_or_file(dir_path, list_dir, list_file,
+ suffix, recursive)
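A small sketch of the FileClient API above (illustrative only, not part of the patch; the ``dummy`` backend name and prefix are made up for the example):

    @FileClient.register_backend('dummy', prefixes='dummy')
    class DummyBackend(BaseStorageBackend):

        def get(self, filepath):
            return b'payload for ' + str(filepath).encode()

        def get_text(self, filepath, encoding='utf-8'):
            return f'payload for {filepath}'

    client = FileClient(backend='dummy')
    print(client.get_text('dummy://some/file.txt'))

    # with no backend/prefix the disk backend is chosen, and identical
    # arguments return the same singleton instance
    assert FileClient() is FileClient()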
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/fileio/handlers/__init__.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/fileio/handlers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..b54d3630512558b2a8951a652b33d6a42127ae56
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/fileio/handlers/__init__.py
@@ -0,0 +1,20 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .base import BaseFileHandler
+from .json_handler import JsonHandler
+from .pickle_handler import PickleHandler
+from .yaml_handler import YamlHandler
+
+__all__ = ['BaseFileHandler', 'JsonHandler', 'PickleHandler', 'YamlHandler']
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/fileio/handlers/base.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/fileio/handlers/base.py
new file mode 100644
index 0000000000000000000000000000000000000000..73c8a536a12da51eb66194d370ff0cb0156b8d50
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/fileio/handlers/base.py
@@ -0,0 +1,43 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from abc import ABCMeta, abstractmethod
+
+
+class BaseFileHandler(metaclass=ABCMeta):
+    # `str_like` is a flag indicating whether the file object type is
+    # str-like or bytes-like. Pickle only processes bytes-like objects while
+    # json only processes str-like objects. If it is str-like, `StringIO`
+    # will be used to process the buffer.
+ str_like = True
+
+ @abstractmethod
+ def load_from_fileobj(self, file, **kwargs):
+ pass
+
+ @abstractmethod
+ def dump_to_fileobj(self, obj, file, **kwargs):
+ pass
+
+ @abstractmethod
+ def dump_to_str(self, obj, **kwargs):
+ pass
+
+ def load_from_path(self, filepath, mode='r', **kwargs):
+ with open(filepath, mode) as f:
+ return self.load_from_fileobj(f, **kwargs)
+
+ def dump_to_path(self, obj, filepath, mode='w', **kwargs):
+ with open(filepath, mode) as f:
+ self.dump_to_fileobj(obj, f, **kwargs)
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/fileio/handlers/json_handler.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/fileio/handlers/json_handler.py
new file mode 100644
index 0000000000000000000000000000000000000000..713ba0db5633f9e0f4099527aa479dd939b2deb0
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/fileio/handlers/json_handler.py
@@ -0,0 +1,49 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import json
+
+import numpy as np
+
+from .base import BaseFileHandler
+
+
+def set_default(obj):
+ """Set default json values for non-serializable values.
+
+ It helps convert ``set``, ``range`` and ``np.ndarray`` data types to list.
+ It also converts ``np.generic`` (including ``np.int32``, ``np.float32``,
+    etc.) into plain numbers of Python built-in types.
+ """
+ if isinstance(obj, (set, range)):
+ return list(obj)
+ elif isinstance(obj, np.ndarray):
+ return obj.tolist()
+ elif isinstance(obj, np.generic):
+ return obj.item()
+ raise TypeError(f'{type(obj)} is unsupported for json dump')
+
+
+class JsonHandler(BaseFileHandler):
+
+ def load_from_fileobj(self, file):
+ return json.load(file)
+
+ def dump_to_fileobj(self, obj, file, **kwargs):
+ kwargs.setdefault('default', set_default)
+ json.dump(obj, file, **kwargs)
+
+ def dump_to_str(self, obj, **kwargs):
+ kwargs.setdefault('default', set_default)
+ return json.dumps(obj, **kwargs)
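A quick sketch of what ``set_default`` enables (illustrative only, not part of the patch): values the standard JSON encoder rejects are converted to built-in types before dumping.

    import numpy as np

    handler = JsonHandler()
    data = {'score': np.float32(0.5), 'steps': range(3), 'ids': {1, 2}}
    print(handler.dump_to_str(data))
    # e.g. {"score": 0.5, "steps": [0, 1, 2], "ids": [1, 2]}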
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/fileio/handlers/pickle_handler.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/fileio/handlers/pickle_handler.py
new file mode 100644
index 0000000000000000000000000000000000000000..509885247ed3f3203dd9caf0544e18fd6c96664d
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/fileio/handlers/pickle_handler.py
@@ -0,0 +1,41 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import pickle
+
+from .base import BaseFileHandler
+
+
+class PickleHandler(BaseFileHandler):
+
+ str_like = False
+
+ def load_from_fileobj(self, file, **kwargs):
+ return pickle.load(file, **kwargs)
+
+ def load_from_path(self, filepath, **kwargs):
+ return super(PickleHandler, self).load_from_path(
+ filepath, mode='rb', **kwargs)
+
+ def dump_to_str(self, obj, **kwargs):
+ kwargs.setdefault('protocol', 2)
+ return pickle.dumps(obj, **kwargs)
+
+ def dump_to_fileobj(self, obj, file, **kwargs):
+ kwargs.setdefault('protocol', 2)
+ pickle.dump(obj, file, **kwargs)
+
+ def dump_to_path(self, obj, filepath, **kwargs):
+ super(PickleHandler, self).dump_to_path(
+ obj, filepath, mode='wb', **kwargs)
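A short sketch of the pickle handler's `protocol=2` default and how to override it per call; the import path is illustrative.

```python
import pickle

# Illustrative import path; adjust to your installation of mmcv_replace.
from mmcv_replace.fileio.handlers.pickle_handler import PickleHandler

handler = PickleHandler()
data = {'epoch': 12, 'miou': 0.47}

blob_default = handler.dump_to_str(data)  # uses protocol=2 unless overridden
blob_latest = handler.dump_to_str(data, protocol=pickle.HIGHEST_PROTOCOL)

assert pickle.loads(blob_default) == pickle.loads(blob_latest) == data
```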
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/fileio/handlers/yaml_handler.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/fileio/handlers/yaml_handler.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d2aacb93549d18797f3d081420fffd3fcd9a4e0
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/fileio/handlers/yaml_handler.py
@@ -0,0 +1,37 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import yaml
+
+try:
+ from yaml import CLoader as Loader, CDumper as Dumper
+except ImportError:
+ from yaml import Loader, Dumper
+
+from .base import BaseFileHandler # isort:skip
+
+
+class YamlHandler(BaseFileHandler):
+
+ def load_from_fileobj(self, file, **kwargs):
+ kwargs.setdefault('Loader', Loader)
+ return yaml.load(file, **kwargs)
+
+ def dump_to_fileobj(self, obj, file, **kwargs):
+ kwargs.setdefault('Dumper', Dumper)
+ yaml.dump(obj, file, **kwargs)
+
+ def dump_to_str(self, obj, **kwargs):
+ kwargs.setdefault('Dumper', Dumper)
+ return yaml.dump(obj, **kwargs)
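The YAML handler prefers the libyaml-backed `CLoader`/`CDumper` and silently falls back to the pure-Python classes when the C bindings are unavailable. A small round-trip sketch, with an illustrative import path:

```python
from io import StringIO

# Illustrative import path; adjust to your installation of mmcv_replace.
from mmcv_replace.fileio.handlers.yaml_handler import YamlHandler

handler = YamlHandler()
cfg = {'lr': 0.01, 'milestones': [8, 11]}

text = handler.dump_to_str(cfg)                    # uses CDumper if available
assert handler.load_from_fileobj(StringIO(text)) == cfg
```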
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/fileio/io.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/fileio/io.py
new file mode 100644
index 0000000000000000000000000000000000000000..7536d1839bcd3ade037aaf07be3623ff14cac19d
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/fileio/io.py
@@ -0,0 +1,164 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from io import BytesIO, StringIO
+from pathlib import Path
+
+from ..utils import is_list_of, is_str
+from .file_client import FileClient
+from .handlers import BaseFileHandler, JsonHandler, PickleHandler, YamlHandler
+
+file_handlers = {
+ 'json': JsonHandler(),
+ 'yaml': YamlHandler(),
+ 'yml': YamlHandler(),
+ 'pickle': PickleHandler(),
+ 'pkl': PickleHandler()
+}
+
+
+def load(file, file_format=None, file_client_args=None, **kwargs):
+ """Load data from json/yaml/pickle files.
+
+ This method provides a unified api for loading data from serialized files.
+
+ Note:
+ In v1.3.16 and later, ``load`` supports loading data from serialized
+ files that can be stored in different backends.
+
+ Args:
+ file (str or :obj:`Path` or file-like object): Filename or a file-like
+ object.
+ file_format (str, optional): If not specified, the file format will be
+ inferred from the file extension, otherwise use the specified one.
+ Currently supported formats include "json", "yaml/yml" and
+ "pickle/pkl".
+ file_client_args (dict, optional): Arguments to instantiate a
+ FileClient. See :class:`mmcv.fileio.FileClient` for details.
+ Default: None.
+
+ Examples:
+ >>> load('/path/of/your/file') # file is storaged in disk
+ >>> load('https://path/of/your/file') # file is storaged in Internet
+ >>> load('s3://path/of/your/file') # file is storaged in petrel
+
+ Returns:
+ The content from the file.
+ """
+ if isinstance(file, Path):
+ file = str(file)
+ if file_format is None and is_str(file):
+ file_format = file.split('.')[-1]
+ if file_format not in file_handlers:
+ raise TypeError(f'Unsupported format: {file_format}')
+
+ handler = file_handlers[file_format]
+ if is_str(file):
+ file_client = FileClient.infer_client(file_client_args, file)
+ if handler.str_like:
+ with StringIO(file_client.get_text(file)) as f:
+ obj = handler.load_from_fileobj(f, **kwargs)
+ else:
+ with BytesIO(file_client.get(file)) as f:
+ obj = handler.load_from_fileobj(f, **kwargs)
+ elif hasattr(file, 'read'):
+ obj = handler.load_from_fileobj(file, **kwargs)
+ else:
+ raise TypeError('"file" must be a filepath str or a file-object')
+ return obj
+
+
+def dump(obj, file=None, file_format=None, file_client_args=None, **kwargs):
+ """Dump data to json/yaml/pickle strings or files.
+
+ This method provides a unified api for dumping data as strings or to files,
+ and also supports custom arguments for each file format.
+
+ Note:
+ In v1.3.16 and later, ``dump`` supports dumping data as strings or to
+ files which are saved to different backends.
+
+ Args:
+ obj (any): The python object to be dumped.
+ file (str or :obj:`Path` or file-like object, optional): If not
+ specified, then the object is dumped to a str, otherwise to a file
+ specified by the filename or file-like object.
+ file_format (str, optional): Same as :func:`load`.
+ file_client_args (dict, optional): Arguments to instantiate a
+ FileClient. See :class:`mmcv.fileio.FileClient` for details.
+ Default: None.
+
+ Examples:
+ >>> dump('hello world', '/path/of/your/file') # disk
+ >>> dump('hello world', 's3://path/of/your/file') # ceph or petrel
+
+ Returns:
+ bool: True for success, False otherwise.
+ """
+ if isinstance(file, Path):
+ file = str(file)
+ if file_format is None:
+ if is_str(file):
+ file_format = file.split('.')[-1]
+ elif file is None:
+ raise ValueError(
+ 'file_format must be specified since file is None')
+ if file_format not in file_handlers:
+ raise TypeError(f'Unsupported format: {file_format}')
+
+ handler = file_handlers[file_format]
+ if file is None:
+ return handler.dump_to_str(obj, **kwargs)
+ elif is_str(file):
+ file_client = FileClient.infer_client(file_client_args, file)
+ if handler.str_like:
+ with StringIO() as f:
+ handler.dump_to_fileobj(obj, f, **kwargs)
+ file_client.put_text(f.getvalue(), file)
+ else:
+ with BytesIO() as f:
+ handler.dump_to_fileobj(obj, f, **kwargs)
+ file_client.put(f.getvalue(), file)
+ elif hasattr(file, 'write'):
+ handler.dump_to_fileobj(obj, file, **kwargs)
+ else:
+ raise TypeError('"file" must be a filename str or a file-object')
+
+
+def _register_handler(handler, file_formats):
+ """Register a handler for some file extensions.
+
+ Args:
+ handler (:obj:`BaseFileHandler`): Handler to be registered.
+ file_formats (str or list[str]): File formats to be handled by this
+ handler.
+ """
+ if not isinstance(handler, BaseFileHandler):
+ raise TypeError(
+ f'handler must be a child of BaseFileHandler, not {type(handler)}')
+ if isinstance(file_formats, str):
+ file_formats = [file_formats]
+ if not is_list_of(file_formats, str):
+ raise TypeError('file_formats must be a str or a list of str')
+ for ext in file_formats:
+ file_handlers[ext] = handler
+
+
+def register_handler(file_formats, **kwargs):
+
+ def wrap(cls):
+ _register_handler(cls(**kwargs), file_formats)
+ return cls
+
+ return wrap
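`register_handler` is the decorator form of `_register_handler`, mapping extra file extensions onto `file_handlers`. A sketch registering a hypothetical plain-text handler; the `TxtHandler` class, the `.txt` mapping, the `/tmp` path, and the import paths are illustrative, not part of the upstream API.

```python
# Illustrative import paths; adjust to your installation of mmcv_replace.
from mmcv_replace.fileio.handlers import BaseFileHandler
from mmcv_replace.fileio.io import dump, load, register_handler


@register_handler('txt')
class TxtHandler(BaseFileHandler):
    """Hypothetical handler that treats a file as one plain string."""

    def load_from_fileobj(self, file):
        return file.read()

    def dump_to_fileobj(self, obj, file):
        file.write(str(obj))

    def dump_to_str(self, obj):
        return str(obj)


dump('hello world', '/tmp/demo.txt')   # routed to TxtHandler via the suffix
assert load('/tmp/demo.txt') == 'hello world'
```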
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/fileio/parse.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/fileio/parse.py
new file mode 100644
index 0000000000000000000000000000000000000000..487ee19563bf170a722b28f47209ec1b4bb02872
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/fileio/parse.py
@@ -0,0 +1,110 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from io import StringIO
+
+from .file_client import FileClient
+
+
+def list_from_file(filename,
+ prefix='',
+ offset=0,
+ max_num=0,
+ encoding='utf-8',
+ file_client_args=None):
+ """Load a text file and parse the content as a list of strings.
+
+ Note:
+ In v1.3.16 and later, ``list_from_file`` supports loading a text file
+ which can be stored in different backends and parsing the content as
+ a list of strings.
+
+ Args:
+ filename (str): Filename.
+ prefix (str): The prefix to be inserted at the beginning of each item.
+ offset (int): The number of lines to skip from the beginning of the file.
+ max_num (int): The maximum number of lines to be read,
+ zero and negative values mean no limit.
+ encoding (str): Encoding used to open the file. Default utf-8.
+ file_client_args (dict, optional): Arguments to instantiate a
+ FileClient. See :class:`mmcv.fileio.FileClient` for details.
+ Default: None.
+
+ Examples:
+ >>> list_from_file('/path/of/your/file') # disk
+ ['hello', 'world']
+ >>> list_from_file('s3://path/of/your/file') # ceph or petrel
+ ['hello', 'world']
+
+ Returns:
+ list[str]: A list of strings.
+ """
+ cnt = 0
+ item_list = []
+ file_client = FileClient.infer_client(file_client_args, filename)
+ with StringIO(file_client.get_text(filename, encoding)) as f:
+ for _ in range(offset):
+ f.readline()
+ for line in f:
+ if 0 < max_num <= cnt:
+ break
+ item_list.append(prefix + line.rstrip('\n\r'))
+ cnt += 1
+ return item_list
+
+
+def dict_from_file(filename,
+ key_type=str,
+ encoding='utf-8',
+ file_client_args=None):
+ """Load a text file and parse the content as a dict.
+
+ Each line of the text file should contain two or more columns split by
+ whitespace or tabs. The first column will be parsed as dict keys, and
+ the following columns will be parsed as dict values.
+
+ Note:
+ In v1.3.16 and later, ``dict_from_file`` supports loading a text file
+ which can be stored in different backends and parsing the content as
+ a dict.
+
+ Args:
+ filename (str): Filename.
+ key_type (type): Type of the dict keys. str is used by default and
+ type conversion will be performed if specified.
+ encoding (str): Encoding used to open the file. Default utf-8.
+ file_client_args (dict, optional): Arguments to instantiate a
+ FileClient. See :class:`mmcv.fileio.FileClient` for details.
+ Default: None.
+
+ Examples:
+ >>> dict_from_file('/path/of/your/file') # disk
+ {'key1': 'value1', 'key2': 'value2'}
+ >>> dict_from_file('s3://path/of/your/file') # ceph or petrel
+ {'key1': 'value1', 'key2': 'value2'}
+
+ Returns:
+ dict: The parsed contents.
+ """
+ mapping = {}
+ file_client = FileClient.infer_client(file_client_args, filename)
+ with StringIO(file_client.get_text(filename, encoding)) as f:
+ for line in f:
+ items = line.rstrip('\n').split()
+ assert len(items) >= 2
+ key = key_type(items[0])
+ val = items[1:] if len(items) > 2 else items[1]
+ mapping[key] = val
+ return mapping
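A small sketch of the two parsers above on a hypothetical split file; note how `dict_from_file` keeps a single trailing column as a string but folds multiple columns into a list, and how `key_type` casts the keys. The `/tmp` path and file contents are illustrative.

```python
# Illustrative import path; adjust to your installation of mmcv_replace.
from mmcv_replace.fileio.parse import dict_from_file, list_from_file

with open('/tmp/split.txt', 'w') as f:
    f.write('1 ADE_train_00000001\n'
            '2 ADE_train_00000002 val\n')

print(list_from_file('/tmp/split.txt', prefix='img/'))
# ['img/1 ADE_train_00000001', 'img/2 ADE_train_00000002 val']

print(dict_from_file('/tmp/split.txt', key_type=int))
# {1: 'ADE_train_00000001', 2: ['ADE_train_00000002', 'val']}
```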
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/image/__init__.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/image/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..8faf6456e7ac9f17ae511df7efe870d475b75b4f
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/image/__init__.py
@@ -0,0 +1,41 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .colorspace import (bgr2gray, bgr2hls, bgr2hsv, bgr2rgb, bgr2ycbcr,
+ gray2bgr, gray2rgb, hls2bgr, hsv2bgr, imconvert,
+ rgb2bgr, rgb2gray, rgb2ycbcr, ycbcr2bgr, ycbcr2rgb)
+from .geometric import (cutout, imcrop, imflip, imflip_, impad,
+ impad_to_multiple, imrescale, imresize, imresize_like,
+ imresize_to_multiple, imrotate, imshear, imtranslate,
+ rescale_size)
+from .io import imfrombytes, imread, imwrite, supported_backends, use_backend
+from .misc import tensor2imgs
+from .photometric import (adjust_brightness, adjust_color, adjust_contrast,
+ adjust_lighting, adjust_sharpness, auto_contrast,
+ clahe, imdenormalize, imequalize, iminvert,
+ imnormalize, imnormalize_, lut_transform, posterize,
+ solarize)
+
+__all__ = [
+ 'bgr2gray', 'bgr2hls', 'bgr2hsv', 'bgr2rgb', 'gray2bgr', 'gray2rgb',
+ 'hls2bgr', 'hsv2bgr', 'imconvert', 'rgb2bgr', 'rgb2gray', 'imrescale',
+ 'imresize', 'imresize_like', 'imresize_to_multiple', 'rescale_size',
+ 'imcrop', 'imflip', 'imflip_', 'impad', 'impad_to_multiple', 'imrotate',
+ 'imfrombytes', 'imread', 'imwrite', 'supported_backends', 'use_backend',
+ 'imdenormalize', 'imnormalize', 'imnormalize_', 'iminvert', 'posterize',
+ 'solarize', 'rgb2ycbcr', 'bgr2ycbcr', 'ycbcr2rgb', 'ycbcr2bgr',
+ 'tensor2imgs', 'imshear', 'imtranslate', 'adjust_color', 'imequalize',
+ 'adjust_brightness', 'adjust_contrast', 'lut_transform', 'clahe',
+ 'adjust_sharpness', 'auto_contrast', 'cutout', 'adjust_lighting'
+]
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/image/colorspace.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/image/colorspace.py
new file mode 100644
index 0000000000000000000000000000000000000000..f1289252008d1ebb41551998d5fc590ea7e57ce6
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/image/colorspace.py
@@ -0,0 +1,319 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import cv2
+import numpy as np
+
+
+def imconvert(img, src, dst):
+ """Convert an image from the src colorspace to dst colorspace.
+
+ Args:
+ img (ndarray): The input image.
+ src (str): The source colorspace, e.g., 'rgb', 'hsv'.
+ dst (str): The destination colorspace, e.g., 'rgb', 'hsv'.
+
+ Returns:
+ ndarray: The converted image.
+ """
+ code = getattr(cv2, f'COLOR_{src.upper()}2{dst.upper()}')
+ out_img = cv2.cvtColor(img, code)
+ return out_img
+
+
+def bgr2gray(img, keepdim=False):
+ """Convert a BGR image to grayscale image.
+
+ Args:
+ img (ndarray): The input image.
+ keepdim (bool): If False (by default), then return the grayscale image
+ with 2 dims, otherwise 3 dims.
+
+ Returns:
+ ndarray: The converted grayscale image.
+ """
+ out_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+ if keepdim:
+ out_img = out_img[..., None]
+ return out_img
+
+
+def rgb2gray(img, keepdim=False):
+ """Convert a RGB image to grayscale image.
+
+ Args:
+ img (ndarray): The input image.
+ keepdim (bool): If False (by default), then return the grayscale image
+ with 2 dims, otherwise 3 dims.
+
+ Returns:
+ ndarray: The converted grayscale image.
+ """
+ out_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
+ if keepdim:
+ out_img = out_img[..., None]
+ return out_img
+
+
+def gray2bgr(img):
+ """Convert a grayscale image to BGR image.
+
+ Args:
+ img (ndarray): The input image.
+
+ Returns:
+ ndarray: The converted BGR image.
+ """
+ img = img[..., None] if img.ndim == 2 else img
+ out_img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+ return out_img
+
+
+def gray2rgb(img):
+ """Convert a grayscale image to RGB image.
+
+ Args:
+ img (ndarray): The input image.
+
+ Returns:
+ ndarray: The converted RGB image.
+ """
+ img = img[..., None] if img.ndim == 2 else img
+ out_img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
+ return out_img
+
+
+def _convert_input_type_range(img):
+ """Convert the type and range of the input image.
+
+ It converts the input image to np.float32 type and range of [0, 1].
+ It is mainly used for pre-processing the input image in colorspace
+ conversion functions such as rgb2ycbcr and ycbcr2rgb.
+
+ Args:
+ img (ndarray): The input image. It accepts:
+ 1. np.uint8 type with range [0, 255];
+ 2. np.float32 type with range [0, 1].
+
+ Returns:
+ (ndarray): The converted image with type of np.float32 and range of
+ [0, 1].
+ """
+ img_type = img.dtype
+ img = img.astype(np.float32)
+ if img_type == np.float32:
+ pass
+ elif img_type == np.uint8:
+ img /= 255.
+ else:
+ raise TypeError('The img type should be np.float32 or np.uint8, '
+ f'but got {img_type}')
+ return img
+
+
+def _convert_output_type_range(img, dst_type):
+ """Convert the type and range of the image according to dst_type.
+
+ It converts the image to desired type and range. If `dst_type` is np.uint8,
+ images will be converted to np.uint8 type with range [0, 255]. If
+ `dst_type` is np.float32, it converts the image to np.float32 type with
+ range [0, 1].
+ It is mainly used for post-processing images in colorspace conversion
+ functions such as rgb2ycbcr and ycbcr2rgb.
+
+ Args:
+ img (ndarray): The image to be converted with np.float32 type and
+ range [0, 255].
+ dst_type (np.uint8 | np.float32): If dst_type is np.uint8, it
+ converts the image to np.uint8 type with range [0, 255]. If
+ dst_type is np.float32, it converts the image to np.float32 type
+ with range [0, 1].
+
+ Returns:
+ (ndarray): The converted image with desired type and range.
+ """
+ if dst_type not in (np.uint8, np.float32):
+ raise TypeError('The dst_type should be np.float32 or np.uint8, '
+ f'but got {dst_type}')
+ if dst_type == np.uint8:
+ img = img.round()
+ else:
+ img /= 255.
+ return img.astype(dst_type)
+
+
+def rgb2ycbcr(img, y_only=False):
+ """Convert a RGB image to YCbCr image.
+
+ This function produces the same results as Matlab's `rgb2ycbcr` function.
+ It implements the ITU-R BT.601 conversion for standard-definition
+ television. See more details in
+ https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.
+
+ It differs from a similar function in cv2.cvtColor: `RGB <-> YCrCb`.
+ In OpenCV, it implements a JPEG conversion. See more details in
+ https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.
+
+ Args:
+ img (ndarray): The input image. It accepts:
+ 1. np.uint8 type with range [0, 255];
+ 2. np.float32 type with range [0, 1].
+ y_only (bool): Whether to only return Y channel. Default: False.
+
+ Returns:
+ ndarray: The converted YCbCr image. The output image has the same type
+ and range as input image.
+ """
+ img_type = img.dtype
+ img = _convert_input_type_range(img)
+ if y_only:
+ out_img = np.dot(img, [65.481, 128.553, 24.966]) + 16.0
+ else:
+ out_img = np.matmul(
+ img, [[65.481, -37.797, 112.0], [128.553, -74.203, -93.786],
+ [24.966, 112.0, -18.214]]) + [16, 128, 128]
+ out_img = _convert_output_type_range(out_img, img_type)
+ return out_img
+
+
+def bgr2ycbcr(img, y_only=False):
+ """Convert a BGR image to YCbCr image.
+
+ The bgr version of rgb2ycbcr.
+ It implements the ITU-R BT.601 conversion for standard-definition
+ television. See more details in
+ https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.
+
+ It differs from a similar function in cv2.cvtColor: `BGR <-> YCrCb`.
+ In OpenCV, it implements a JPEG conversion. See more details in
+ https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.
+
+ Args:
+ img (ndarray): The input image. It accepts:
+ 1. np.uint8 type with range [0, 255];
+ 2. np.float32 type with range [0, 1].
+ y_only (bool): Whether to only return Y channel. Default: False.
+
+ Returns:
+ ndarray: The converted YCbCr image. The output image has the same type
+ and range as input image.
+ """
+ img_type = img.dtype
+ img = _convert_input_type_range(img)
+ if y_only:
+ out_img = np.dot(img, [24.966, 128.553, 65.481]) + 16.0
+ else:
+ out_img = np.matmul(
+ img, [[24.966, 112.0, -18.214], [128.553, -74.203, -93.786],
+ [65.481, -37.797, 112.0]]) + [16, 128, 128]
+ out_img = _convert_output_type_range(out_img, img_type)
+ return out_img
+
+
+def ycbcr2rgb(img):
+ """Convert a YCbCr image to RGB image.
+
+ This function produces the same results as Matlab's ycbcr2rgb function.
+ It implements the ITU-R BT.601 conversion for standard-definition
+ television. See more details in
+ https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.
+
+ It differs from a similar function in cv2.cvtColor: `YCrCb <-> RGB`.
+ In OpenCV, it implements a JPEG conversion. See more details in
+ https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.
+
+ Args:
+ img (ndarray): The input image. It accepts:
+ 1. np.uint8 type with range [0, 255];
+ 2. np.float32 type with range [0, 1].
+
+ Returns:
+ ndarray: The converted RGB image. The output image has the same type
+ and range as input image.
+ """
+ img_type = img.dtype
+ img = _convert_input_type_range(img) * 255
+ out_img = np.matmul(img, [[0.00456621, 0.00456621, 0.00456621],
+ [0, -0.00153632, 0.00791071],
+ [0.00625893, -0.00318811, 0]]) * 255.0 + [
+ -222.921, 135.576, -276.836
+ ]
+ out_img = _convert_output_type_range(out_img, img_type)
+ return out_img
+
+
+def ycbcr2bgr(img):
+ """Convert a YCbCr image to BGR image.
+
+ The bgr version of ycbcr2rgb.
+ It implements the ITU-R BT.601 conversion for standard-definition
+ television. See more details in
+ https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.
+
+ It differs from a similar function in cv2.cvtColor: `YCrCb <-> BGR`.
+ In OpenCV, it implements a JPEG conversion. See more details in
+ https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.
+
+ Args:
+ img (ndarray): The input image. It accepts:
+ 1. np.uint8 type with range [0, 255];
+ 2. np.float32 type with range [0, 1].
+
+ Returns:
+ ndarray: The converted BGR image. The output image has the same type
+ and range as input image.
+ """
+ img_type = img.dtype
+ img = _convert_input_type_range(img) * 255
+ out_img = np.matmul(img, [[0.00456621, 0.00456621, 0.00456621],
+ [0.00791071, -0.00153632, 0],
+ [0, -0.00318811, 0.00625893]]) * 255.0 + [
+ -276.836, 135.576, -222.921
+ ]
+ out_img = _convert_output_type_range(out_img, img_type)
+ return out_img
+
+
+def convert_color_factory(src, dst):
+
+ code = getattr(cv2, f'COLOR_{src.upper()}2{dst.upper()}')
+
+ def convert_color(img):
+ out_img = cv2.cvtColor(img, code)
+ return out_img
+
+ convert_color.__doc__ = f"""Convert a {src.upper()} image to {dst.upper()}
+ image.
+
+ Args:
+ img (ndarray): The input image.
+
+ Returns:
+ ndarray: The converted {dst.upper()} image.
+ """
+
+ return convert_color
+
+
+bgr2rgb = convert_color_factory('bgr', 'rgb')
+
+rgb2bgr = convert_color_factory('rgb', 'bgr')
+
+bgr2hsv = convert_color_factory('bgr', 'hsv')
+
+hsv2bgr = convert_color_factory('hsv', 'bgr')
+
+bgr2hls = convert_color_factory('bgr', 'hls')
+
+hls2bgr = convert_color_factory('hls', 'bgr')
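A brief sketch of how the BT.601 converters above preserve the input dtype and range through `_convert_input_type_range` / `_convert_output_type_range`; exact uint8 round trips are not guaranteed because of rounding, and the import path is illustrative.

```python
import numpy as np

# Illustrative import path; adjust to your installation of mmcv_replace.
from mmcv_replace.image.colorspace import rgb2ycbcr, ycbcr2rgb

rgb_u8 = np.random.randint(0, 256, (4, 4, 3), dtype=np.uint8)
ycbcr_u8 = rgb2ycbcr(rgb_u8)          # stays uint8 in [0, 255]
rgb_back = ycbcr2rgb(ycbcr_u8)        # uint8 again, equal up to rounding

rgb_f32 = rgb_u8.astype(np.float32) / 255.0
luma = rgb2ycbcr(rgb_f32, y_only=True)   # float32 luma, roughly [16/255, 235/255]

assert ycbcr_u8.dtype == np.uint8 and luma.dtype == np.float32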
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/image/geometric.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/image/geometric.py
new file mode 100644
index 0000000000000000000000000000000000000000..af84f926ca3a6a5b9cc882ffb15148b4dabd5473
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/image/geometric.py
@@ -0,0 +1,741 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numbers
+
+import cv2
+import numpy as np
+
+from ..utils import to_2tuple
+from .io import imread_backend
+
+try:
+ from PIL import Image
+except ImportError:
+ Image = None
+
+
+def _scale_size(size, scale):
+ """Rescale a size by a ratio.
+
+ Args:
+ size (tuple[int]): (w, h).
+ scale (float | tuple(float)): Scaling factor.
+
+ Returns:
+ tuple[int]: scaled size.
+ """
+ if isinstance(scale, (float, int)):
+ scale = (scale, scale)
+ w, h = size
+ return int(w * float(scale[0]) + 0.5), int(h * float(scale[1]) + 0.5)
+
+
+cv2_interp_codes = {
+ 'nearest': cv2.INTER_NEAREST,
+ 'bilinear': cv2.INTER_LINEAR,
+ 'bicubic': cv2.INTER_CUBIC,
+ 'area': cv2.INTER_AREA,
+ 'lanczos': cv2.INTER_LANCZOS4
+}
+
+if Image is not None:
+ pillow_interp_codes = {
+ 'nearest': Image.NEAREST,
+ 'bilinear': Image.BILINEAR,
+ 'bicubic': Image.BICUBIC,
+ 'box': Image.BOX,
+ 'lanczos': Image.LANCZOS,
+ 'hamming': Image.HAMMING
+ }
+
+
+def imresize(img,
+ size,
+ return_scale=False,
+ interpolation='bilinear',
+ out=None,
+ backend=None):
+ """Resize image to a given size.
+
+ Args:
+ img (ndarray): The input image.
+ size (tuple[int]): Target size (w, h).
+ return_scale (bool): Whether to return `w_scale` and `h_scale`.
+ interpolation (str): Interpolation method, accepted values are
+ "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
+ backend, "nearest", "bilinear" for 'pillow' backend.
+ out (ndarray): The output destination.
+ backend (str | None): The image resize backend type. Options are `cv2`,
+ `pillow`, `None`. If backend is None, the global imread_backend
+ specified by ``mmcv.use_backend()`` will be used. Default: None.
+
+ Returns:
+ tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or
+ `resized_img`.
+ """
+ h, w = img.shape[:2]
+ if backend is None:
+ backend = imread_backend
+ if backend not in ['cv2', 'pillow']:
+ raise ValueError(f'backend: {backend} is not supported for resize. '
+ f"Supported backends are 'cv2', 'pillow'")
+
+ if backend == 'pillow':
+ assert img.dtype == np.uint8, 'Pillow backend only support uint8 type'
+ pil_image = Image.fromarray(img)
+ pil_image = pil_image.resize(size, pillow_interp_codes[interpolation])
+ resized_img = np.array(pil_image)
+ else:
+ resized_img = cv2.resize(
+ img, size, dst=out, interpolation=cv2_interp_codes[interpolation])
+ if not return_scale:
+ return resized_img
+ else:
+ w_scale = size[0] / w
+ h_scale = size[1] / h
+ return resized_img, w_scale, h_scale
+
+
+def imresize_to_multiple(img,
+ divisor,
+ size=None,
+ scale_factor=None,
+ keep_ratio=False,
+ return_scale=False,
+ interpolation='bilinear',
+ out=None,
+ backend=None):
+ """Resize image according to a given size or scale factor and then rounds
+ up the the resized or rescaled image size to the nearest value that can be
+ divided by the divisor.
+
+ Args:
+ img (ndarray): The input image.
+ divisor (int | tuple): Resized image size will be a multiple of
+ divisor. If divisor is a tuple, divisor should be
+ (w_divisor, h_divisor).
+ size (None | int | tuple[int]): Target size (w, h). Default: None.
+ scale_factor (None | float | tuple[float]): Multiplier for spatial
+ size. Should match input size if it is a tuple and the 2D style is
+ (w_scale_factor, h_scale_factor). Default: None.
+ keep_ratio (bool): Whether to keep the aspect ratio when resizing the
+ image. Default: False.
+ return_scale (bool): Whether to return `w_scale` and `h_scale`.
+ interpolation (str): Interpolation method, accepted values are
+ "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
+ backend, "nearest", "bilinear" for 'pillow' backend.
+ out (ndarray): The output destination.
+ backend (str | None): The image resize backend type. Options are `cv2`,
+ `pillow`, `None`. If backend is None, the global imread_backend
+ specified by ``mmcv.use_backend()`` will be used. Default: None.
+
+ Returns:
+ tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or
+ `resized_img`.
+ """
+ h, w = img.shape[:2]
+ if size is not None and scale_factor is not None:
+ raise ValueError('only one of size or scale_factor should be defined')
+ elif size is None and scale_factor is None:
+ raise ValueError('one of size or scale_factor should be defined')
+ elif size is not None:
+ size = to_2tuple(size)
+ if keep_ratio:
+ size = rescale_size((w, h), size, return_scale=False)
+ else:
+ size = _scale_size((w, h), scale_factor)
+
+ divisor = to_2tuple(divisor)
+ size = tuple([int(np.ceil(s / d)) * d for s, d in zip(size, divisor)])
+ resized_img, w_scale, h_scale = imresize(
+ img,
+ size,
+ return_scale=True,
+ interpolation=interpolation,
+ out=out,
+ backend=backend)
+ if return_scale:
+ return resized_img, w_scale, h_scale
+ else:
+ return resized_img
+
+
+def imresize_like(img,
+ dst_img,
+ return_scale=False,
+ interpolation='bilinear',
+ backend=None):
+ """Resize image to the same size of a given image.
+
+ Args:
+ img (ndarray): The input image.
+ dst_img (ndarray): The target image.
+ return_scale (bool): Whether to return `w_scale` and `h_scale`.
+ interpolation (str): Same as :func:`resize`.
+ backend (str | None): Same as :func:`resize`.
+
+ Returns:
+ tuple or ndarray: (`resized_img`, `w_scale`, `h_scale`) or
+ `resized_img`.
+ """
+ h, w = dst_img.shape[:2]
+ return imresize(img, (w, h), return_scale, interpolation, backend=backend)
+
+
+def rescale_size(old_size, scale, return_scale=False):
+ """Calculate the new size to be rescaled to.
+
+ Args:
+ old_size (tuple[int]): The old size (w, h) of image.
+ scale (float | tuple[int]): The scaling factor or maximum size.
+ If it is a float number, then the image will be rescaled by this
+ factor, else if it is a tuple of 2 integers, then the image will
+ be rescaled as large as possible within the scale.
+ return_scale (bool): Whether to return the scaling factor besides the
+ rescaled image size.
+
+ Returns:
+ tuple[int]: The new rescaled image size.
+ """
+ w, h = old_size
+ if isinstance(scale, (float, int)):
+ if scale <= 0:
+ raise ValueError(f'Invalid scale {scale}, must be positive.')
+ scale_factor = scale
+ elif isinstance(scale, tuple):
+ max_long_edge = max(scale)
+ max_short_edge = min(scale)
+ scale_factor = min(max_long_edge / max(h, w),
+ max_short_edge / min(h, w))
+ else:
+ raise TypeError(
+ f'Scale must be a number or tuple of int, but got {type(scale)}')
+
+ new_size = _scale_size((w, h), scale_factor)
+
+ if return_scale:
+ return new_size, scale_factor
+ else:
+ return new_size
+
+
+def imrescale(img,
+ scale,
+ return_scale=False,
+ interpolation='bilinear',
+ backend=None):
+ """Resize image while keeping the aspect ratio.
+
+ Args:
+ img (ndarray): The input image.
+ scale (float | tuple[int]): The scaling factor or maximum size.
+ If it is a float number, then the image will be rescaled by this
+ factor, else if it is a tuple of 2 integers, then the image will
+ be rescaled as large as possible within the scale.
+ return_scale (bool): Whether to return the scaling factor besides the
+ rescaled image.
+ interpolation (str): Same as :func:`resize`.
+ backend (str | None): Same as :func:`resize`.
+
+ Returns:
+ ndarray: The rescaled image.
+ """
+ h, w = img.shape[:2]
+ new_size, scale_factor = rescale_size((w, h), scale, return_scale=True)
+ rescaled_img = imresize(
+ img, new_size, interpolation=interpolation, backend=backend)
+ if return_scale:
+ return rescaled_img, scale_factor
+ else:
+ return rescaled_img
+
+
+def imflip(img, direction='horizontal'):
+ """Flip an image horizontally or vertically.
+
+ Args:
+ img (ndarray): Image to be flipped.
+ direction (str): The flip direction, either "horizontal" or
+ "vertical" or "diagonal".
+
+ Returns:
+ ndarray: The flipped image.
+ """
+ assert direction in ['horizontal', 'vertical', 'diagonal']
+ if direction == 'horizontal':
+ return np.flip(img, axis=1)
+ elif direction == 'vertical':
+ return np.flip(img, axis=0)
+ else:
+ return np.flip(img, axis=(0, 1))
+
+
+def imflip_(img, direction='horizontal'):
+ """Inplace flip an image horizontally or vertically.
+
+ Args:
+ img (ndarray): Image to be flipped.
+ direction (str): The flip direction, either "horizontal" or
+ "vertical" or "diagonal".
+
+ Returns:
+ ndarray: The flipped image (inplace).
+ """
+ assert direction in ['horizontal', 'vertical', 'diagonal']
+ if direction == 'horizontal':
+ return cv2.flip(img, 1, img)
+ elif direction == 'vertical':
+ return cv2.flip(img, 0, img)
+ else:
+ return cv2.flip(img, -1, img)
+
+
+def imrotate(img,
+ angle,
+ center=None,
+ scale=1.0,
+ border_value=0,
+ interpolation='bilinear',
+ auto_bound=False):
+ """Rotate an image.
+
+ Args:
+ img (ndarray): Image to be rotated.
+ angle (float): Rotation angle in degrees, positive values mean
+ clockwise rotation.
+ center (tuple[float], optional): Center point (w, h) of the rotation in
+ the source image. If not specified, the center of the image will be
+ used.
+ scale (float): Isotropic scale factor.
+ border_value (int): Border value.
+ interpolation (str): Same as :func:`resize`.
+ auto_bound (bool): Whether to adjust the image size to cover the whole
+ rotated image.
+
+ Returns:
+ ndarray: The rotated image.
+ """
+ if center is not None and auto_bound:
+ raise ValueError('`auto_bound` conflicts with `center`')
+ h, w = img.shape[:2]
+ if center is None:
+ center = ((w - 1) * 0.5, (h - 1) * 0.5)
+ assert isinstance(center, tuple)
+
+ matrix = cv2.getRotationMatrix2D(center, -angle, scale)
+ if auto_bound:
+ cos = np.abs(matrix[0, 0])
+ sin = np.abs(matrix[0, 1])
+ new_w = h * sin + w * cos
+ new_h = h * cos + w * sin
+ matrix[0, 2] += (new_w - w) * 0.5
+ matrix[1, 2] += (new_h - h) * 0.5
+ w = int(np.round(new_w))
+ h = int(np.round(new_h))
+ rotated = cv2.warpAffine(
+ img,
+ matrix, (w, h),
+ flags=cv2_interp_codes[interpolation],
+ borderValue=border_value)
+ return rotated
+
+
+def bbox_clip(bboxes, img_shape):
+ """Clip bboxes to fit the image shape.
+
+ Args:
+ bboxes (ndarray): Shape (..., 4*k)
+ img_shape (tuple[int]): (height, width) of the image.
+
+ Returns:
+ ndarray: Clipped bboxes.
+ """
+ assert bboxes.shape[-1] % 4 == 0
+ cmin = np.empty(bboxes.shape[-1], dtype=bboxes.dtype)
+ cmin[0::2] = img_shape[1] - 1
+ cmin[1::2] = img_shape[0] - 1
+ clipped_bboxes = np.maximum(np.minimum(bboxes, cmin), 0)
+ return clipped_bboxes
+
+
+def bbox_scaling(bboxes, scale, clip_shape=None):
+ """Scaling bboxes w.r.t the box center.
+
+ Args:
+ bboxes (ndarray): Shape(..., 4).
+ scale (float): Scaling factor.
+ clip_shape (tuple[int], optional): If specified, bboxes that exceed the
+ boundary will be clipped according to the given shape (h, w).
+
+ Returns:
+ ndarray: Scaled bboxes.
+ """
+ if float(scale) == 1.0:
+ scaled_bboxes = bboxes.copy()
+ else:
+ w = bboxes[..., 2] - bboxes[..., 0] + 1
+ h = bboxes[..., 3] - bboxes[..., 1] + 1
+ dw = (w * (scale - 1)) * 0.5
+ dh = (h * (scale - 1)) * 0.5
+ scaled_bboxes = bboxes + np.stack((-dw, -dh, dw, dh), axis=-1)
+ if clip_shape is not None:
+ return bbox_clip(scaled_bboxes, clip_shape)
+ else:
+ return scaled_bboxes
+
+
+def imcrop(img, bboxes, scale=1.0, pad_fill=None):
+ """Crop image patches.
+
+ 3 steps: scale the bboxes -> clip bboxes -> crop and pad.
+
+ Args:
+ img (ndarray): Image to be cropped.
+ bboxes (ndarray): Shape (k, 4) or (4, ), location of cropped bboxes.
+ scale (float, optional): Scale ratio of bboxes, the default value
+ 1.0 means no scaling.
+ pad_fill (Number | list[Number]): Value to be filled for padding.
+ Default: None, which means no padding.
+
+ Returns:
+ list[ndarray] | ndarray: The cropped image patches.
+ """
+ chn = 1 if img.ndim == 2 else img.shape[2]
+ if pad_fill is not None:
+ if isinstance(pad_fill, (int, float)):
+ pad_fill = [pad_fill for _ in range(chn)]
+ assert len(pad_fill) == chn
+
+ _bboxes = bboxes[None, ...] if bboxes.ndim == 1 else bboxes
+ scaled_bboxes = bbox_scaling(_bboxes, scale).astype(np.int32)
+ clipped_bbox = bbox_clip(scaled_bboxes, img.shape)
+
+ patches = []
+ for i in range(clipped_bbox.shape[0]):
+ x1, y1, x2, y2 = tuple(clipped_bbox[i, :])
+ if pad_fill is None:
+ patch = img[y1:y2 + 1, x1:x2 + 1, ...]
+ else:
+ _x1, _y1, _x2, _y2 = tuple(scaled_bboxes[i, :])
+ if chn == 1:
+ patch_shape = (_y2 - _y1 + 1, _x2 - _x1 + 1)
+ else:
+ patch_shape = (_y2 - _y1 + 1, _x2 - _x1 + 1, chn)
+ patch = np.array(
+ pad_fill, dtype=img.dtype) * np.ones(
+ patch_shape, dtype=img.dtype)
+ x_start = 0 if _x1 >= 0 else -_x1
+ y_start = 0 if _y1 >= 0 else -_y1
+ w = x2 - x1 + 1
+ h = y2 - y1 + 1
+ patch[y_start:y_start + h, x_start:x_start + w,
+ ...] = img[y1:y1 + h, x1:x1 + w, ...]
+ patches.append(patch)
+
+ if bboxes.ndim == 1:
+ return patches[0]
+ else:
+ return patches
+
+
+def impad(img,
+ *,
+ shape=None,
+ padding=None,
+ pad_val=0,
+ padding_mode='constant'):
+ """Pad the given image to a certain shape or pad on all sides with
+ specified padding mode and padding value.
+
+ Args:
+ img (ndarray): Image to be padded.
+ shape (tuple[int]): Expected padding shape (h, w). Default: None.
+ padding (int or tuple[int]): Padding on each border. If a single int is
+ provided this is used to pad all borders. If tuple of length 2 is
+ provided this is the padding on left/right and top/bottom
+ respectively. If a tuple of length 4 is provided this is the
+ padding for the left, top, right and bottom borders respectively.
+ Default: None. Note that `shape` and `padding` can not be both
+ set.
+ pad_val (Number | Sequence[Number]): Values to be filled in padding
+ areas when padding_mode is 'constant'. Default: 0.
+ padding_mode (str): Type of padding. Should be: constant, edge,
+ reflect or symmetric. Default: constant.
+
+ - constant: pads with a constant value, this value is specified
+ with pad_val.
+ - edge: pads with the last value at the edge of the image.
+ - reflect: pads with reflection of image without repeating the last
+ value on the edge. For example, padding [1, 2, 3, 4] with 2
+ elements on both sides in reflect mode will result in
+ [3, 2, 1, 2, 3, 4, 3, 2].
+ - symmetric: pads with reflection of image repeating the last value
+ on the edge. For example, padding [1, 2, 3, 4] with 2 elements on
+ both sides in symmetric mode will result in
+ [2, 1, 1, 2, 3, 4, 4, 3]
+
+ Returns:
+ ndarray: The padded image.
+ """
+
+ assert (shape is not None) ^ (padding is not None)
+ if shape is not None:
+ padding = (0, 0, shape[1] - img.shape[1], shape[0] - img.shape[0])
+
+ # check pad_val
+ if isinstance(pad_val, tuple):
+ assert len(pad_val) == img.shape[-1]
+ elif not isinstance(pad_val, numbers.Number):
+ raise TypeError('pad_val must be an int or a tuple. '
+ f'But received {type(pad_val)}')
+
+ # check padding
+ if isinstance(padding, tuple) and len(padding) in [2, 4]:
+ if len(padding) == 2:
+ padding = (padding[0], padding[1], padding[0], padding[1])
+ elif isinstance(padding, numbers.Number):
+ padding = (padding, padding, padding, padding)
+ else:
+ raise ValueError('Padding must be an int or a 2- or 4-element tuple. '
+ f'But received {padding}')
+
+ # check padding mode
+ assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric']
+
+ border_type = {
+ 'constant': cv2.BORDER_CONSTANT,
+ 'edge': cv2.BORDER_REPLICATE,
+ 'reflect': cv2.BORDER_REFLECT_101,
+ 'symmetric': cv2.BORDER_REFLECT
+ }
+ img = cv2.copyMakeBorder(
+ img,
+ padding[1],
+ padding[3],
+ padding[0],
+ padding[2],
+ border_type[padding_mode],
+ value=pad_val)
+
+ return img
+
+
+def impad_to_multiple(img, divisor, pad_val=0):
+ """Pad an image to ensure each edge to be multiple to some number.
+
+ Args:
+ img (ndarray): Image to be padded.
+ divisor (int): Padded image edges will be multiples of divisor.
+ pad_val (Number | Sequence[Number]): Same as :func:`impad`.
+
+ Returns:
+ ndarray: The padded image.
+ """
+ pad_h = int(np.ceil(img.shape[0] / divisor)) * divisor
+ pad_w = int(np.ceil(img.shape[1] / divisor)) * divisor
+ return impad(img, shape=(pad_h, pad_w), pad_val=pad_val)
+
+
+def cutout(img, shape, pad_val=0):
+ """Randomly cut out a rectangle from the original img.
+
+ Args:
+ img (ndarray): Image to be cutout.
+ shape (int | tuple[int]): Expected cutout shape (h, w). If given as an
+ int, the value will be used for both h and w.
+ pad_val (int | float | tuple[int | float]): Values to be filled in the
+ cut area. Defaults to 0.
+
+ Returns:
+ ndarray: The cutout image.
+ """
+
+ channels = 1 if img.ndim == 2 else img.shape[2]
+ if isinstance(shape, int):
+ cut_h, cut_w = shape, shape
+ else:
+ assert isinstance(shape, tuple) and len(shape) == 2, \
+ f'shape must be an int or a tuple with length 2, but got type ' \
+ f'{type(shape)} instead.'
+ cut_h, cut_w = shape
+ if isinstance(pad_val, (int, float)):
+ pad_val = tuple([pad_val] * channels)
+ elif isinstance(pad_val, tuple):
+ assert len(pad_val) == channels, \
+ 'Expected the num of elements in tuple equals the channels ' \
+ 'of input image. Found {} vs {}'.format(
+ len(pad_val), channels)
+ else:
+ raise TypeError(f'Invalid type {type(pad_val)} for `pad_val`')
+
+ img_h, img_w = img.shape[:2]
+ y0 = np.random.uniform(img_h)
+ x0 = np.random.uniform(img_w)
+
+ y1 = int(max(0, y0 - cut_h / 2.))
+ x1 = int(max(0, x0 - cut_w / 2.))
+ y2 = min(img_h, y1 + cut_h)
+ x2 = min(img_w, x1 + cut_w)
+
+ if img.ndim == 2:
+ patch_shape = (y2 - y1, x2 - x1)
+ else:
+ patch_shape = (y2 - y1, x2 - x1, channels)
+
+ img_cutout = img.copy()
+ patch = np.array(
+ pad_val, dtype=img.dtype) * np.ones(
+ patch_shape, dtype=img.dtype)
+ img_cutout[y1:y2, x1:x2, ...] = patch
+
+ return img_cutout
+
+
+def _get_shear_matrix(magnitude, direction='horizontal'):
+ """Generate the shear matrix for transformation.
+
+ Args:
+ magnitude (int | float): The magnitude used for shear.
+ direction (str): The shear direction, either "horizontal"
+ or "vertical".
+
+ Returns:
+ ndarray: The shear matrix with dtype float32.
+ """
+ if direction == 'horizontal':
+ shear_matrix = np.float32([[1, magnitude, 0], [0, 1, 0]])
+ elif direction == 'vertical':
+ shear_matrix = np.float32([[1, 0, 0], [magnitude, 1, 0]])
+ return shear_matrix
+
+
+def imshear(img,
+ magnitude,
+ direction='horizontal',
+ border_value=0,
+ interpolation='bilinear'):
+ """Shear an image.
+
+ Args:
+ img (ndarray): Image to be sheared with format (h, w)
+ or (h, w, c).
+ magnitude (int | float): The magnitude used for shear.
+ direction (str): The shear direction, either "horizontal"
+ or "vertical".
+ border_value (int | tuple[int]): Value used in case of a
+ constant border.
+ interpolation (str): Same as :func:`resize`.
+
+ Returns:
+ ndarray: The sheared image.
+ """
+ assert direction in ['horizontal',
+ 'vertical'], f'Invalid direction: {direction}'
+ height, width = img.shape[:2]
+ if img.ndim == 2:
+ channels = 1
+ elif img.ndim == 3:
+ channels = img.shape[-1]
+ if isinstance(border_value, int):
+ border_value = tuple([border_value] * channels)
+ elif isinstance(border_value, tuple):
+ assert len(border_value) == channels, \
+ 'Expected the num of elements in tuple equals the channels ' \
+ 'of input image. Found {} vs {}'.format(
+ len(border_value), channels)
+ else:
+ raise ValueError(
+ f'Invalid type {type(border_value)} for `border_value`')
+ shear_matrix = _get_shear_matrix(magnitude, direction)
+ sheared = cv2.warpAffine(
+ img,
+ shear_matrix,
+ (width, height),
+ # Note: when the number of elements in `border_value` is
+ # greater than 3 (e.g. shearing masks whose number of channels
+ # is larger than 3), `cv2.warpAffine` will raise a TypeError.
+ # Here we simply slice the first 3 values in `border_value`.
+ borderValue=border_value[:3],
+ flags=cv2_interp_codes[interpolation])
+ return sheared
+
+
+def _get_translate_matrix(offset, direction='horizontal'):
+ """Generate the translate matrix.
+
+ Args:
+ offset (int | float): The offset used for translate.
+ direction (str): The translate direction, either
+ "horizontal" or "vertical".
+
+ Returns:
+ ndarray: The translate matrix with dtype float32.
+ """
+ if direction == 'horizontal':
+ translate_matrix = np.float32([[1, 0, offset], [0, 1, 0]])
+ elif direction == 'vertical':
+ translate_matrix = np.float32([[1, 0, 0], [0, 1, offset]])
+ return translate_matrix
+
+
+def imtranslate(img,
+ offset,
+ direction='horizontal',
+ border_value=0,
+ interpolation='bilinear'):
+ """Translate an image.
+
+ Args:
+ img (ndarray): Image to be translated with format
+ (h, w) or (h, w, c).
+ offset (int | float): The offset used for translate.
+ direction (str): The translate direction, either "horizontal"
+ or "vertical".
+ border_value (int | tuple[int]): Value used in case of a
+ constant border.
+ interpolation (str): Same as :func:`resize`.
+
+ Returns:
+ ndarray: The translated image.
+ """
+ assert direction in ['horizontal',
+ 'vertical'], f'Invalid direction: {direction}'
+ height, width = img.shape[:2]
+ if img.ndim == 2:
+ channels = 1
+ elif img.ndim == 3:
+ channels = img.shape[-1]
+ if isinstance(border_value, int):
+ border_value = tuple([border_value] * channels)
+ elif isinstance(border_value, tuple):
+ assert len(border_value) == channels, \
+ 'Expected the num of elements in tuple equals the channels ' \
+ 'of input image. Found {} vs {}'.format(
+ len(border_value), channels)
+ else:
+ raise ValueError(
+ f'Invalid type {type(border_value)} for `border_value`.')
+ translate_matrix = _get_translate_matrix(offset, direction)
+ translated = cv2.warpAffine(
+ img,
+ translate_matrix,
+ (width, height),
+ # Note: when the number of elements in `border_value` is
+ # greater than 3 (e.g. translating masks whose number of channels
+ # is larger than 3), `cv2.warpAffine` will raise a TypeError.
+ # Here we simply slice the first 3 values in `border_value`.
+ borderValue=border_value[:3],
+ flags=cv2_interp_codes[interpolation])
+ return translated
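A sketch of how the resizing and padding helpers above compose in a typical segmentation preprocessing step: keep the aspect ratio with `imrescale`, then pad to a stride-friendly shape with `impad_to_multiple`. The shapes, the divisor of 32, and the import path are illustrative.

```python
import numpy as np

# Illustrative import path; adjust to your installation of mmcv_replace.
from mmcv_replace.image.geometric import impad_to_multiple, imrescale

img = np.zeros((375, 500, 3), dtype=np.uint8)          # (h, w, c)

# Rescale so the image fits inside a 512 x 384 box while keeping the ratio.
rescaled, scale = imrescale(img, (512, 384), return_scale=True)
print(rescaled.shape, scale)                           # (384, 512, 3) 1.024

# Pad height and width up to the next multiple of 32 (a no-op here).
padded = impad_to_multiple(rescaled, divisor=32)
print(padded.shape)                                    # (384, 512, 3)
```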
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/image/io.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/image/io.py
new file mode 100644
index 0000000000000000000000000000000000000000..e05effbd95713c3fc76cc5739d6dc616dc997005
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/image/io.py
@@ -0,0 +1,325 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import io
+import os.path as osp
+import warnings
+from pathlib import Path
+
+import cv2
+import numpy as np
+from cv2 import (IMREAD_COLOR, IMREAD_GRAYSCALE, IMREAD_IGNORE_ORIENTATION,
+ IMREAD_UNCHANGED)
+
+from mmcv.fileio import FileClient
+from mmcv.utils import is_filepath, is_str
+
+try:
+ from turbojpeg import TJCS_RGB, TJPF_BGR, TJPF_GRAY, TurboJPEG
+except ImportError:
+ TJCS_RGB = TJPF_GRAY = TJPF_BGR = TurboJPEG = None
+
+try:
+ from PIL import Image, ImageOps
+except ImportError:
+ Image = None
+
+try:
+ import tifffile
+except ImportError:
+ tifffile = None
+
+jpeg = None
+supported_backends = ['cv2', 'turbojpeg', 'pillow', 'tifffile']
+
+imread_flags = {
+ 'color': IMREAD_COLOR,
+ 'grayscale': IMREAD_GRAYSCALE,
+ 'unchanged': IMREAD_UNCHANGED,
+ 'color_ignore_orientation': IMREAD_IGNORE_ORIENTATION | IMREAD_COLOR,
+ 'grayscale_ignore_orientation':
+ IMREAD_IGNORE_ORIENTATION | IMREAD_GRAYSCALE
+}
+
+imread_backend = 'cv2'
+
+
+def use_backend(backend):
+ """Select a backend for image decoding.
+
+ Args:
+ backend (str): The image decoding backend type. Options are `cv2`,
+ `pillow`, `turbojpeg` (see https://github.com/lilohuang/PyTurboJPEG)
+ and `tifffile`. `turbojpeg` is faster but it only supports `.jpeg`
+ file format.
+ """
+ assert backend in supported_backends
+ global imread_backend
+ imread_backend = backend
+ if imread_backend == 'turbojpeg':
+ if TurboJPEG is None:
+ raise ImportError('`PyTurboJPEG` is not installed')
+ global jpeg
+ if jpeg is None:
+ jpeg = TurboJPEG()
+ elif imread_backend == 'pillow':
+ if Image is None:
+ raise ImportError('`Pillow` is not installed')
+ elif imread_backend == 'tifffile':
+ if tifffile is None:
+ raise ImportError('`tifffile` is not installed')
+
+
+def _jpegflag(flag='color', channel_order='bgr'):
+ channel_order = channel_order.lower()
+ if channel_order not in ['rgb', 'bgr']:
+ raise ValueError('channel order must be either "rgb" or "bgr"')
+
+ if flag == 'color':
+ if channel_order == 'bgr':
+ return TJPF_BGR
+ elif channel_order == 'rgb':
+ return TJCS_RGB
+ elif flag == 'grayscale':
+ return TJPF_GRAY
+ else:
+ raise ValueError('flag must be "color" or "grayscale"')
+
+
+def _pillow2array(img, flag='color', channel_order='bgr'):
+ """Convert a pillow image to numpy array.
+
+ Args:
+ img (:obj:`PIL.Image.Image`): The image loaded using PIL
+ flag (str): Flags specifying the color type of a loaded image,
+ candidates are 'color', 'grayscale' and 'unchanged'.
+ Default to 'color'.
+ channel_order (str): The channel order of the output image array,
+ candidates are 'bgr' and 'rgb'. Default to 'bgr'.
+
+ Returns:
+ np.ndarray: The converted numpy array
+ """
+ channel_order = channel_order.lower()
+ if channel_order not in ['rgb', 'bgr']:
+ raise ValueError('channel order must be either "rgb" or "bgr"')
+
+ if flag == 'unchanged':
+ array = np.array(img)
+ if array.ndim >= 3 and array.shape[2] >= 3: # color image
+ array[:, :, :3] = array[:, :, (2, 1, 0)] # RGB to BGR
+ else:
+ # Handle exif orientation tag
+ if flag in ['color', 'grayscale']:
+ img = ImageOps.exif_transpose(img)
+ # If the image mode is not 'RGB', convert it to 'RGB' first.
+ if img.mode != 'RGB':
+ if img.mode != 'LA':
+ # Most formats except 'LA' can be directly converted to RGB
+ img = img.convert('RGB')
+ else:
+ # When the mode is 'LA', the default conversion will fill in
+ # the canvas with black, which sometimes shadows black objects
+ # in the foreground.
+ #
+ # Therefore, a random color (124, 117, 104) is used for canvas
+ img_rgba = img.convert('RGBA')
+ img = Image.new('RGB', img_rgba.size, (124, 117, 104))
+ img.paste(img_rgba, mask=img_rgba.split()[3]) # 3 is alpha
+ if flag in ['color', 'color_ignore_orientation']:
+ array = np.array(img)
+ if channel_order != 'rgb':
+ array = array[:, :, ::-1] # RGB to BGR
+ elif flag in ['grayscale', 'grayscale_ignore_orientation']:
+ img = img.convert('L')
+ array = np.array(img)
+ else:
+ raise ValueError(
+ 'flag must be "color", "grayscale", "unchanged", '
+ f'"color_ignore_orientation" or "grayscale_ignore_orientation"'
+ f' but got {flag}')
+ return array
+
+
+def imread(img_or_path,
+ flag='color',
+ channel_order='bgr',
+ backend=None,
+ file_client_args=None):
+ """Read an image.
+
+ Note:
+ In v1.4.1 and later, the `file_client_args` parameter is added.
+
+ Args:
+ img_or_path (ndarray or str or Path): Either a numpy array or str or
+ pathlib.Path. If it is a numpy array (loaded image), then
+ it will be returned as is.
+ flag (str): Flags specifying the color type of a loaded image,
+ candidates are `color`, `grayscale`, `unchanged`,
+ `color_ignore_orientation` and `grayscale_ignore_orientation`.
+ By default, `cv2` and `pillow` backend would rotate the image
+ according to its EXIF info unless called with `unchanged` or
+ `*_ignore_orientation` flags. `turbojpeg` and `tifffile` backend
+ always ignore image's EXIF info regardless of the flag.
+ The `turbojpeg` backend only supports `color` and `grayscale`.
+ channel_order (str): Order of channel, candidates are `bgr` and `rgb`.
+ backend (str | None): The image decoding backend type. Options are
+ `cv2`, `pillow`, `turbojpeg`, `tifffile`, `None`.
+ If backend is None, the global imread_backend specified by
+ ``mmcv.use_backend()`` will be used. Default: None.
+ file_client_args (dict | None): Arguments to instantiate a
+ FileClient. See :class:`mmcv.fileio.FileClient` for details.
+ Default: None.
+
+ Returns:
+ ndarray: Loaded image array.
+
+ Examples:
+ >>> import mmcv
+ >>> img_path = '/path/to/img.jpg'
+ >>> img = mmcv.imread(img_path)
+ >>> img = mmcv.imread(img_path, flag='color', channel_order='rgb',
+ ... backend='cv2')
+ >>> img = mmcv.imread(img_path, flag='color', channel_order='bgr',
+ ... backend='pillow')
+ >>> s3_img_path = 's3://bucket/img.jpg'
+ >>> # infer the file backend by the prefix s3
+ >>> img = mmcv.imread(s3_img_path)
+ >>> # manually set the file backend petrel
+ >>> img = mmcv.imread(s3_img_path, file_client_args={
+ ... 'backend': 'petrel'})
+ >>> http_img_path = 'http://path/to/img.jpg'
+ >>> img = mmcv.imread(http_img_path)
+ >>> img = mmcv.imread(http_img_path, file_client_args={
+ ... 'backend': 'http'})
+ """
+
+ if isinstance(img_or_path, Path):
+ img_or_path = str(img_or_path)
+
+ if isinstance(img_or_path, np.ndarray):
+ return img_or_path
+ elif is_str(img_or_path):
+ file_client = FileClient.infer_client(file_client_args, img_or_path)
+ img_bytes = file_client.get(img_or_path)
+ return imfrombytes(img_bytes, flag, channel_order, backend)
+ else:
+ raise TypeError('"img" must be a numpy array or a str or '
+ 'a pathlib.Path object')
+
+
+def imfrombytes(content, flag='color', channel_order='bgr', backend=None):
+ """Read an image from bytes.
+
+ Args:
+ content (bytes): Image bytes got from files or other streams.
+ flag (str): Same as :func:`imread`.
+ channel_order (str): The channel order of the output image array,
+ candidates are 'bgr' and 'rgb'. Default to 'bgr'.
+ backend (str | None): The image decoding backend type. Options are
+ `cv2`, `pillow`, `turbojpeg`, `tifffile`, `None`. If backend is
+ None, the global imread_backend specified by ``mmcv.use_backend()``
+ will be used. Default: None.
+
+ Returns:
+ ndarray: Loaded image array.
+
+ Examples:
+ >>> img_path = '/path/to/img.jpg'
+ >>> with open(img_path, 'rb') as f:
+ >>> img_buff = f.read()
+ >>> img = mmcv.imfrombytes(img_buff)
+ >>> img = mmcv.imfrombytes(img_buff, flag='color', channel_order='rgb')
+ >>> img = mmcv.imfrombytes(img_buff, backend='pillow')
+ >>> img = mmcv.imfrombytes(img_buff, backend='cv2')
+ """
+
+ if backend is None:
+ backend = imread_backend
+ if backend not in supported_backends:
+ raise ValueError(
+ f'backend: {backend} is not supported. Supported '
+ "backends are 'cv2', 'turbojpeg', 'pillow', 'tifffile'")
+ if backend == 'turbojpeg':
+ img = jpeg.decode(content, _jpegflag(flag, channel_order))
+ if img.shape[-1] == 1:
+ img = img[:, :, 0]
+ return img
+ elif backend == 'pillow':
+ with io.BytesIO(content) as buff:
+ img = Image.open(buff)
+ img = _pillow2array(img, flag, channel_order)
+ return img
+ elif backend == 'tifffile':
+ with io.BytesIO(content) as buff:
+ img = tifffile.imread(buff)
+ return img
+ else:
+ img_np = np.frombuffer(content, np.uint8)
+ flag = imread_flags[flag] if is_str(flag) else flag
+ img = cv2.imdecode(img_np, flag)
+ if flag == IMREAD_COLOR and channel_order == 'rgb':
+ cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img)
+ return img
+
+
+def imwrite(img,
+ file_path,
+ params=None,
+ auto_mkdir=None,
+ file_client_args=None):
+ """Write image to file.
+
+ Note:
+ In v1.4.1 and later, the `file_client_args` parameter is added.
+
+ Warning:
+ The parameter `auto_mkdir` will be deprecated in the future and every
+ file client will make directories automatically.
+
+ Args:
+ img (ndarray): Image array to be written.
+ file_path (str): Image file path.
+ params (None or list): Same as opencv :func:`imwrite` interface.
+ auto_mkdir (bool): If the parent folder of `file_path` does not exist,
+ whether to create it automatically. It will be deprecated.
+ file_client_args (dict | None): Arguments to instantiate a
+ FileClient. See :class:`mmcv.fileio.FileClient` for details.
+ Default: None.
+
+ Returns:
+ bool: Successful or not.
+
+ Examples:
+ >>> # write to hard disk client
+ >>> ret = mmcv.imwrite(img, '/path/to/img.jpg')
+ >>> # infer the file backend by the prefix s3
+ >>> ret = mmcv.imwrite(img, 's3://bucket/img.jpg')
+ >>> # manually set the file backend petrel
+ >>> ret = mmcv.imwrite(img, 's3://bucket/img.jpg', file_client_args={
+ ... 'backend': 'petrel'})
+ """
+ assert is_filepath(file_path)
+ file_path = str(file_path)
+ if auto_mkdir is not None:
+ warnings.warn(
+ 'The parameter `auto_mkdir` will be deprecated in the future and '
+ 'every file client will create directories automatically.')
+ file_client = FileClient.infer_client(file_client_args, file_path)
+ img_ext = osp.splitext(file_path)[-1]
+ # Encode image according to image suffix.
+ # For example, if image path is '/path/your/img.jpg', the encode
+ # format is '.jpg'.
+ flag, img_buff = cv2.imencode(img_ext, img, params)
+ file_client.put(img_buff.tobytes(), file_path)
+ return flag
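
A minimal usage sketch of the read/write helpers above, assuming this module is importable as `mmcv` with OpenCV available as the decoding backend; the file paths are placeholders:

import mmcv

# Decode raw bytes with the cv2 backend, requesting RGB channel order.
with open('/path/to/img.jpg', 'rb') as f:
    buff = f.read()
img = mmcv.imfrombytes(buff, flag='color', channel_order='rgb', backend='cv2')

# Write the array back to disk. The encode format is inferred from the file
# suffix ('.png' selects PNG encoding), and imwrite expects BGR order, so the
# channels are flipped back before writing.
ok = mmcv.imwrite(img[..., ::-1], '/path/to/out.png')
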
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/image/misc.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/image/misc.py
new file mode 100644
index 0000000000000000000000000000000000000000..3f33962cd321a65abc1c48c33d5b0a2fdd9ad657
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/image/misc.py
@@ -0,0 +1,66 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+
+import mmcv
+
+try:
+ import torch
+except ImportError:
+ torch = None
+
+
+def tensor2imgs(tensor, mean=None, std=None, to_rgb=True):
+ """Convert tensor to 3-channel images or 1-channel gray images.
+
+ Args:
+ tensor (torch.Tensor): Tensor that contains multiple images, shape (
+ N, C, H, W). :math:`C` can be either 3 or 1.
+ mean (tuple[float], optional): Mean of images. If None,
+ (0, 0, 0) will be used for a 3-channel tensor and (0, ) for a
+ 1-channel tensor. Defaults to None.
+ std (tuple[float], optional): Standard deviation of images. If None,
+ (1, 1, 1) will be used for a 3-channel tensor and (1, ) for a
+ 1-channel tensor. Defaults to None.
+ to_rgb (bool, optional): Whether the tensor was converted to RGB
+ format in the first place. If so, convert it back to BGR.
+ For a 1-channel tensor, it must be False. Defaults to True.
+
+ Returns:
+ list[np.ndarray]: A list that contains multiple images.
+ """
+
+ if torch is None:
+ raise RuntimeError('pytorch is not installed')
+ assert torch.is_tensor(tensor) and tensor.ndim == 4
+ channels = tensor.size(1)
+ assert channels in [1, 3]
+ if mean is None:
+ mean = (0, ) * channels
+ if std is None:
+ std = (1, ) * channels
+ assert (channels == len(mean) == len(std) == 3) or \
+ (channels == len(mean) == len(std) == 1 and not to_rgb)
+
+ num_imgs = tensor.size(0)
+ mean = np.array(mean, dtype=np.float32)
+ std = np.array(std, dtype=np.float32)
+ imgs = []
+ for img_id in range(num_imgs):
+ img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0)
+ img = mmcv.imdenormalize(
+ img, mean, std, to_bgr=to_rgb).astype(np.uint8)
+ imgs.append(np.ascontiguousarray(img))
+ return imgs
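
A minimal sketch of `tensor2imgs`, assuming the function is importable (the import path below is an assumption) and that `mmcv.imdenormalize` is installed; the mean/std values are the usual ImageNet statistics and purely illustrative:

import numpy as np
import torch
from mmcv_replace.image.misc import tensor2imgs  # import path is an assumption

# A batch of two normalized images, shape (N, C, H, W) with C == 3.
batch = torch.randn(2, 3, 32, 32)
imgs = tensor2imgs(batch,
                   mean=(123.675, 116.28, 103.53),
                   std=(58.395, 57.12, 57.375),
                   to_rgb=True)

# Each element is an (H, W, C) uint8 array in BGR order.
assert len(imgs) == 2
assert imgs[0].shape == (32, 32, 3) and imgs[0].dtype == np.uint8
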
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/image/photometric.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/image/photometric.py
new file mode 100644
index 0000000000000000000000000000000000000000..c58eb8a21f6b1e6a16b2176016d4414400ed6076
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/image/photometric.py
@@ -0,0 +1,441 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import cv2
+import numpy as np
+
+from ..utils import is_tuple_of
+from .colorspace import bgr2gray, gray2bgr
+
+
+def imnormalize(img, mean, std, to_rgb=True):
+ """Normalize an image with mean and std.
+
+ Args:
+ img (ndarray): Image to be normalized.
+ mean (ndarray): The mean to be used for normalize.
+ std (ndarray): The std to be used for normalize.
+ to_rgb (bool): Whether to convert to rgb.
+
+ Returns:
+ ndarray: The normalized image.
+ """
+ img = img.copy().astype(np.float32)
+ return imnormalize_(img, mean, std, to_rgb)
+
+
+def imnormalize_(img, mean, std, to_rgb=True):
+ """Inplace normalize an image with mean and std.
+
+ Args:
+ img (ndarray): Image to be normalized.
+ mean (ndarray): The mean to be used for normalize.
+ std (ndarray): The std to be used for normalize.
+ to_rgb (bool): Whether to convert to rgb.
+
+ Returns:
+ ndarray: The normalized image.
+ """
+ # cv2 inplace normalization does not accept uint8
+ assert img.dtype != np.uint8
+ mean = np.float64(mean.reshape(1, -1))
+ stdinv = 1 / np.float64(std.reshape(1, -1))
+ if to_rgb:
+ cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) # inplace
+ cv2.subtract(img, mean, img) # inplace
+ cv2.multiply(img, stdinv, img) # inplace
+ return img
+
+
+def imdenormalize(img, mean, std, to_bgr=True):
+ assert img.dtype != np.uint8
+ mean = mean.reshape(1, -1).astype(np.float64)
+ std = std.reshape(1, -1).astype(np.float64)
+ img = cv2.multiply(img, std) # make a copy
+ cv2.add(img, mean, img) # inplace
+ if to_bgr:
+ cv2.cvtColor(img, cv2.COLOR_RGB2BGR, img) # inplace
+ return img
+
+
+def iminvert(img):
+ """Invert (negate) an image.
+
+ Args:
+ img (ndarray): Image to be inverted.
+
+ Returns:
+ ndarray: The inverted image.
+ """
+ return np.full_like(img, 255) - img
+
+
+def solarize(img, thr=128):
+ """Solarize an image (invert all pixel values above a threshold)
+
+ Args:
+ img (ndarray): Image to be solarized.
+ thr (int): Threshold for solarizing (0 - 255).
+
+ Returns:
+ ndarray: The solarized image.
+ """
+ img = np.where(img < thr, img, 255 - img)
+ return img
+
+
+def posterize(img, bits):
+ """Posterize an image (reduce the number of bits for each color channel)
+
+ Args:
+ img (ndarray): Image to be posterized.
+ bits (int): Number of bits (1 to 8) to use for posterizing.
+
+ Returns:
+ ndarray: The posterized image.
+ """
+ shift = 8 - bits
+ img = np.left_shift(np.right_shift(img, shift), shift)
+ return img
+
+
+def adjust_color(img, alpha=1, beta=None, gamma=0):
+ r"""It blends the source image and its gray image:
+
+ .. math::
+ output = img * alpha + gray\_img * beta + gamma
+
+ Args:
+ img (ndarray): The input source image.
+ alpha (int | float): Weight for the source image. Default 1.
+ beta (int | float): Weight for the converted gray image.
+ If None, it's assigned the value (1 - `alpha`).
+ gamma (int | float): Scalar added to each sum.
+ Same as :func:`cv2.addWeighted`. Default 0.
+
+ Returns:
+ ndarray: Colored image which has the same size and dtype as input.
+ """
+ gray_img = bgr2gray(img)
+ gray_img = np.tile(gray_img[..., None], [1, 1, 3])
+ if beta is None:
+ beta = 1 - alpha
+ colored_img = cv2.addWeighted(img, alpha, gray_img, beta, gamma)
+ if not colored_img.dtype == np.uint8:
+ # Note when the dtype of `img` is not the default `np.uint8`
+ # (e.g. np.float32), the value in `colored_img` got from cv2
+ # is not guaranteed to be in range [0, 255], so here clip
+ # is needed.
+ colored_img = np.clip(colored_img, 0, 255)
+ return colored_img
+
+
+def imequalize(img):
+ """Equalize the image histogram.
+
+ This function applies a non-linear mapping to the input image,
+ in order to create a uniform distribution of grayscale values
+ in the output image.
+
+ Args:
+ img (ndarray): Image to be equalized.
+
+ Returns:
+ ndarray: The equalized image.
+ """
+
+ def _scale_channel(im, c):
+ """Scale the data in the corresponding channel."""
+ im = im[:, :, c]
+ # Compute the histogram of the image channel.
+ histo = np.histogram(im, 256, (0, 255))[0]
+ # For computing the step, filter out the nonzeros.
+ nonzero_histo = histo[histo > 0]
+ step = (np.sum(nonzero_histo) - nonzero_histo[-1]) // 255
+ if not step:
+ lut = np.array(range(256))
+ else:
+ # Compute the cumulative sum, shifted by step // 2
+ # and then normalized by step.
+ lut = (np.cumsum(histo) + (step // 2)) // step
+ # Shift lut, prepending with 0.
+ lut = np.concatenate([[0], lut[:-1]], 0)
+ # handle potential integer overflow
+ lut[lut > 255] = 255
+ # If step is zero, return the original image.
+ # Otherwise, index from lut.
+ return np.where(np.equal(step, 0), im, lut[im])
+
+ # Scales each channel independently and then stacks
+ # the result.
+ s1 = _scale_channel(img, 0)
+ s2 = _scale_channel(img, 1)
+ s3 = _scale_channel(img, 2)
+ equalized_img = np.stack([s1, s2, s3], axis=-1)
+ return equalized_img.astype(img.dtype)
+
+
+def adjust_brightness(img, factor=1.):
+ """Adjust image brightness.
+
+ This function controls the brightness of an image. An
+ enhancement factor of 0.0 gives a black image.
+ A factor of 1.0 gives the original image. This function
+ blends the source image and the degenerated black image:
+
+ .. math::
+ output = img * factor + degenerated * (1 - factor)
+
+ Args:
+ img (ndarray): Image to be brightened.
+ factor (float): A value that controls the enhancement.
+ Factor 1.0 returns the original image; lower
+ factors mean less color (brightness, contrast,
+ etc.), and higher values mean more. Default 1.
+
+ Returns:
+ ndarray: The brightened image.
+ """
+ degenerated = np.zeros_like(img)
+ # Note manually convert the dtype to np.float32 to achieve results
+ # as close as possible to PIL.ImageEnhance.Brightness.
+ # Set beta=1-factor, and gamma=0
+ brightened_img = cv2.addWeighted(
+ img.astype(np.float32), factor, degenerated.astype(np.float32),
+ 1 - factor, 0)
+ brightened_img = np.clip(brightened_img, 0, 255)
+ return brightened_img.astype(img.dtype)
+
+
+def adjust_contrast(img, factor=1.):
+ """Adjust image contrast.
+
+ This function controls the contrast of an image. An
+ enhancement factor of 0.0 gives a solid grey
+ image. A factor of 1.0 gives the original image. It
+ blends the source image and the degenerated mean image:
+
+ .. math::
+ output = img * factor + degenerated * (1 - factor)
+
+ Args:
+ img (ndarray): Image to be contrasted. BGR order.
+ factor (float): Same as :func:`mmcv.adjust_brightness`.
+
+ Returns:
+ ndarray: The contrasted image.
+ """
+ gray_img = bgr2gray(img)
+ hist = np.histogram(gray_img, 256, (0, 255))[0]
+ mean = round(np.sum(gray_img) / np.sum(hist))
+ degenerated = (np.ones_like(img[..., 0]) * mean).astype(img.dtype)
+ degenerated = gray2bgr(degenerated)
+ contrasted_img = cv2.addWeighted(
+ img.astype(np.float32), factor, degenerated.astype(np.float32),
+ 1 - factor, 0)
+ contrasted_img = np.clip(contrasted_img, 0, 255)
+ return contrasted_img.astype(img.dtype)
+
+
+def auto_contrast(img, cutoff=0):
+ """Auto adjust image contrast.
+
+ This function maximizes (normalizes) image contrast by first removing
+ the cutoff percent of the lightest and darkest pixels from the histogram
+ and then remapping the image so that the darkest remaining pixel becomes
+ black (0) and the lightest becomes white (255).
+
+ Args:
+ img (ndarray): Image to be contrasted. BGR order.
+ cutoff (int | float | tuple): The cutoff percent of the lightest and
+ darkest pixels to be removed. If given as tuple, it shall be
+ (low, high). Otherwise, the single value will be used for both.
+ Defaults to 0.
+
+ Returns:
+ ndarray: The contrasted image.
+ """
+
+ def _auto_contrast_channel(im, c, cutoff):
+ im = im[:, :, c]
+ # Compute the histogram of the image channel.
+ histo = np.histogram(im, 256, (0, 255))[0]
+ # Remove cut-off percent pixels from histo
+ histo_sum = np.cumsum(histo)
+ cut_low = histo_sum[-1] * cutoff[0] // 100
+ cut_high = histo_sum[-1] - histo_sum[-1] * cutoff[1] // 100
+ histo_sum = np.clip(histo_sum, cut_low, cut_high) - cut_low
+ histo = np.concatenate([[histo_sum[0]], np.diff(histo_sum)], 0)
+
+ # Compute mapping
+ low, high = np.nonzero(histo)[0][0], np.nonzero(histo)[0][-1]
+ # If all the values have been cut off, return the original image
+ if low >= high:
+ return im
+ scale = 255.0 / (high - low)
+ offset = -low * scale
+ lut = np.array(range(256))
+ lut = lut * scale + offset
+ lut = np.clip(lut, 0, 255)
+ return lut[im]
+
+ if isinstance(cutoff, (int, float)):
+ cutoff = (cutoff, cutoff)
+ else:
+ assert isinstance(cutoff, tuple), 'cutoff must be of type int, ' \
+ f'float or tuple, but got {type(cutoff)} instead.'
+ # Auto adjusts contrast for each channel independently and then stacks
+ # the result.
+ s1 = _auto_contrast_channel(img, 0, cutoff)
+ s2 = _auto_contrast_channel(img, 1, cutoff)
+ s3 = _auto_contrast_channel(img, 2, cutoff)
+ contrasted_img = np.stack([s1, s2, s3], axis=-1)
+ return contrasted_img.astype(img.dtype)
+
+
+def adjust_sharpness(img, factor=1., kernel=None):
+ """Adjust image sharpness.
+
+ This function controls the sharpness of an image. An
+ enhancement factor of 0.0 gives a blurred image. A
+ factor of 1.0 gives the original image. And a factor
+ of 2.0 gives a sharpened image. It blends the source
+ image and the degenerated mean image:
+
+ .. math::
+ output = img * factor + degenerated * (1 - factor)
+
+ Args:
+ img (ndarray): Image to be sharpened. BGR order.
+ factor (float): Same as :func:`mmcv.adjust_brightness`.
+ kernel (np.ndarray, optional): Filter kernel to be applied on the img
+ to obtain the degenerated img. Defaults to None.
+
+ Note:
+ No value sanity check is enforced on the kernel set by users, so with
+ an inappropriate kernel, ``adjust_sharpness`` may fail to perform the
+ function its name indicates and instead apply whatever transform the
+ kernel determines.
+
+ Returns:
+ ndarray: The sharpened image.
+ """
+
+ if kernel is None:
+ # adopted from PIL.ImageFilter.SMOOTH
+ kernel = np.array([[1., 1., 1.], [1., 5., 1.], [1., 1., 1.]]) / 13
+ assert isinstance(kernel, np.ndarray), \
+ f'kernel must be of type np.ndarray, but got {type(kernel)} instead.'
+ assert kernel.ndim == 2, \
+ f'kernel must have a dimension of 2, but got {kernel.ndim} instead.'
+
+ degenerated = cv2.filter2D(img, -1, kernel)
+ sharpened_img = cv2.addWeighted(
+ img.astype(np.float32), factor, degenerated.astype(np.float32),
+ 1 - factor, 0)
+ sharpened_img = np.clip(sharpened_img, 0, 255)
+ return sharpened_img.astype(img.dtype)
+
+
+def adjust_lighting(img, eigval, eigvec, alphastd=0.1, to_rgb=True):
+ """AlexNet-style PCA jitter.
+
+ This data augmentation is proposed in `ImageNet Classification with Deep
+ Convolutional Neural Networks
+ `_.
+
+ Args:
+ img (ndarray): Image whose lighting is to be adjusted. BGR order.
+ eigval (ndarray): The eigenvalues of the covariance matrix of pixel
+ values.
+ eigvec (ndarray): The eigenvectors of the covariance matrix of pixel
+ values.
+ alphastd (float): The standard deviation for the distribution of alpha.
+ Defaults to 0.1.
+ to_rgb (bool): Whether to convert img to rgb.
+
+ Returns:
+ ndarray: The adjusted image.
+ """
+ assert isinstance(eigval, np.ndarray) and isinstance(eigvec, np.ndarray), \
+ f'eigval and eigvec should both be of type np.ndarray, got ' \
+ f'{type(eigval)} and {type(eigvec)} instead.'
+
+ assert eigval.ndim == 1 and eigvec.ndim == 2
+ assert eigvec.shape == (3, eigval.shape[0])
+ n_eigval = eigval.shape[0]
+ assert isinstance(alphastd, float), 'alphastd should be of type float, ' \
+ f'got {type(alphastd)} instead.'
+
+ img = img.copy().astype(np.float32)
+ if to_rgb:
+ cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) # inplace
+
+ alpha = np.random.normal(0, alphastd, n_eigval)
+ alter = eigvec \
+ * np.broadcast_to(alpha.reshape(1, n_eigval), (3, n_eigval)) \
+ * np.broadcast_to(eigval.reshape(1, n_eigval), (3, n_eigval))
+ alter = np.broadcast_to(alter.sum(axis=1).reshape(1, 1, 3), img.shape)
+ img_adjusted = img + alter
+ return img_adjusted
+
+
+def lut_transform(img, lut_table):
+ """Transform array by look-up table.
+
+ The function lut_transform fills the output array with values from the
+ look-up table. Indices of the entries are taken from the input array.
+
+ Args:
+ img (ndarray): Image to be transformed.
+ lut_table (ndarray): look-up table of 256 elements; in case of
+ multi-channel input array, the table should either have a single
+ channel (in this case the same table is used for all channels) or
+ the same number of channels as in the input array.
+
+ Returns:
+ ndarray: The transformed image.
+ """
+ assert isinstance(img, np.ndarray)
+ assert 0 <= np.min(img) and np.max(img) <= 255
+ assert isinstance(lut_table, np.ndarray)
+ assert lut_table.shape == (256, )
+
+ return cv2.LUT(np.array(img, dtype=np.uint8), lut_table)
+
+
+def clahe(img, clip_limit=40.0, tile_grid_size=(8, 8)):
+ """Use CLAHE method to process the image.
+
+ See `ZUIDERVELD,K. Contrast Limited Adaptive Histogram Equalization[J].
+ Graphics Gems, 1994:474-485.` for more information.
+
+ Args:
+ img (ndarray): Image to be processed.
+ clip_limit (float): Threshold for contrast limiting. Default: 40.0.
+ tile_grid_size (tuple[int]): Size of grid for histogram equalization.
+ Input image will be divided into equally sized rectangular tiles.
+ It defines the number of tiles in row and column. Default: (8, 8).
+
+ Returns:
+ ndarray: The processed image.
+ """
+ assert isinstance(img, np.ndarray)
+ assert img.ndim == 2
+ assert isinstance(clip_limit, (float, int))
+ assert is_tuple_of(tile_grid_size, int)
+ assert len(tile_grid_size) == 2
+
+ clahe = cv2.createCLAHE(clip_limit, tile_grid_size)
+ return clahe.apply(np.array(img, dtype=np.uint8))
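
A minimal sketch of the normalize/denormalize pair defined above, assuming both functions are importable from this module (the import path is an assumption); the mean/std values are illustrative:

import numpy as np
from mmcv_replace.image.photometric import imnormalize, imdenormalize  # path is an assumption

img = np.random.randint(0, 256, (4, 4, 3), dtype=np.uint8)  # BGR test image
mean = np.array([123.675, 116.28, 103.53], dtype=np.float32)
std = np.array([58.395, 57.12, 57.375], dtype=np.float32)

# imnormalize converts BGR->RGB, subtracts the mean and divides by the std;
# imdenormalize reverses those steps, so the round trip restores the image.
norm = imnormalize(img, mean, std, to_rgb=True)
restored = imdenormalize(norm, mean, std, to_bgr=True)
assert np.allclose(restored, img.astype(np.float32), atol=1e-3)
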
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/model_zoo/deprecated.json b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/model_zoo/deprecated.json
new file mode 100644
index 0000000000000000000000000000000000000000..25cf6f28caecc22a77e3136fefa6b8dfc0e6cb5b
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/model_zoo/deprecated.json
@@ -0,0 +1,6 @@
+{
+ "resnet50_caffe": "detectron/resnet50_caffe",
+ "resnet50_caffe_bgr": "detectron2/resnet50_caffe_bgr",
+ "resnet101_caffe": "detectron/resnet101_caffe",
+ "resnet101_caffe_bgr": "detectron2/resnet101_caffe_bgr"
+}
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/model_zoo/mmcls.json b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/model_zoo/mmcls.json
new file mode 100644
index 0000000000000000000000000000000000000000..c073a41d0aeb44ee0243f97ecc3558de538f9300
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/model_zoo/mmcls.json
@@ -0,0 +1,59 @@
+{
+ "vgg11": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg11_batch256_imagenet_20210208-4271cd6c.pth",
+ "vgg13": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg13_batch256_imagenet_20210208-4d1d6080.pth",
+ "vgg16": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg16_batch256_imagenet_20210208-db26f1a5.pth",
+ "vgg19": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg19_batch256_imagenet_20210208-e6920e4a.pth",
+ "vgg11_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg11_bn_batch256_imagenet_20210207-f244902c.pth",
+ "vgg13_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg13_bn_batch256_imagenet_20210207-1a8b7864.pth",
+ "vgg16_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg16_bn_batch256_imagenet_20210208-7e55cd29.pth",
+ "vgg19_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg19_bn_batch256_imagenet_20210208-da620c4f.pth",
+ "resnet18": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_8xb32_in1k_20210831-fbbb1da6.pth",
+ "resnet34": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_8xb32_in1k_20210831-f257d4e6.pth",
+ "resnet50": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth",
+ "resnet101": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet101_8xb32_in1k_20210831-539c63f8.pth",
+ "resnet152": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet152_8xb32_in1k_20210901-4d7582fa.pth",
+ "resnet50_v1d": "https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d50_b32x8_imagenet_20210531-db14775a.pth",
+ "resnet101_v1d": "https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d101_b32x8_imagenet_20210531-6e13bcd3.pth",
+ "resnet152_v1d": "https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d152_b32x8_imagenet_20210531-278cf22a.pth",
+ "resnext50_32x4d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext50_32x4d_b32x8_imagenet_20210429-56066e27.pth",
+ "resnext101_32x4d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext101_32x4d_b32x8_imagenet_20210506-e0fa3dd5.pth",
+ "resnext101_32x8d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext101_32x8d_b32x8_imagenet_20210506-23a247d5.pth",
+ "resnext152_32x4d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext152_32x4d_b32x8_imagenet_20210524-927787be.pth",
+ "se-resnet50": "https://download.openmmlab.com/mmclassification/v0/se-resnet/se-resnet50_batch256_imagenet_20200804-ae206104.pth",
+ "se-resnet101": "https://download.openmmlab.com/mmclassification/v0/se-resnet/se-resnet101_batch256_imagenet_20200804-ba5b51d4.pth",
+ "resnest50": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest50_imagenet_converted-1ebf0afe.pth",
+ "resnest101": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest101_imagenet_converted-032caa52.pth",
+ "resnest200": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest200_imagenet_converted-581a60f2.pth",
+ "resnest269": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest269_imagenet_converted-59930960.pth",
+ "shufflenet_v1": "https://download.openmmlab.com/mmclassification/v0/shufflenet_v1/shufflenet_v1_batch1024_imagenet_20200804-5d6cec73.pth",
+ "shufflenet_v2": "https://download.openmmlab.com/mmclassification/v0/shufflenet_v2/shufflenet_v2_batch1024_imagenet_20200812-5bf4721e.pth",
+ "mobilenet_v2": "https://download.openmmlab.com/mmclassification/v0/mobilenet_v2/mobilenet_v2_batch256_imagenet_20200708-3b2dc3af.pth",
+ "mobilenet_v3_small": "https://download.openmmlab.com/mmclassification/v0/mobilenet_v3/convert/mobilenet_v3_small-8427ecf0.pth",
+ "mobilenet_v3_large": "https://download.openmmlab.com/mmclassification/v0/mobilenet_v3/convert/mobilenet_v3_large-3ea3c186.pth",
+ "repvgg_A0": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-A0_3rdparty_4xb64-coslr-120e_in1k_20210909-883ab98c.pth",
+ "repvgg_A1": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-A1_3rdparty_4xb64-coslr-120e_in1k_20210909-24003a24.pth",
+ "repvgg_A2": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-A2_3rdparty_4xb64-coslr-120e_in1k_20210909-97d7695a.pth",
+ "repvgg_B0": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B0_3rdparty_4xb64-coslr-120e_in1k_20210909-446375f4.pth",
+ "repvgg_B1": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B1_3rdparty_4xb64-coslr-120e_in1k_20210909-750cdf67.pth",
+ "repvgg_B1g2": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B1g2_3rdparty_4xb64-coslr-120e_in1k_20210909-344f6422.pth",
+ "repvgg_B1g4": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B1g4_3rdparty_4xb64-coslr-120e_in1k_20210909-d4c1a642.pth",
+ "repvgg_B2": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B2_3rdparty_4xb64-coslr-120e_in1k_20210909-bd6b937c.pth",
+ "repvgg_B2g4": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B2g4_3rdparty_4xb64-autoaug-lbs-mixup-coslr-200e_in1k_20210909-7b7955f0.pth",
+ "repvgg_B3": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B3_3rdparty_4xb64-autoaug-lbs-mixup-coslr-200e_in1k_20210909-dda968bf.pth",
+ "repvgg_B3g4": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B3g4_3rdparty_4xb64-autoaug-lbs-mixup-coslr-200e_in1k_20210909-4e54846a.pth",
+ "repvgg_D2se": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-D2se_3rdparty_4xb64-autoaug-lbs-mixup-coslr-200e_in1k_20210909-cf3139b7.pth",
+ "res2net101_w26": "https://download.openmmlab.com/mmclassification/v0/res2net/res2net101-w26-s4_3rdparty_8xb32_in1k_20210927-870b6c36.pth",
+ "res2net50_w14": "https://download.openmmlab.com/mmclassification/v0/res2net/res2net50-w14-s8_3rdparty_8xb32_in1k_20210927-bc967bf1.pth",
+ "res2net50_w26": "https://download.openmmlab.com/mmclassification/v0/res2net/res2net50-w26-s8_3rdparty_8xb32_in1k_20210927-f547a94b.pth",
+ "swin_tiny": "https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_tiny_224_b16x64_300e_imagenet_20210616_090925-66df6be6.pth",
+ "swin_small": "https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_small_224_b16x64_300e_imagenet_20210615_110219-7f9d988b.pth",
+ "swin_base": "https://download.openmmlab.com/mmclassification/v0/swin-transformer/convert/swin_base_patch4_window7_224_22kto1k-f967f799.pth",
+ "swin_large": "https://download.openmmlab.com/mmclassification/v0/swin-transformer/convert/swin_large_patch4_window7_224_22kto1k-5f0996db.pth",
+ "t2t_vit_t_14": "https://download.openmmlab.com/mmclassification/v0/t2t-vit/t2t-vit-t-14_3rdparty_8xb64_in1k_20210928-b7c09b62.pth",
+ "t2t_vit_t_19": "https://download.openmmlab.com/mmclassification/v0/t2t-vit/t2t-vit-t-19_3rdparty_8xb64_in1k_20210928-7f1478d5.pth",
+ "t2t_vit_t_24": "https://download.openmmlab.com/mmclassification/v0/t2t-vit/t2t-vit-t-24_3rdparty_8xb64_in1k_20210928-fe95a61b.pth",
+ "tnt_small": "https://download.openmmlab.com/mmclassification/v0/tnt/tnt-small-p16_3rdparty_in1k_20210903-c56ee7df.pth",
+ "vit_base_p16": "https://download.openmmlab.com/mmclassification/v0/vit/finetune/vit-base-p16_in21k-pre-3rdparty_ft-64xb64_in1k-384_20210928-98e8652b.pth",
+ "vit_base_p32": "https://download.openmmlab.com/mmclassification/v0/vit/finetune/vit-base-p32_in21k-pre-3rdparty_ft-64xb64_in1k-384_20210928-9cea8599.pth",
+ "vit_large_p16": "https://download.openmmlab.com/mmclassification/v0/vit/finetune/vit-large-p16_in21k-pre-3rdparty_ft-64xb64_in1k-384_20210928-b20ba619.pth"
+}
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/model_zoo/open_mmlab.json b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/model_zoo/open_mmlab.json
new file mode 100644
index 0000000000000000000000000000000000000000..8311db4feef92faa0841c697d75efbee8430c3a0
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/model_zoo/open_mmlab.json
@@ -0,0 +1,50 @@
+{
+ "vgg16_caffe": "https://download.openmmlab.com/pretrain/third_party/vgg16_caffe-292e1171.pth",
+ "detectron/resnet50_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet50_caffe-788b5fa3.pth",
+ "detectron2/resnet50_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet50_msra-5891d200.pth",
+ "detectron/resnet101_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet101_caffe-3ad79236.pth",
+ "detectron2/resnet101_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet101_msra-6cc46731.pth",
+ "detectron2/resnext101_32x8d": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x8d-1516f1aa.pth",
+ "resnext50_32x4d": "https://download.openmmlab.com/pretrain/third_party/resnext50-32x4d-0ab1a123.pth",
+ "resnext101_32x4d": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x4d-a5af3160.pth",
+ "resnext101_64x4d": "https://download.openmmlab.com/pretrain/third_party/resnext101_64x4d-ee2c6f71.pth",
+ "contrib/resnet50_gn": "https://download.openmmlab.com/pretrain/third_party/resnet50_gn_thangvubk-ad1730dd.pth",
+ "detectron/resnet50_gn": "https://download.openmmlab.com/pretrain/third_party/resnet50_gn-9186a21c.pth",
+ "detectron/resnet101_gn": "https://download.openmmlab.com/pretrain/third_party/resnet101_gn-cac0ab98.pth",
+ "jhu/resnet50_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnet50_gn_ws-15beedd8.pth",
+ "jhu/resnet101_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnet101_gn_ws-3e3c308c.pth",
+ "jhu/resnext50_32x4d_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnext50_32x4d_gn_ws-0d87ac85.pth",
+ "jhu/resnext101_32x4d_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x4d_gn_ws-34ac1a9e.pth",
+ "jhu/resnext50_32x4d_gn": "https://download.openmmlab.com/pretrain/third_party/resnext50_32x4d_gn-c7e8b754.pth",
+ "jhu/resnext101_32x4d_gn": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x4d_gn-ac3bb84e.pth",
+ "msra/hrnetv2_w18_small": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w18_small-b5a04e21.pth",
+ "msra/hrnetv2_w18": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w18-00eb2006.pth",
+ "msra/hrnetv2_w32": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w32-dc9eeb4f.pth",
+ "msra/hrnetv2_w40": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w40-ed0b031c.pth",
+ "msra/hrnetv2_w48": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w48-d2186c55.pth",
+ "bninception_caffe": "https://download.openmmlab.com/pretrain/third_party/bn_inception_caffe-ed2e8665.pth",
+ "kin400/i3d_r50_f32s2_k400": "https://download.openmmlab.com/pretrain/third_party/i3d_r50_f32s2_k400-2c57e077.pth",
+ "kin400/nl3d_r50_f32s2_k400": "https://download.openmmlab.com/pretrain/third_party/nl3d_r50_f32s2_k400-fa7e7caa.pth",
+ "res2net101_v1d_26w_4s": "https://download.openmmlab.com/pretrain/third_party/res2net101_v1d_26w_4s_mmdetv2-f0a600f9.pth",
+ "regnetx_400mf": "https://download.openmmlab.com/pretrain/third_party/regnetx_400mf-a5b10d96.pth",
+ "regnetx_800mf": "https://download.openmmlab.com/pretrain/third_party/regnetx_800mf-1f4be4c7.pth",
+ "regnetx_1.6gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_1.6gf-5791c176.pth",
+ "regnetx_3.2gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_3.2gf-c2599b0f.pth",
+ "regnetx_4.0gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_4.0gf-a88f671e.pth",
+ "regnetx_6.4gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_6.4gf-006af45d.pth",
+ "regnetx_8.0gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_8.0gf-3c68abe7.pth",
+ "regnetx_12gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_12gf-4c2a3350.pth",
+ "resnet18_v1c": "https://download.openmmlab.com/pretrain/third_party/resnet18_v1c-b5776b93.pth",
+ "resnet50_v1c": "https://download.openmmlab.com/pretrain/third_party/resnet50_v1c-2cccc1ad.pth",
+ "resnet101_v1c": "https://download.openmmlab.com/pretrain/third_party/resnet101_v1c-e67eebb6.pth",
+ "mmedit/vgg16": "https://download.openmmlab.com/mmediting/third_party/vgg_state_dict.pth",
+ "mmedit/res34_en_nomixup": "https://download.openmmlab.com/mmediting/third_party/model_best_resnet34_En_nomixup.pth",
+ "mmedit/mobilenet_v2": "https://download.openmmlab.com/mmediting/third_party/mobilenet_v2.pth",
+ "contrib/mobilenet_v3_large": "https://download.openmmlab.com/pretrain/third_party/mobilenet_v3_large-bc2c3fd3.pth",
+ "contrib/mobilenet_v3_small": "https://download.openmmlab.com/pretrain/third_party/mobilenet_v3_small-47085aa1.pth",
+ "resnest50": "https://download.openmmlab.com/pretrain/third_party/resnest50_d2-7497a55b.pth",
+ "resnest101": "https://download.openmmlab.com/pretrain/third_party/resnest101_d2-f3b931b2.pth",
+ "resnest200": "https://download.openmmlab.com/pretrain/third_party/resnest200_d2-ca88e41f.pth",
+ "darknet53": "https://download.openmmlab.com/pretrain/third_party/darknet53-a628ea1b.pth",
+ "mmdet/mobilenet_v2": "https://download.openmmlab.com/mmdetection/v2.0/third_party/mobilenet_v2_batch256_imagenet-ff34753d.pth"
+}
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/onnx/__init__.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/onnx/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..360c27e38e27dbf71b006766e76d91e56789e264
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/onnx/__init__.py
@@ -0,0 +1,18 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .info import is_custom_op_loaded
+from .symbolic import register_extra_symbolics
+
+__all__ = ['register_extra_symbolics', 'is_custom_op_loaded']
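
A minimal sketch of how these exports are typically used before an ONNX export, assuming the package resolves under the usual `mmcv.onnx` name (the replacement files keep the same public names); the opset value and the exported model are illustrative:

import torch
from mmcv.onnx import register_extra_symbolics, is_custom_op_loaded

# Register the extra symbolics (e.g. the Resize-based interpolate ops) before
# calling torch.onnx.export.
register_extra_symbolics(11)

if not is_custom_op_loaded():
    print('Neither the TensorRT plugin nor the onnxruntime custom-op library '
          'was found; exporting plain ONNX ops only.')

model = torch.nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False)
dummy = torch.randn(1, 3, 32, 32)
torch.onnx.export(model, dummy, 'upsample.onnx', opset_version=11)
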
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/onnx/info.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/onnx/info.py
new file mode 100644
index 0000000000000000000000000000000000000000..3aa367f3d94debcd28706c0c580ddc06f8cf6455
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/onnx/info.py
@@ -0,0 +1,34 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+
+import torch
+
+
+def is_custom_op_loaded():
+ flag = False
+ try:
+ from ..tensorrt import is_tensorrt_plugin_loaded
+ flag = is_tensorrt_plugin_loaded()
+ except (ImportError, ModuleNotFoundError):
+ pass
+ if not flag:
+ try:
+ from ..ops import get_onnxruntime_op_path
+ ort_lib_path = get_onnxruntime_op_path()
+ flag = os.path.exists(ort_lib_path)
+ except (ImportError, ModuleNotFoundError):
+ pass
+ return flag or torch.__version__ == 'parrots'
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/onnx/onnx_utils/__init__.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/onnx/onnx_utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..5880464d7daa3df842818358dbff34938a119ab3
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/onnx/onnx_utils/__init__.py
@@ -0,0 +1,14 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/onnx/onnx_utils/symbolic_helper.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/onnx/onnx_utils/symbolic_helper.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a922060a4e5ec0a8451388b273272977abfabd4
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/onnx/onnx_utils/symbolic_helper.py
@@ -0,0 +1,344 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Modified from https://github.com/pytorch/pytorch."""
+import warnings
+from functools import wraps
+from sys import maxsize
+
+import torch
+import torch.onnx
+# This import monkey-patches graph manipulation methods on Graph, used for the
+# ONNX symbolics
+import torch.onnx.utils
+from torch._C import ListType
+
+# ---------------------------------------------------------------------------------
+# Helper functions
+# ---------------------------------------------------------------------------------
+
+# Save some builtins as locals, because we'll shadow them below
+_sum = sum
+
+
+def _parse_arg(value, desc):
+ if desc == 'none':
+ return value
+ if desc == 'v' or not _is_value(value):
+ return value
+ if value.node().mustBeNone():
+ return None
+ if value.node().kind() == 'onnx::Constant':
+ tval = value.node()['value']
+ if desc == 'i':
+ return int(tval)
+ elif desc == 'f':
+ return float(tval)
+ elif desc == 'b':
+ return bool(tval)
+ elif desc == 's':
+ return str(tval)
+ elif desc == 't':
+ return tval
+ elif desc == 'is':
+ return [int(v) for v in tval]
+ elif desc == 'fs':
+ return [float(v) for v in tval]
+ else:
+ raise RuntimeError(
+ "ONNX symbolic doesn't know to interpret Constant node")
+ elif value.node().kind() == 'prim::ListConstruct':
+ if desc == 'is':
+ for v in value.node().inputs():
+ if v.node().kind() != 'onnx::Constant':
+ raise RuntimeError(
+ "Failed to export an ONNX attribute '" +
+ v.node().kind() +
+ "', since it's not constant, please try to make "
+ 'things (e.g., kernel size) static if possible')
+ return [int(v.node()['value']) for v in value.node().inputs()]
+ else:
+ raise RuntimeError(
+ "ONNX symbolic doesn't know to interpret ListConstruct node")
+
+ raise RuntimeError('Unexpected node type: {}'.format(value.node().kind()))
+
+
+def _maybe_get_const(value, desc):
+ if _is_value(value) and value.node().kind() == 'onnx::Constant':
+ return _parse_arg(value, desc)
+ return value
+
+
+def _maybe_get_scalar(value):
+ value_t = _maybe_get_const(value, 't')
+ if isinstance(value_t, torch.Tensor) and value_t.shape == ():
+ return value_t
+ return value
+
+
+def _get_const(value, desc, arg_name):
+ if _is_value(value) and value.node().kind() not in ('onnx::Constant',
+ 'prim::Constant'):
+ raise RuntimeError('ONNX symbolic expected a constant'
+ ' value of the {} argument, got `{}`'.format(
+ arg_name, value))
+ return _parse_arg(value, desc)
+
+
+def _unpack_list(list_value):
+ list_node = list_value.node()
+ assert list_node.kind() == 'prim::ListConstruct'
+ return list(list_node.inputs())
+
+
+# Check if list_value is output from prim::ListConstruct
+# This is usually called before _unpack_list to ensure the list can be
+# unpacked.
+def _is_packed_list(list_value):
+ return _is_value(
+ list_value) and list_value.node().kind() == 'prim::ListConstruct'
+
+
+def parse_args(*arg_descriptors):
+
+ def decorator(fn):
+ fn._arg_descriptors = arg_descriptors
+
+ def wrapper(g, *args):
+ # some args may be optional, so the length may be smaller
+ assert len(arg_descriptors) >= len(args)
+ args = [
+ _parse_arg(arg, arg_desc)
+ for arg, arg_desc in zip(args, arg_descriptors)
+ ]
+ return fn(g, *args)
+
+ # In Python 2 functools.wraps chokes on partially applied functions, so
+ # we need this as a workaround
+ try:
+ wrapper = wraps(fn)(wrapper)
+ except Exception:
+ pass
+ return wrapper
+
+ return decorator
+
+
+def _scalar(x):
+ """Convert a scalar tensor into a Python value."""
+ assert x.numel() == 1
+ return x.item()
+
+
+def _if_scalar_type_as(g, self, tensor):
+ """Convert self into the same type of tensor, as necessary."""
+ if isinstance(self, torch._C.Value):
+ return self
+
+ scalar_type = tensor.type().scalarType()
+ if scalar_type:
+ ty = scalar_type.lower()
+ return getattr(self, ty)()
+
+ return self
+
+
+def _is_none(x):
+ return x.node().mustBeNone()
+
+
+def _is_value(x):
+ return isinstance(x, torch._C.Value)
+
+
+def _is_tensor_list(x):
+ return x.type().isSubtypeOf(ListType.ofTensors())
+
+
+def _unimplemented(op, msg):
+ warnings.warn('ONNX export failed on ' + op + ' because ' + msg +
+ ' not supported')
+
+
+def _try_get_scalar_type(*args):
+ for arg in args:
+ try:
+ return arg.type().scalarType()
+ except RuntimeError:
+ pass
+ return None
+
+
+def _topk_helper(g, input, k, dim, largest=True, sorted=False, out=None):
+ if out is not None:
+ _unimplemented('TopK', 'Out parameter is not supported')
+ if not _is_value(k):
+ k = g.op('Constant', value_t=torch.tensor([k], dtype=torch.int64))
+ else:
+ k = g.op('Reshape', k, g.op('Constant', value_t=torch.tensor([1])))
+ return g.op(
+ 'TopK',
+ input,
+ k,
+ axis_i=dim,
+ largest_i=largest,
+ sorted_i=sorted,
+ outputs=2)
+
+
+def _slice_helper(g,
+ input,
+ axes,
+ starts,
+ ends,
+ steps=None,
+ dynamic_slice=False):
+ # TODO(ruobing): add support for opset<10
+ from torch.onnx.symbolic_opset10 import _slice
+ return _slice(g, input, axes, starts, ends, steps, dynamic_slice)
+
+
+def _unsqueeze_helper(g, input, dim):
+ from torch.onnx.symbolic_opset9 import unsqueeze
+ return unsqueeze(g, input, dim)
+
+
+def _interpolate_size_to_scales(g, input, output_size, dim):
+ output_size = _maybe_get_const(output_size, 'is')
+ if _is_value(output_size):
+ offset = 2
+ offsets = g.op(
+ 'Constant', value_t=torch.ones(offset, dtype=torch.float32))
+ dividend = g.op(
+ 'Cast', output_size, to_i=cast_pytorch_to_onnx['Float'])
+ divisor = _slice_helper(
+ g, g.op('Shape', input), axes=[0], ends=[maxsize], starts=[offset])
+ divisor = g.op('Cast', divisor, to_i=cast_pytorch_to_onnx['Float'])
+ scale_dims = g.op('Div', dividend, divisor)
+ scales = g.op('Concat', offsets, scale_dims, axis_i=0)
+ else:
+ scales_constant = [
+ 1. if i < 2 else float(output_size[-(dim - i)]) /
+ float(input.type().sizes()[-(dim - i)]) for i in range(0, dim)
+ ]
+ scales = g.op(
+ 'Constant',
+ value_t=torch.tensor(scales_constant, dtype=torch.float32))
+ return scales
+
+
+def _interpolate_get_scales_if_available(g, scales):
+ if len(scales) == 0:
+ return None
+ # scales[0] is NoneType in Pytorch == 1.5.1
+ # scales[0] is TensorType with sizes = [] in Pytorch == 1.6.0
+ # scales[0] is ListType in Pytorch == 1.7.0
+ # scales[0] is TensorType with sizes = [2] in Pytorch == 1.8.0
+ scale_desc = 'fs' if scales[0].type().kind() == 'ListType' or (
+ scales[0].type().kind() == 'TensorType' and
+ (sum(scales[0].type().sizes()) > 1)) else 'f'
+ available_scales = _maybe_get_const(
+ scales[0], scale_desc) != -1 and not _is_none(scales[0])
+
+ if not available_scales:
+ return None
+
+ offsets = g.op('Constant', value_t=torch.ones(2, dtype=torch.float32))
+ if scale_desc == 'fs':
+ scales_list = g.op(
+ 'Constant',
+ value_t=torch.tensor(_maybe_get_const(scales[0], scale_desc)))
+ # modify to support PyTorch==1.7.0
+ # https://github.com/pytorch/pytorch/blob/75ee5756715e7161314ce037474843b68f69fc04/torch/onnx/symbolic_helper.py#L375 # noqa: E501
+ scales = g.op('Concat', offsets, scales_list, axis_i=0)
+ else:
+ # for PyTorch < 1.7.0
+ scales_list = []
+ for scale in scales:
+ unsqueezed_scale = _unsqueeze_helper(g, scale, 0)
+ # ONNX only supports float for the scales. double -> float.
+ unsqueezed_scale = g.op(
+ 'Cast', unsqueezed_scale, to_i=cast_pytorch_to_onnx['Float'])
+ scales_list.append(unsqueezed_scale)
+ scales = g.op('Concat', offsets, *scales_list, axis_i=0)
+ return scales
+
+
+def _get_interpolate_attributes(g, mode, args):
+ if mode == 'nearest':
+ align_corners = None
+ scales = args[0:]
+ else:
+ align_corners = args[0]
+ scales = args[1:]
+ scales = _interpolate_get_scales_if_available(g, scales)
+ return scales, align_corners
+
+
+def _interpolate_get_scales(g, scale_factor, dim):
+ offsets = g.op('Constant', value_t=torch.ones(2, dtype=torch.float32))
+ if isinstance(scale_factor.type(), torch._C.ListType):
+ return g.op('Concat', offsets, scale_factor, axis_i=0)
+ else:
+ scale_factor = _unsqueeze_helper(g, scale_factor, 0)
+ scale_factor = g.op(
+ 'Cast', scale_factor, to_i=cast_pytorch_to_onnx['Float'])
+ scales = [scale_factor for i in range(dim - 2)]
+ scale_factor = g.op('Concat', offsets, *scales, axis_i=0)
+ return scale_factor
+
+
+def _size_helper(g, self, dim):
+ full_shape = g.op('Shape', self)
+ from torch.onnx.symbolic_opset9 import select
+ return select(g, full_shape, g.op('Constant', value_t=torch.tensor([0])),
+ dim)
+
+
+def _avgpool_helper(tuple_fn, padding, kernel_size, stride, divisor_override,
+ name):
+ if divisor_override and divisor_override.node().kind() != 'prim::Constant':
+ return _unimplemented(name, 'divisor_override')
+ if not stride:
+ stride = kernel_size
+ padding = tuple(tuple_fn(padding))
+ return padding
+
+
+# Metaprogram symbolics for each ATen native specialized cast operator.
+# E.g., we specify a function named `_cast_uint8_t` that instantiates an
+# ONNX cast node with `to` attribute 'UINT8'
+#
+# TODO: remove these once we support Type's in the JIT IR and we can once again
+# use the unified toType operator
+cast_pytorch_to_onnx = {
+ 'Byte': torch.onnx.TensorProtoDataType.UINT8,
+ 'Char': torch.onnx.TensorProtoDataType.INT8,
+ 'Double': torch.onnx.TensorProtoDataType.DOUBLE,
+ 'Float': torch.onnx.TensorProtoDataType.FLOAT,
+ 'Half': torch.onnx.TensorProtoDataType.FLOAT16,
+ 'Int': torch.onnx.TensorProtoDataType.INT32,
+ 'Long': torch.onnx.TensorProtoDataType.INT64,
+ 'Short': torch.onnx.TensorProtoDataType.INT16,
+ 'Bool': torch.onnx.TensorProtoDataType.BOOL,
+ 'ComplexFloat': torch.onnx.TensorProtoDataType.COMPLEX64,
+ 'ComplexDouble': torch.onnx.TensorProtoDataType.COMPLEX128,
+ 'Undefined': torch.onnx.TensorProtoDataType.UNDEFINED,
+}
+
+# Global set to store the list of quantized operators in the network.
+# This is currently only used in the conversion of quantized ops from PT
+# -> C2 via ONNX.
+_quantized_ops = set()
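
A minimal sketch of how the `parse_args` decorator above is consumed when writing a symbolic function; the operator, attribute choices, and import path are purely illustrative:

from mmcv_replace.onnx.onnx_utils.symbolic_helper import parse_args  # path is an assumption

@parse_args('v', 'is', 'i')
def example_max_pool_symbolic(g, input, kernel_size, ceil_mode):
    # 'v' leaves `input` as a raw graph value, 'is' unpacks `kernel_size` into a
    # Python list of ints, and 'i' unpacks `ceil_mode` into an int, so both can
    # be passed directly as ONNX node attributes.
    return g.op(
        'MaxPool',
        input,
        kernel_shape_i=kernel_size,
        ceil_mode_i=ceil_mode)
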
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/onnx/symbolic.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/onnx/symbolic.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c597c0846fa492967785ad83e31c7cbfad3d731
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/onnx/symbolic.py
@@ -0,0 +1,509 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Modified from https://github.com/pytorch/pytorch."""
+import os
+
+import numpy as np
+import torch
+from torch.nn.modules.utils import _pair, _single, _triple
+from torch.onnx.symbolic_helper import parse_args
+from torch.onnx.symbolic_registry import register_op
+
+from .onnx_utils import symbolic_helper as sym_help
+
+
+def _interpolate(name, dim, interpolate_mode):
+
+ def symbolic_fn(g, input, output_size, *args):
+ scales, align_corners = sym_help._get_interpolate_attributes(
+ g, interpolate_mode, args)
+ align_corners = sym_help._maybe_get_scalar(align_corners)
+ transformation_mode = 'asymmetric' \
+ if interpolate_mode == 'nearest' \
+ else 'align_corners' if align_corners else 'pytorch_half_pixel'
+ empty_tensor = g.op(
+ 'Constant', value_t=torch.tensor([], dtype=torch.float32))
+
+ if scales is None:
+ if 'ONNX_BACKEND' in os.environ and os.environ[
+ 'ONNX_BACKEND'] == 'TensorRT':
+ input_size = input.type().sizes()
+ # slice the first two dim
+ input_size = input_size[:2]
+ # convert output_size to int type
+ output_size = sym_help._maybe_get_const(output_size, 'is')
+ input_size.extend(output_size)
+ output_size = g.op(
+ 'Constant',
+ value_t=torch.tensor(input_size, dtype=torch.int64))
+ else:
+ input_size = g.op('Shape', input)
+ input_size_beg = sym_help._slice_helper(
+ g, input_size, axes=[0], ends=[2], starts=[0])
+ output_size = g.op(
+ 'Cast',
+ output_size,
+ to_i=sym_help.cast_pytorch_to_onnx['Long'])
+ output_size = g.op(
+ 'Concat', input_size_beg, output_size, axis_i=0)
+ scales = g.op(
+ 'Constant', value_t=torch.tensor([], dtype=torch.float32))
+ return g.op(
+ 'Resize',
+ input,
+ empty_tensor,
+ # roi only takes effect with
+ # coordinate_transformation_mode="tf_crop_and_resize"
+ scales, # scales is not needed since we are sending out_size
+ output_size,
+ coordinate_transformation_mode_s=transformation_mode,
+ cubic_coeff_a_f=-0.75, # only valid when mode="cubic"
+ mode_s=interpolate_mode, # nearest, linear, or cubic
+ nearest_mode_s='floor') # only valid when mode="nearest"
+ else:
+ return g.op(
+ 'Resize',
+ input,
+ empty_tensor,
+ # roi only takes effect with
+ # coordinate_transformation_mode="tf_crop_and_resize"
+ scales, # scales is not needed since we are sending out_size
+ coordinate_transformation_mode_s=transformation_mode,
+ cubic_coeff_a_f=-0.75, # only valid when mode="cubic"
+ mode_s=interpolate_mode, # nearest, linear, or cubic
+ nearest_mode_s='floor') # only valid when mode="nearest"
+
+ return symbolic_fn
+
+
+upsample_nearest1d = _interpolate('upsample_nearest1d', 3, 'nearest')
+upsample_nearest2d = _interpolate('upsample_nearest2d', 4, 'nearest')
+upsample_nearest3d = _interpolate('upsample_nearest3d', 5, 'nearest')
+upsample_linear1d = _interpolate('upsample_linear1d', 3, 'linear')
+upsample_bilinear2d = _interpolate('upsample_bilinear2d', 4, 'linear')
+upsample_trilinear3d = _interpolate('upsample_trilinear3d', 5, 'linear')
+upsample_bicubic2d = _interpolate('upsample_bicubic2d', 4, 'cubic')
+
+
+@parse_args('v', 'v', 'i', 'i', 'i', 'none')
+def topk(g, self, k, dim, largest, sorted, out=None):
+ return sym_help._topk_helper(
+ g, self, k, dim, largest=largest, sorted=sorted, out=out)
+
+
+def masked_select(g, self, mask):
+ from torch.onnx.symbolic_opset9 import expand_as, nonzero
+ index = nonzero(g, expand_as(g, mask, self))
+ return g.op('GatherND', self, index)
+
+
+def _prepare_onnx_paddings(g, dim, pad):
+ pad_len = torch.onnx.symbolic_opset9.size(
+ g, pad, g.op('Constant', value_t=torch.tensor([0])))
+ # Set extension = [0] * (dim * 2 - len(pad))
+ extension = g.op(
+ 'Sub',
+ g.op('Mul',
+ g.op('Constant', value_t=torch.tensor(dim, dtype=torch.int64)),
+ g.op('Constant', value_t=torch.tensor(2, dtype=torch.int64))),
+ pad_len)
+ pad = g.op('Cast', pad, to_i=sym_help.cast_pytorch_to_onnx['Long'])
+ paddings = g.op(
+ 'Concat',
+ pad,
+ g.op(
+ 'ConstantOfShape',
+ extension,
+ value_t=torch.tensor([0], dtype=torch.int64)),
+ axis_i=0)
+ paddings = g.op('Reshape', paddings,
+ g.op('Constant', value_t=torch.tensor([-1, 2])))
+ paddings = g.op(
+ 'Transpose',
+ torch.onnx.symbolic_opset10.flip(g, paddings, [0]),
+ perm_i=[1, 0])
+ paddings = g.op('Reshape', paddings,
+ g.op('Constant', value_t=torch.tensor([-1])))
+ padding_c = g.op(
+ 'Cast', paddings, to_i=sym_help.cast_pytorch_to_onnx['Long'])
+ return padding_c
+
+
+def constant_pad_nd(g, input, padding, value=None):
+ mode = 'constant'
+ value = sym_help._maybe_get_scalar(value)
+ value = sym_help._if_scalar_type_as(g, value, input)
+ pad = _prepare_onnx_paddings(g, input.type().dim(), padding)
+ return g.op('Pad', input, pad, value, mode_s=mode)
+
+
+def reflection_pad(g, input, padding):
+ mode = 'reflect'
+ paddings = _prepare_onnx_paddings(g, input.type().dim(), padding)
+ return g.op('Pad', input, paddings, mode_s=mode)
+
+
+reflection_pad1d = reflection_pad
+reflection_pad2d = reflection_pad
+reflection_pad3d = reflection_pad
+
+
+def _avg_pool(name, tuple_fn):
+
+ @parse_args('v', 'is', 'is', 'is', 'i', 'i', 'none')
+ def symbolic_fn(g,
+ input,
+ kernel_size,
+ stride,
+ padding,
+ ceil_mode,
+ count_include_pad,
+ divisor_override=None):
+ padding = sym_help._avgpool_helper(tuple_fn, padding, kernel_size,
+ stride, divisor_override, name)
+ if not stride:
+ stride = kernel_size
+ if count_include_pad:
+ input = g.op(
+ 'Pad',
+ input,
+ g.op(
+ 'Constant',
+ value_t=torch.tensor(((0, ) * 2 + padding) * 2)),
+ mode_s='constant')
+ padding = (0, ) * len(padding)
+ output = g.op(
+ 'AveragePool',
+ input,
+ kernel_shape_i=tuple_fn(kernel_size),
+ strides_i=tuple_fn(stride),
+ pads_i=padding * 2,
+ ceil_mode_i=ceil_mode)
+ return output
+
+ return symbolic_fn
+
+
+avg_pool1d = _avg_pool('avg_pool1d', _single)
+avg_pool2d = _avg_pool('avg_pool2d', _pair)
+avg_pool3d = _avg_pool('avg_pool3d', _triple)
+
+
+def _get_im2col_indices_along_dim(g, input_d, kernel_size_d, dilation_d,
+ padding_d, stride_d):
+ # Input is always 4-D (N, C, H, W)
+ # Calculate indices of sliding blocks along spatial dimension
+ # Slide kernel over input each dim d:
+ # each dimension d ranges from 0 to
+ # input[d]+2xpadding[d]-dilation[d]x(kernel_size[d]-1)
+ # with steps = stride
+
+ blocks_d = g.op('Add', input_d,
+ g.op('Constant', value_t=torch.tensor(padding_d * 2)))
+ blocks_d = g.op(
+ 'Sub', blocks_d,
+ g.op(
+ 'Constant',
+ value_t=torch.tensor(dilation_d * (kernel_size_d - 1))))
+
+ # Stride kernel over input and find starting indices along dim d
+ blocks_d_indices = g.op('Range', g.op('Constant', value_t=torch.tensor(0)),
+ blocks_d,
+ g.op('Constant', value_t=torch.tensor(stride_d)))
+
+ # Apply dilation on kernel and find its indices along dim d
+ kernel_grid = np.arange(0, kernel_size_d * dilation_d, dilation_d)
+ kernel_grid = g.op('Constant', value_t=torch.tensor([kernel_grid]))
+
+ # Broadcast and add kernel starting positions (indices) with
+ # kernel_grid along dim d, to get block indices along dim d
+ blocks_d_indices = g.op(
+ 'Unsqueeze', blocks_d_indices, axes_i=[0]) # Reshape to [1, -1]
+ kernel_mask = g.op('Reshape', kernel_grid,
+ g.op('Constant', value_t=torch.tensor([-1, 1])))
+ block_mask = g.op('Add', blocks_d_indices, kernel_mask)
+
+ return block_mask
+
+
+def _get_im2col_padded_input(g, input, padding_h, padding_w):
+ # Input is always 4-D tensor (N, C, H, W)
+ # Padding tensor has the following format: (padding_h, padding_w)
+ # Reshape the padding to follow ONNX format:
+ # (dim1_begin, dim2_begin,...,dim1_end, dim2_end,...)
+ pad = g.op(
+ 'Constant', value_t=torch.LongTensor([0, 0, padding_h, padding_w] * 2))
+ return g.op('Pad', input, pad)
+
+
+def _get_im2col_output_shape(g, input, kernel_h, kernel_w):
+ batch_dim = size(g, input, g.op('Constant', value_t=torch.tensor(0)))
+ channel_dim = size(g, input, g.op('Constant', value_t=torch.tensor(1)))
+ channel_unfolded = g.op(
+ 'Mul', channel_dim,
+ g.op('Constant', value_t=torch.tensor(kernel_h * kernel_w)))
+
+ return g.op(
+ 'Concat',
+ g.op('Unsqueeze', batch_dim, axes_i=[0]),
+ g.op('Unsqueeze', channel_unfolded, axes_i=[0]),
+ g.op('Constant', value_t=torch.tensor([-1])),
+ axis_i=0)
+
+
+def size(g, self, dim=None):
+ if dim is None:
+ return g.op('Shape', self)
+ return sym_help._size_helper(g, self, dim)
+
+
+@parse_args('v', 'is', 'is', 'is', 'is')
+def im2col(g, input, kernel_size, dilation, padding, stride):
+ # Input is always 4-D tensor (N, C, H, W)
+ # All other args are int[2]
+
+ input_h = size(g, input, g.op('Constant', value_t=torch.tensor(2)))
+ input_w = size(g, input, g.op('Constant', value_t=torch.tensor(3)))
+
+ stride_h, stride_w = stride[0], stride[1]
+ padding_h, padding_w = padding[0], padding[1]
+ dilation_h, dilation_w = dilation[0], dilation[1]
+ kernel_h, kernel_w = kernel_size[0], kernel_size[1]
+
+ blocks_row_indices = _get_im2col_indices_along_dim(g, input_h, kernel_h,
+ dilation_h, padding_h,
+ stride_h)
+ blocks_col_indices = _get_im2col_indices_along_dim(g, input_w, kernel_w,
+ dilation_w, padding_w,
+ stride_w)
+
+ output_shape = _get_im2col_output_shape(g, input, kernel_h, kernel_w)
+ padded_input = _get_im2col_padded_input(g, input, padding_h, padding_w)
+
+ output = g.op('Gather', padded_input, blocks_row_indices, axis_i=2)
+ output = g.op('Gather', output, blocks_col_indices, axis_i=4)
+ output = g.op('Transpose', output, perm_i=[0, 1, 2, 4, 3, 5])
+ return g.op('Reshape', output, output_shape)
+
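+# Illustrative note (added comment, not part of the original file): this
+# symbolic mirrors the eager-mode ``torch.nn.functional.unfold`` (im2col), so
+# for a static 4-D input the exported graph is expected to reproduce, e.g.:
+#   >>> x = torch.randn(1, 3, 8, 8)
+#   >>> cols = torch.nn.functional.unfold(
+#   ...     x, kernel_size=3, dilation=1, padding=1, stride=1)
+#   >>> cols.shape    # (1, 3 * 3 * 3, 64) == (N, C * kH * kW, L)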
+
+@parse_args('v', 'i')
+def one_hot(g, self, num_classes):
+ values = g.op('Constant', value_t=torch.LongTensor([0, 1]))
+ depth = g.op('Constant', value_t=torch.LongTensor([num_classes]))
+ return g.op('OneHot', self, depth, values, axis_i=-1)
+
+
+@parse_args('v', 'i', 'none')
+def softmax(g, input, dim, dtype=None):
+ input_dim = input.type().dim()
+ if input_dim:
+ # TODO: remove this as onnx opset 11 spec allows negative axes
+ if dim < 0:
+ dim = input_dim + dim
+ if input_dim == dim + 1:
+ softmax = g.op('Softmax', input, axis_i=dim)
+ if dtype and dtype.node().kind() != 'prim::Constant':
+ parsed_dtype = sym_help._get_const(dtype, 'i', 'dtype')
+ softmax = g.op(
+ 'Cast',
+ softmax,
+ to_i=sym_help.scalar_type_to_onnx[parsed_dtype])
+ return softmax
+
+ max_value = g.op('ReduceMax', input, axes_i=[dim], keepdims_i=1)
+ input = g.op('Sub', input, max_value)
+ exp = g.op('Exp', input)
+ sum = g.op('ReduceSum', exp, axes_i=[dim])
+ softmax = g.op('Div', exp, sum)
+ if dtype and dtype.node().kind() != 'prim::Constant':
+ parsed_dtype = sym_help._get_const(dtype, 'i', 'dtype')
+ softmax = g.op(
+ 'Cast', softmax, to_i=sym_help.scalar_type_to_onnx[parsed_dtype])
+ return softmax
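+
+# Numerical note (added comment, not part of the original file): the
+# decomposed branch above is the max-shifted softmax,
+#   softmax(x)_i = exp(x_i - max(x)) / sum_j exp(x_j - max(x)),
+# which equals the plain definition but keeps Exp from overflowing, e.g.:
+#   >>> x = torch.tensor([1000., 1001.])
+#   >>> torch.exp(x - x.max()) / torch.exp(x - x.max()).sum()
+#   tensor([0.2689, 0.7311])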
+
+
+def _adaptive_pool(name, type, tuple_fn, fn=None):
+
+ @parse_args('v', 'is')
+ def symbolic_fn(g, input, output_size):
+ if output_size == [1] * len(output_size) and type == 'AveragePool':
+ return g.op('GlobalAveragePool', input)
+ if not input.isCompleteTensor():
+ if output_size == [1] * len(output_size):
+ return g.op('GlobalMaxPool', input), None
+ raise NotImplementedError(
+ '[Adaptive pool]:input size not accessible')
+ dim = input.type().sizes()[2:]
+ if output_size == [1] * len(output_size) and type == 'MaxPool':
+ return g.op('GlobalMaxPool', input), None
+
+ # compute stride = floor(input_size / output_size)
+ s = [int(dim[i] / output_size[i]) for i in range(0, len(dim))]
+
+ # compute kernel_size = input_size - (output_size - 1) * stride
+ k = [dim[i] - (output_size[i] - 1) * s[i] for i in range(0, len(dim))]
+
+ # call max_poolxd_with_indices to get indices in the output
+ if type == 'MaxPool':
+ return fn(g, input, k, k, (0, ) * len(dim), (1, ) * len(dim),
+ False)
+ output = g.op(
+ type,
+ input,
+ kernel_shape_i=tuple_fn(k),
+ strides_i=tuple_fn(s),
+ ceil_mode_i=False)
+ return output
+
+ return symbolic_fn
+
+
+adaptive_avg_pool1d = _adaptive_pool('adaptive_avg_pool1d', 'AveragePool',
+ _single)
+adaptive_avg_pool2d = _adaptive_pool('adaptive_avg_pool2d', 'AveragePool',
+ _pair)
+adaptive_avg_pool3d = _adaptive_pool('adaptive_avg_pool3d', 'AveragePool',
+ _triple)
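+
+# Worked example for the stride/kernel derivation above (added comment, not
+# part of the original file): adaptively average-pooling a static 7x7 map to
+# 3x3 gives stride = floor(7 / 3) = 2 and kernel = 7 - (3 - 1) * 2 = 3, so a
+# plain pool with those settings matches the adaptive result:
+#   >>> x = torch.randn(1, 1, 7, 7)
+#   >>> torch.allclose(torch.nn.functional.adaptive_avg_pool2d(x, 3),
+#   ...                torch.nn.functional.avg_pool2d(x, 3, stride=2))
+#   True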
+
+
+def new_full(g,
+ self,
+ size,
+ fill_value,
+ dtype,
+ layout,
+ device,
+ pin_memory=False):
+ from torch.onnx.symbolic_opset9 import full
+ if dtype is None and self.isCompleteTensor():
+ dtype = self.type().scalarType()
+ dtype = sym_help.scalar_type_to_onnx.index(
+ sym_help.cast_pytorch_to_onnx[dtype])
+ return full(g, size, fill_value, dtype, layout, device, pin_memory)
+
+
+@parse_args('v', 'v', 'i', 'i', 'i')
+def grid_sampler(g,
+ input,
+ grid,
+ interpolation_mode,
+ padding_mode,
+ align_corners=False):
+ return g.op(
+ 'mmcv::grid_sampler',
+ input,
+ grid,
+ interpolation_mode_i=interpolation_mode,
+ padding_mode_i=padding_mode,
+ align_corners_i=align_corners)
+
+
+@parse_args('v', 'i')
+def cummax(g, input, dim):
+ return g.op('mmcv::cummax', input, dim_i=dim, outputs=2)
+
+
+@parse_args('v', 'i')
+def cummin(g, input, dim):
+ return g.op('mmcv::cummin', input, dim_i=dim, outputs=2)
+
+
+@parse_args('v', 'v', 'is')
+def roll(g, input, shifts, dims):
+ from torch.onnx.symbolic_opset9 import squeeze
+ from packaging import version
+ input_shape = g.op('Shape', input)
+
+ need_flatten = len(dims) == 0
+ # If dims is not specified, the tensor will be flattened before
+ # rolling and then restored to the original shape.
+ if need_flatten:
+ resize_shape = input_shape
+ input = g.op('Reshape', input,
+ g.op('Constant', value_t=torch.LongTensor([1, -1])))
+ input_shape = g.op('Shape', input)
+ dims = [1]
+
+ for index, dim in enumerate(dims):
+ end_size = sym_help._slice_helper(
+ g, input_shape, axes=[0], ends=[dim + 1], starts=[dim])
+ shift_size = sym_help._slice_helper(
+ g, shifts, axes=[0], ends=[index + 1], starts=[index])
+ slice_size = g.op('Sub', end_size, shift_size)
+
+ # Cannot use the Mod op here because TensorRT does not support it
+ div_size = g.op('Div', slice_size, end_size)
+ slice_size = g.op('Sub', slice_size, g.op('Mul', end_size, div_size))
+
+ if version.parse(torch.__version__) >= version.parse('1.7.0'):
+ # pytorch >= 1.7.0 needs an explicit dim for the squeeze symbolic (dim=0)
+ end_size = squeeze(g, end_size, 0)
+ slice_size = squeeze(g, slice_size, 0)
+ else:
+ end_size = g.op('Squeeze', end_size)
+ slice_size = g.op('Squeeze', slice_size)
+ dim = torch.LongTensor([dim])
+
+ input_slice0 = sym_help._slice_helper(
+ g,
+ input,
+ axes=dim,
+ starts=torch.LongTensor([0]),
+ ends=slice_size,
+ dynamic_slice=True)
+ input_slice1 = sym_help._slice_helper(
+ g,
+ input,
+ axes=dim,
+ ends=end_size,
+ starts=slice_size,
+ dynamic_slice=True)
+
+ input = g.op('Concat', input_slice1, input_slice0, axis_i=dim)
+
+ if need_flatten:
+ input = g.op('Reshape', input, resize_shape)
+
+ return input
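+
+# Illustrative check of the slice-and-concat scheme above (added comment, not
+# part of the original file): rolling [0, 1, 2, 3, 4] by 2 keeps the first
+# 5 - 2 = 3 elements in one slice and the last 2 in the other, then swaps them:
+#   >>> torch.roll(torch.arange(5), shifts=2, dims=0)
+#   tensor([3, 4, 0, 1, 2])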
+
+
+def register_extra_symbolics(opset=11):
+ register_op('one_hot', one_hot, '', opset)
+ register_op('im2col', im2col, '', opset)
+ register_op('topk', topk, '', opset)
+ register_op('softmax', softmax, '', opset)
+ register_op('constant_pad_nd', constant_pad_nd, '', opset)
+ register_op('reflection_pad1d', reflection_pad1d, '', opset)
+ register_op('reflection_pad2d', reflection_pad2d, '', opset)
+ register_op('reflection_pad3d', reflection_pad3d, '', opset)
+ register_op('avg_pool1d', avg_pool1d, '', opset)
+ register_op('avg_pool2d', avg_pool2d, '', opset)
+ register_op('avg_pool3d', avg_pool3d, '', opset)
+ register_op('adaptive_avg_pool1d', adaptive_avg_pool1d, '', opset)
+ register_op('adaptive_avg_pool2d', adaptive_avg_pool2d, '', opset)
+ register_op('adaptive_avg_pool3d', adaptive_avg_pool3d, '', opset)
+ register_op('masked_select', masked_select, '', opset)
+ register_op('upsample_nearest1d', upsample_nearest1d, '', opset)
+ register_op('upsample_nearest2d', upsample_nearest2d, '', opset)
+ register_op('upsample_nearest3d', upsample_nearest3d, '', opset)
+ register_op('upsample_linear1d', upsample_linear1d, '', opset)
+ register_op('upsample_bilinear2d', upsample_bilinear2d, '', opset)
+ register_op('upsample_trilinear3d', upsample_trilinear3d, '', opset)
+ register_op('upsample_bicubic2d', upsample_bicubic2d, '', opset)
+ register_op('new_full', new_full, '', opset)
+ register_op('grid_sampler', grid_sampler, '', opset)
+ register_op('cummax', cummax, '', opset)
+ register_op('cummin', cummin, '', opset)
+ register_op('roll', roll, '', opset)
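+
+
+# Typical usage sketch (added comment, not part of the original file); the
+# import path below assumes the upstream mmcv layout and may differ in this
+# replacement package, and `model` / `dummy_input` are user-provided:
+#   >>> from mmcv.onnx import register_extra_symbolics
+#   >>> register_extra_symbolics(opset=11)
+#   >>> torch.onnx.export(model, dummy_input, 'tmp.onnx', opset_version=11)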
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/__init__.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..4f1f6e5802809af20265bef5e8e6c5429b07f784
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/__init__.py
@@ -0,0 +1,103 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .active_rotated_filter import active_rotated_filter
+from .assign_score_withk import assign_score_withk
+from .ball_query import ball_query
+from .bbox import bbox_overlaps
+from .border_align import BorderAlign, border_align
+from .box_iou_rotated import box_iou_rotated
+from .carafe import CARAFE, CARAFENaive, CARAFEPack, carafe, carafe_naive
+from .cc_attention import CrissCrossAttention
+from .contour_expand import contour_expand
+from .convex_iou import convex_giou, convex_iou
+from .corner_pool import CornerPool
+from .correlation import Correlation
+from .deform_conv import DeformConv2d, DeformConv2dPack, deform_conv2d
+from .deform_roi_pool import (DeformRoIPool, DeformRoIPoolPack,
+ ModulatedDeformRoIPoolPack, deform_roi_pool)
+from .deprecated_wrappers import Conv2d_deprecated as Conv2d
+from .deprecated_wrappers import ConvTranspose2d_deprecated as ConvTranspose2d
+from .deprecated_wrappers import Linear_deprecated as Linear
+from .deprecated_wrappers import MaxPool2d_deprecated as MaxPool2d
+from .focal_loss import (SigmoidFocalLoss, SoftmaxFocalLoss,
+ sigmoid_focal_loss, softmax_focal_loss)
+from .furthest_point_sample import (furthest_point_sample,
+ furthest_point_sample_with_dist)
+from .fused_bias_leakyrelu import FusedBiasLeakyReLU, fused_bias_leakyrelu
+from .gather_points import gather_points
+from .group_points import GroupAll, QueryAndGroup, grouping_operation
+from .info import (get_compiler_version, get_compiling_cuda_version,
+ get_onnxruntime_op_path)
+from .iou3d import boxes_iou_bev, nms_bev, nms_normal_bev
+from .knn import knn
+from .masked_conv import MaskedConv2d, masked_conv2d
+from .min_area_polygons import min_area_polygons
+from .modulated_deform_conv import (ModulatedDeformConv2d,
+ ModulatedDeformConv2dPack,
+ modulated_deform_conv2d)
+from .multi_scale_deform_attn import MultiScaleDeformableAttention
+from .nms import batched_nms, nms, nms_match, nms_rotated, soft_nms
+from .pixel_group import pixel_group
+from .point_sample import (SimpleRoIAlign, point_sample,
+ rel_roi_point_to_rel_img_point)
+from .points_in_boxes import (points_in_boxes_all, points_in_boxes_cpu,
+ points_in_boxes_part)
+from .points_in_polygons import points_in_polygons
+from .points_sampler import PointsSampler
+from .psa_mask import PSAMask
+from .riroi_align_rotated import RiRoIAlignRotated, riroi_align_rotated
+from .roi_align import RoIAlign, roi_align
+from .roi_align_rotated import RoIAlignRotated, roi_align_rotated
+from .roi_pool import RoIPool, roi_pool
+from .roiaware_pool3d import RoIAwarePool3d
+from .roipoint_pool3d import RoIPointPool3d
+from .rotated_feature_align import rotated_feature_align
+from .saconv import SAConv2d
+from .scatter_points import DynamicScatter, dynamic_scatter
+from .sync_bn import SyncBatchNorm
+from .three_interpolate import three_interpolate
+from .three_nn import three_nn
+from .tin_shift import TINShift, tin_shift
+from .upfirdn2d import upfirdn2d
+from .voxelize import Voxelization, voxelization
+
+__all__ = [
+ 'bbox_overlaps', 'CARAFE', 'CARAFENaive', 'CARAFEPack', 'carafe',
+ 'carafe_naive', 'CornerPool', 'DeformConv2d', 'DeformConv2dPack',
+ 'deform_conv2d', 'DeformRoIPool', 'DeformRoIPoolPack',
+ 'ModulatedDeformRoIPoolPack', 'deform_roi_pool', 'SigmoidFocalLoss',
+ 'SoftmaxFocalLoss', 'sigmoid_focal_loss', 'softmax_focal_loss',
+ 'get_compiler_version', 'get_compiling_cuda_version',
+ 'get_onnxruntime_op_path', 'MaskedConv2d', 'masked_conv2d',
+ 'ModulatedDeformConv2d', 'ModulatedDeformConv2dPack',
+ 'modulated_deform_conv2d', 'batched_nms', 'nms', 'soft_nms', 'nms_match',
+ 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool', 'SyncBatchNorm', 'Conv2d',
+ 'ConvTranspose2d', 'Linear', 'MaxPool2d', 'CrissCrossAttention', 'PSAMask',
+ 'point_sample', 'rel_roi_point_to_rel_img_point', 'SimpleRoIAlign',
+ 'SAConv2d', 'TINShift', 'tin_shift', 'assign_score_withk',
+ 'box_iou_rotated', 'RoIPointPool3d', 'nms_rotated', 'knn', 'ball_query',
+ 'upfirdn2d', 'FusedBiasLeakyReLU', 'fused_bias_leakyrelu',
+ 'rotated_feature_align', 'RiRoIAlignRotated', 'riroi_align_rotated',
+ 'RoIAlignRotated', 'roi_align_rotated', 'pixel_group', 'QueryAndGroup',
+ 'GroupAll', 'grouping_operation', 'contour_expand', 'three_nn',
+ 'three_interpolate', 'MultiScaleDeformableAttention', 'BorderAlign',
+ 'border_align', 'gather_points', 'furthest_point_sample',
+ 'furthest_point_sample_with_dist', 'PointsSampler', 'Correlation',
+ 'boxes_iou_bev', 'nms_bev', 'nms_normal_bev', 'Voxelization',
+ 'voxelization', 'dynamic_scatter', 'DynamicScatter', 'RoIAwarePool3d',
+ 'points_in_boxes_part', 'points_in_boxes_cpu', 'points_in_boxes_all',
+ 'points_in_polygons', 'min_area_polygons', 'active_rotated_filter',
+ 'convex_iou', 'convex_giou'
+]
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/active_rotated_filter.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/active_rotated_filter.py
new file mode 100644
index 0000000000000000000000000000000000000000..8acda1969bb4566a3464cfff44b2f14da3b0cb17
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/active_rotated_filter.py
@@ -0,0 +1,74 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+from torch.autograd import Function
+from torch.autograd.function import once_differentiable
+
+from ..utils import ext_loader
+
+ext_module = ext_loader.load_ext(
+ '_ext',
+ ['active_rotated_filter_forward', 'active_rotated_filter_backward'])
+
+
+class ActiveRotatedFilterFunction(Function):
+ """Encoding the orientation information and generating orientation-
+ sensitive features.
+
+ The details are described in the paper `Align Deep Features for Oriented
+ Object Detection <https://arxiv.org/abs/2008.09397>`_.
+ """
+
+ @staticmethod
+ def forward(ctx, input, indices):
+ """
+ Args:
+ input (torch.Tensor): Input features with shape
+ [num_output_planes, num_input_planes, num_orientations, H, W].
+ indices (torch.Tensor): Indices with shape
+ [num_orientations, H, W, num_rotations].
+
+ Returns:
+ torch.Tensor: Refined features with shape [num_output_planes *
+ num_rotations, num_input_planes * num_orientations, H, W].
+ """
+ ctx.save_for_backward(input, indices)
+ op, ip, o, h, w = input.size()
+ o, h, w, r = indices.size()
+ output = input.new_zeros((op * r, ip * o, h, w))
+ ext_module.active_rotated_filter_forward(input, indices, output)
+
+ return output
+
+ @staticmethod
+ @once_differentiable
+ def backward(ctx, grad_out):
+ """
+ Args:
+ grad_out (torch.Tensor): The gradient of output features
+ with shape [num_output_planes * num_rotations,
+ num_input_planes * num_orientations, H, W].
+
+ Returns:
+ torch.Tensor: The gradient of input features with shape
+ [num_output_planes, num_input_planes, num_orientations, H, W].
+ """
+ input, indices = ctx.saved_tensors
+ grad_in = torch.zeros_like(input)
+ ext_module.active_rotated_filter_backward(grad_out, indices, grad_in)
+ return grad_in, None
+
+
+active_rotated_filter = ActiveRotatedFilterFunction.apply
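+
+
+# Illustrative shape sketch (added comment, not part of the original file);
+# running it needs the compiled `_ext` extension and precomputed rotation
+# index maps (`get_indices` below is a hypothetical helper):
+#   >>> weight = torch.randn(32, 16, 8, 3, 3).cuda()   # (op, ip, o, h, w)
+#   >>> indices = get_indices()                        # (o, h, w, r) = (8, 3, 3, 4)
+#   >>> out = active_rotated_filter(weight, indices)
+#   >>> out.shape                                      # (32 * 4, 16 * 8, 3, 3)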
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/assign_score_withk.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/assign_score_withk.py
new file mode 100644
index 0000000000000000000000000000000000000000..a631bbbe4c9693b67f13a87dae5d4fa6d0018180
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/assign_score_withk.py
@@ -0,0 +1,140 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from torch.autograd import Function
+
+from ..utils import ext_loader
+
+ext_module = ext_loader.load_ext(
+ '_ext', ['assign_score_withk_forward', 'assign_score_withk_backward'])
+
+
+class AssignScoreWithK(Function):
+ r"""Perform weighted sum to generate output features according to scores.
+ Modified from `PAConv <https://github.com/CVMI-Lab/PAConv>`_.
+
+ This is a memory-efficient CUDA implementation of the assign_scores
+ operation, which first transforms all point features with the weight bank,
+ then assembles neighbor features with ``knn_idx`` and performs a weighted
+ sum with ``scores``.
+
+ See the `paper <https://arxiv.org/abs/2103.14635>`_ appendix Sec. D for
+ more detailed descriptions.
+
+ Note:
+ This implementation assumes using ``neighbor`` kernel input, which is
+ (point_features - center_features, point_features).
+ See https://github.com/CVMI-Lab/PAConv/blob/main/scene_seg/model/
+ pointnet2/paconv.py#L128 for more details.
+ """
+
+ @staticmethod
+ def forward(ctx,
+ scores,
+ point_features,
+ center_features,
+ knn_idx,
+ aggregate='sum'):
+ """
+ Args:
+ scores (torch.Tensor): (B, npoint, K, M), predicted scores to
+ aggregate weight matrices in the weight bank.
+ ``npoint`` is the number of sampled centers.
+ ``K`` is the number of queried neighbors.
+ ``M`` is the number of weight matrices in the weight bank.
+ point_features (torch.Tensor): (B, N, M, out_dim)
+ Pre-computed point features to be aggregated.
+ center_features (torch.Tensor): (B, N, M, out_dim)
+ Pre-computed center features to be aggregated.
+ knn_idx (torch.Tensor): (B, npoint, K), index of sampled kNN.
+ We assume the first idx in each row is the idx of the center.
+ aggregate (str, optional): Aggregation method.
+ Can be 'sum', 'avg' or 'max'. Defaults: 'sum'.
+
+ Returns:
+ torch.Tensor: (B, out_dim, npoint, K), the aggregated features.
+ """
+ agg = {'sum': 0, 'avg': 1, 'max': 2}
+
+ B, N, M, out_dim = point_features.size()
+ _, npoint, K, _ = scores.size()
+
+ output = point_features.new_zeros((B, out_dim, npoint, K))
+ ext_module.assign_score_withk_forward(
+ point_features.contiguous(),
+ center_features.contiguous(),
+ scores.contiguous(),
+ knn_idx.contiguous(),
+ output,
+ B=B,
+ N0=N,
+ N1=npoint,
+ M=M,
+ K=K,
+ O=out_dim,
+ aggregate=agg[aggregate])
+
+ ctx.save_for_backward(output, point_features, center_features, scores,
+ knn_idx)
+ ctx.agg = agg[aggregate]
+
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_out):
+ """
+ Args:
+ grad_out (torch.Tensor): (B, out_dim, npoint, K)
+
+ Returns:
+ tuple[torch.Tensor]: A tuple containing five elements. The first one
+ is the gradient of ``scores`` whose shape is (B, npoint, K, M). The
+ second is the gradient of ``point_features`` whose shape is
+ (B, N, M, out_dim). The third is the gradient of
+ ``center_features`` with the shape of (B, N, M, out_dim). The last
+ two are ``None``.
+ """
+ _, point_features, center_features, scores, knn_idx = ctx.saved_tensors
+
+ agg = ctx.agg
+
+ B, N, M, out_dim = point_features.size()
+ _, npoint, K, _ = scores.size()
+
+ grad_point_features = point_features.new_zeros(point_features.shape)
+ grad_center_features = center_features.new_zeros(center_features.shape)
+ grad_scores = scores.new_zeros(scores.shape)
+
+ ext_module.assign_score_withk_backward(
+ grad_out.contiguous(),
+ point_features.contiguous(),
+ center_features.contiguous(),
+ scores.contiguous(),
+ knn_idx.contiguous(),
+ grad_point_features,
+ grad_center_features,
+ grad_scores,
+ B=B,
+ N0=N,
+ N1=npoint,
+ M=M,
+ K=K,
+ O=out_dim,
+ aggregate=agg)
+
+ return grad_scores, grad_point_features, \
+ grad_center_features, None, None
+
+
+assign_score_withk = AssignScoreWithK.apply
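+
+
+# Illustrative shape sketch (added comment, not part of the original file);
+# requires the compiled `_ext` extension:
+#   >>> B, N, npoint, K, M, out_dim = 2, 1024, 256, 16, 8, 64
+#   >>> scores = torch.rand(B, npoint, K, M).cuda()
+#   >>> point_feats = torch.randn(B, N, M, out_dim).cuda()
+#   >>> center_feats = torch.randn(B, N, M, out_dim).cuda()
+#   >>> knn_idx = torch.randint(0, N, (B, npoint, K)).cuda()
+#   >>> out = assign_score_withk(scores, point_feats, center_feats, knn_idx)
+#   >>> out.shape    # (B, out_dim, npoint, K) == (2, 64, 256, 16)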
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/ball_query.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/ball_query.py
new file mode 100644
index 0000000000000000000000000000000000000000..4495f5a6eb4efb8b1c41a2d5eeca4e8bcc91da22
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/ball_query.py
@@ -0,0 +1,69 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+from torch.autograd import Function
+
+from ..utils import ext_loader
+
+ext_module = ext_loader.load_ext('_ext', ['ball_query_forward'])
+
+
+class BallQuery(Function):
+ """Find nearby points in spherical space."""
+
+ @staticmethod
+ def forward(ctx, min_radius: float, max_radius: float, sample_num: int,
+ xyz: torch.Tensor, center_xyz: torch.Tensor) -> torch.Tensor:
+ """
+ Args:
+ min_radius (float): minimum radius of the balls.
+ max_radius (float): maximum radius of the balls.
+ sample_num (int): maximum number of features in the balls.
+ xyz (Tensor): (B, N, 3) xyz coordinates of the features.
+ center_xyz (torch.Tensor): (B, npoint, 3) centers of the ball
+ query.
+
+ Returns:
+ torch.Tensor: (B, npoint, nsample) tensor with the indices of the
+ features that form the query balls.
+ """
+ assert center_xyz.is_contiguous()
+ assert xyz.is_contiguous()
+ assert min_radius < max_radius
+
+ B, N, _ = xyz.size()
+ npoint = center_xyz.size(1)
+ idx = xyz.new_zeros(B, npoint, sample_num, dtype=torch.int)
+
+ ext_module.ball_query_forward(
+ center_xyz,
+ xyz,
+ idx,
+ b=B,
+ n=N,
+ m=npoint,
+ min_radius=min_radius,
+ max_radius=max_radius,
+ nsample=sample_num)
+ if torch.__version__ != 'parrots':
+ ctx.mark_non_differentiable(idx)
+ return idx
+
+ @staticmethod
+ def backward(ctx, a=None):
+ return None, None, None, None
+
+
+ball_query = BallQuery.apply
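+
+
+# Illustrative usage sketch (added comment, not part of the original file);
+# requires the compiled `_ext` extension:
+#   >>> xyz = torch.rand(2, 1024, 3).cuda()          # all points
+#   >>> center_xyz = torch.rand(2, 256, 3).cuda()    # query centers
+#   >>> idx = ball_query(0.0, 0.2, 16, xyz, center_xyz)
+#   >>> idx.shape    # (2, 256, 16), integer indices into the 1024 points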
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/bbox.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/bbox.py
new file mode 100644
index 0000000000000000000000000000000000000000..e23c228b3eef175a813ed49c54c0bf5ade6a35f6
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/bbox.py
@@ -0,0 +1,87 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from ..utils import ext_loader
+
+ext_module = ext_loader.load_ext('_ext', ['bbox_overlaps'])
+
+
+def bbox_overlaps(bboxes1, bboxes2, mode='iou', aligned=False, offset=0):
+ """Calculate overlap between two sets of bboxes.
+
+ If ``aligned`` is ``False``, then calculate the ious between each bbox
+ of bboxes1 and bboxes2, otherwise the ious between each aligned pair of
+ bboxes1 and bboxes2.
+
+ Args:
+ bboxes1 (torch.Tensor): shape (m, 4) in <x1, y1, x2, y2> format or
+ empty.
+ bboxes2 (torch.Tensor): shape (n, 4) in <x1, y1, x2, y2> format or
+ empty. If aligned is ``True``, then m and n must be equal.
+ mode (str): "iou" (intersection over union) or "iof" (intersection
+ over foreground).
+
+ Returns:
+ torch.Tensor: Return the ious between boxes. If ``aligned`` is
+ ``False``, the shape of ious is (m, n) else (m, 1).
+
+ Example:
+ >>> bboxes1 = torch.FloatTensor([
+ >>> [0, 0, 10, 10],
+ >>> [10, 10, 20, 20],
+ >>> [32, 32, 38, 42],
+ >>> ])
+ >>> bboxes2 = torch.FloatTensor([
+ >>> [0, 0, 10, 20],
+ >>> [0, 10, 10, 19],
+ >>> [10, 10, 20, 20],
+ >>> ])
+ >>> bbox_overlaps(bboxes1, bboxes2)
+ tensor([[0.5000, 0.0000, 0.0000],
+ [0.0000, 0.0000, 1.0000],
+ [0.0000, 0.0000, 0.0000]])
+
+ Example:
+ >>> empty = torch.FloatTensor([])
+ >>> nonempty = torch.FloatTensor([
+ >>> [0, 0, 10, 9],
+ >>> ])
+ >>> assert tuple(bbox_overlaps(empty, nonempty).shape) == (0, 1)
+ >>> assert tuple(bbox_overlaps(nonempty, empty).shape) == (1, 0)
+ >>> assert tuple(bbox_overlaps(empty, empty).shape) == (0, 0)
+ """
+
+ mode_dict = {'iou': 0, 'iof': 1}
+ assert mode in mode_dict.keys()
+ mode_flag = mode_dict[mode]
+ # Either the boxes are empty or the length of boxes' last dimension is 4
+ assert (bboxes1.size(-1) == 4 or bboxes1.size(0) == 0)
+ assert (bboxes2.size(-1) == 4 or bboxes2.size(0) == 0)
+ assert offset == 1 or offset == 0
+
+ rows = bboxes1.size(0)
+ cols = bboxes2.size(0)
+ if aligned:
+ assert rows == cols
+
+ if rows * cols == 0:
+ return bboxes1.new(rows, 1) if aligned else bboxes1.new(rows, cols)
+
+ if aligned:
+ ious = bboxes1.new_zeros(rows)
+ else:
+ ious = bboxes1.new_zeros((rows, cols))
+ ext_module.bbox_overlaps(
+ bboxes1, bboxes2, ious, mode=mode_flag, aligned=aligned, offset=offset)
+ return ious
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/border_align.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/border_align.py
new file mode 100644
index 0000000000000000000000000000000000000000..03a35d93c6a9947ed2292abd6cdc180144f8041e
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/border_align.py
@@ -0,0 +1,122 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# modified from
+# https://github.com/Megvii-BaseDetection/cvpods/blob/master/cvpods/layers/border_align.py
+
+import torch
+import torch.nn as nn
+from torch.autograd import Function
+from torch.autograd.function import once_differentiable
+
+from ..utils import ext_loader
+
+ext_module = ext_loader.load_ext(
+ '_ext', ['border_align_forward', 'border_align_backward'])
+
+
+class BorderAlignFunction(Function):
+
+ @staticmethod
+ def symbolic(g, input, boxes, pool_size):
+ return g.op(
+ 'mmcv::MMCVBorderAlign', input, boxes, pool_size_i=pool_size)
+
+ @staticmethod
+ def forward(ctx, input, boxes, pool_size):
+ ctx.pool_size = pool_size
+ ctx.input_shape = input.size()
+
+ assert boxes.ndim == 3, 'boxes must have shape [B, H*W, 4]'
+ assert boxes.size(2) == 4, \
+ 'the last dimension of boxes must be (x1, y1, x2, y2)'
+ assert input.size(1) % 4 == 0, \
+ 'the channel for input feature must be divisible by factor 4'
+
+ # [B, C//4, H*W, 4]
+ output_shape = (input.size(0), input.size(1) // 4, boxes.size(1), 4)
+ output = input.new_zeros(output_shape)
+ # `argmax_idx` only used for backward
+ argmax_idx = input.new_zeros(output_shape).to(torch.int)
+
+ ext_module.border_align_forward(
+ input, boxes, output, argmax_idx, pool_size=ctx.pool_size)
+
+ ctx.save_for_backward(boxes, argmax_idx)
+ return output
+
+ @staticmethod
+ @once_differentiable
+ def backward(ctx, grad_output):
+ boxes, argmax_idx = ctx.saved_tensors
+ grad_input = grad_output.new_zeros(ctx.input_shape)
+ # complex head architecture may cause grad_output uncontiguous
+ grad_output = grad_output.contiguous()
+ ext_module.border_align_backward(
+ grad_output,
+ boxes,
+ argmax_idx,
+ grad_input,
+ pool_size=ctx.pool_size)
+ return grad_input, None, None
+
+
+border_align = BorderAlignFunction.apply
+
+
+class BorderAlign(nn.Module):
+ r"""Border align pooling layer.
+
+ Applies border_align over the input feature based on predicted bboxes.
+ The details are described in the paper
+ `BorderDet: Border Feature for Dense Object Detection
+ <https://arxiv.org/abs/2007.11056>`_.
+
+ For each border line (e.g. top, left, bottom or right) of each box,
+ border_align does the following:
+
+ 1. uniformly samples ``pool_size`` + 1 positions on this line, including
+ the start and end points;
+ 2. the features at these positions are computed by bilinear
+ interpolation;
+ 3. max pooling over all ``pool_size`` + 1 positions is used to compute
+ the pooled feature.
+
+ Args:
+ pool_size (int): number of positions sampled over the boxes' borders
+ (e.g. top, bottom, left, right).
+ """
+
+ def __init__(self, pool_size):
+ super(BorderAlign, self).__init__()
+ self.pool_size = pool_size
+
+ def forward(self, input, boxes):
+ """
+ Args:
+ input: Features with shape [N,4C,H,W]. Channels in the ranges [0,C),
+ [C,2C), [2C,3C) and [3C,4C) represent the top, left, bottom and
+ right features respectively.
+ boxes: Boxes with shape [N,H*W,4]. Coordinate format (x1,y1,x2,y2).
+
+ Returns:
+ torch.Tensor: Pooled features with shape [N,C,H*W,4]. The order is
+ (top,left,bottom,right) for the last dimension.
+ """
+ return border_align(input, boxes, self.pool_size)
+
+ def __repr__(self):
+ s = self.__class__.__name__
+ s += f'(pool_size={self.pool_size})'
+ return s
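+
+
+# Illustrative shape sketch (added comment, not part of the original file);
+# requires the compiled `_ext` extension:
+#   >>> feat = torch.rand(1, 4 * 16, 10, 10).cuda()   # 4C channels, C = 16
+#   >>> boxes = torch.rand(1, 10 * 10, 4).cuda()      # one box per location
+#   >>> pooled = BorderAlign(pool_size=10)(feat, boxes)
+#   >>> pooled.shape    # (1, 16, 100, 4)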
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/box_iou_rotated.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/box_iou_rotated.py
new file mode 100644
index 0000000000000000000000000000000000000000..29b6631c24893b99b2b7d4beabfec734c080d6d4
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/box_iou_rotated.py
@@ -0,0 +1,159 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from ..utils import ext_loader
+
+ext_module = ext_loader.load_ext('_ext', ['box_iou_rotated'])
+
+
+def box_iou_rotated(bboxes1,
+ bboxes2,
+ mode='iou',
+ aligned=False,
+ clockwise=True):
+ """Return intersection-over-union (Jaccard index) of boxes.
+
+ Both sets of boxes are expected to be in
+ (x_center, y_center, width, height, angle) format.
+
+ If ``aligned`` is ``False``, then calculate the ious between each bbox
+ of bboxes1 and bboxes2, otherwise the ious between each aligned pair of
+ bboxes1 and bboxes2.
+
+ .. note::
+ The operator assumes:
+
+ 1) The positive direction along x axis is left -> right.
+
+ 2) The positive direction along y axis is top -> down.
+
+ 3) The w border is in parallel with x axis when angle = 0.
+
+ However, there are 2 opposite definitions of the positive angular
+ direction, clockwise (CW) and counter-clockwise (CCW). MMCV supports
+ both definitions and uses CW by default.
+
+ Please set ``clockwise=False`` if you are using the CCW definition.
+
+ The coordinate system when ``clockwise`` is ``True`` (default)
+
+ .. code-block:: none
+
+ 0-------------------> x (0 rad)
+ | A-------------B
+ | | |
+ | | box h
+ | | angle=0 |
+ | D------w------C
+ v
+ y (pi/2 rad)
+
+ In such a coordinate system the rotation matrix is
+
+ .. math::
+ \\begin{pmatrix}
+ \\cos\\alpha & -\\sin\\alpha \\\\
+ \\sin\\alpha & \\cos\\alpha
+ \\end{pmatrix}
+
+ The coordinates of the corner point A can be calculated as:
+
+ .. math::
+ P_A=
+ \\begin{pmatrix} x_A \\\\ y_A\\end{pmatrix}
+ =
+ \\begin{pmatrix} x_{center} \\\\ y_{center}\\end{pmatrix} +
+ \\begin{pmatrix}\\cos\\alpha & -\\sin\\alpha \\\\
+ \\sin\\alpha & \\cos\\alpha\\end{pmatrix}
+ \\begin{pmatrix} -0.5w \\\\ -0.5h\\end{pmatrix} \\\\
+ =
+ \\begin{pmatrix} x_{center}-0.5w\\cos\\alpha+0.5h\\sin\\alpha
+ \\\\
+ y_{center}-0.5w\\sin\\alpha-0.5h\\cos\\alpha\\end{pmatrix}
+
+
+ The coordinate system when ``clockwise`` is ``False``
+
+ .. code-block:: none
+
+ 0-------------------> x (0 rad)
+ | A-------------B
+ | | |
+ | | box h
+ | | angle=0 |
+ | D------w------C
+ v
+ y (-pi/2 rad)
+
+ In such a coordinate system the rotation matrix is
+
+ .. math::
+ \\begin{pmatrix}
+ \\cos\\alpha & \\sin\\alpha \\\\
+ -\\sin\\alpha & \\cos\\alpha
+ \\end{pmatrix}
+
+ The coordinates of the corner point A can be calculated as:
+
+ .. math::
+ P_A=
+ \\begin{pmatrix} x_A \\\\ y_A\\end{pmatrix}
+ =
+ \\begin{pmatrix} x_{center} \\\\ y_{center}\\end{pmatrix} +
+ \\begin{pmatrix}\\cos\\alpha & \\sin\\alpha \\\\
+ -\\sin\\alpha & \\cos\\alpha\\end{pmatrix}
+ \\begin{pmatrix} -0.5w \\\\ -0.5h\\end{pmatrix} \\\\
+ =
+ \\begin{pmatrix} x_{center}-0.5w\\cos\\alpha-0.5h\\sin\\alpha
+ \\\\
+ y_{center}+0.5w\\sin\\alpha-0.5h\\cos\\alpha\\end{pmatrix}
+
+ Args:
+ bboxes1 (torch.Tensor): rotated bboxes 1. It has shape (N, 5),
+ indicating (x, y, w, h, theta) for each row. Note that theta is in
+ radian.
+ bboxes2 (torch.Tensor): rotated bboxes 2. It has shape (M, 5),
+ indicating (x, y, w, h, theta) for each row. Note that theta is in
+ radian.
+ mode (str): "iou" (intersection over union) or "iof" (intersection
+ over foreground).
+ clockwise (bool): flag indicating whether the positive angular
+ orientation is clockwise. default True.
+ `New in version 1.4.3.`
+
+ Returns:
+ torch.Tensor: Return the ious between boxes. If ``aligned`` is
+ ``False``, the shape of ious is (N, M) else (N,).
+ """
+ assert mode in ['iou', 'iof']
+ mode_dict = {'iou': 0, 'iof': 1}
+ mode_flag = mode_dict[mode]
+ rows = bboxes1.size(0)
+ cols = bboxes2.size(0)
+ if aligned:
+ ious = bboxes1.new_zeros(rows)
+ else:
+ ious = bboxes1.new_zeros((rows * cols))
+ if not clockwise:
+ flip_mat = bboxes1.new_ones(bboxes1.shape[-1])
+ flip_mat[-1] = -1
+ bboxes1 = bboxes1 * flip_mat
+ bboxes2 = bboxes2 * flip_mat
+ bboxes1 = bboxes1.contiguous()
+ bboxes2 = bboxes2.contiguous()
+ ext_module.box_iou_rotated(
+ bboxes1, bboxes2, ious, mode_flag=mode_flag, aligned=aligned)
+ if not aligned:
+ ious = ious.view(rows, cols)
+ return ious
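+
+
+# Illustrative usage sketch (added comment, not part of the original file);
+# requires the compiled `_ext` extension:
+#   >>> boxes1 = torch.tensor([[10., 10., 4., 2., 0.]]).cuda()  # (x, y, w, h, theta)
+#   >>> boxes2 = torch.tensor([[10., 10., 4., 2., 0.],
+#   ...                        [15., 15., 4., 2., 0.5]]).cuda()
+#   >>> box_iou_rotated(boxes1, boxes2).shape   # (1, 2); first entry is 1.0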
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/carafe.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/carafe.py
new file mode 100644
index 0000000000000000000000000000000000000000..8d185f40778d064cc57474fc23fb71163017e5b6
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/carafe.py
@@ -0,0 +1,301 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.autograd import Function
+from torch.nn.modules.module import Module
+
+from ..cnn import UPSAMPLE_LAYERS, normal_init, xavier_init
+from ..utils import ext_loader
+
+ext_module = ext_loader.load_ext('_ext', [
+ 'carafe_naive_forward', 'carafe_naive_backward', 'carafe_forward',
+ 'carafe_backward'
+])
+
+
+class CARAFENaiveFunction(Function):
+
+ @staticmethod
+ def symbolic(g, features, masks, kernel_size, group_size, scale_factor):
+ return g.op(
+ 'mmcv::MMCVCARAFENaive',
+ features,
+ masks,
+ kernel_size_i=kernel_size,
+ group_size_i=group_size,
+ scale_factor_f=scale_factor)
+
+ @staticmethod
+ def forward(ctx, features, masks, kernel_size, group_size, scale_factor):
+ assert scale_factor >= 1
+ assert masks.size(1) == kernel_size * kernel_size * group_size
+ assert masks.size(-1) == features.size(-1) * scale_factor
+ assert masks.size(-2) == features.size(-2) * scale_factor
+ assert features.size(1) % group_size == 0
+ assert (kernel_size - 1) % 2 == 0 and kernel_size >= 1
+ ctx.kernel_size = kernel_size
+ ctx.group_size = group_size
+ ctx.scale_factor = scale_factor
+ ctx.feature_size = features.size()
+ ctx.mask_size = masks.size()
+
+ n, c, h, w = features.size()
+ output = features.new_zeros((n, c, h * scale_factor, w * scale_factor))
+ ext_module.carafe_naive_forward(
+ features,
+ masks,
+ output,
+ kernel_size=kernel_size,
+ group_size=group_size,
+ scale_factor=scale_factor)
+
+ if features.requires_grad or masks.requires_grad:
+ ctx.save_for_backward(features, masks)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ assert grad_output.is_cuda
+
+ features, masks = ctx.saved_tensors
+ kernel_size = ctx.kernel_size
+ group_size = ctx.group_size
+ scale_factor = ctx.scale_factor
+
+ grad_input = torch.zeros_like(features)
+ grad_masks = torch.zeros_like(masks)
+ ext_module.carafe_naive_backward(
+ grad_output.contiguous(),
+ features,
+ masks,
+ grad_input,
+ grad_masks,
+ kernel_size=kernel_size,
+ group_size=group_size,
+ scale_factor=scale_factor)
+
+ return grad_input, grad_masks, None, None, None
+
+
+carafe_naive = CARAFENaiveFunction.apply
+
+
+class CARAFENaive(Module):
+
+ def __init__(self, kernel_size, group_size, scale_factor):
+ super(CARAFENaive, self).__init__()
+
+ assert isinstance(kernel_size, int) and isinstance(
+ group_size, int) and isinstance(scale_factor, int)
+ self.kernel_size = kernel_size
+ self.group_size = group_size
+ self.scale_factor = scale_factor
+
+ def forward(self, features, masks):
+ return carafe_naive(features, masks, self.kernel_size, self.group_size,
+ self.scale_factor)
+
+
+class CARAFEFunction(Function):
+
+ @staticmethod
+ def symbolic(g, features, masks, kernel_size, group_size, scale_factor):
+ return g.op(
+ 'mmcv::MMCVCARAFE',
+ features,
+ masks,
+ kernel_size_i=kernel_size,
+ group_size_i=group_size,
+ scale_factor_f=scale_factor)
+
+ @staticmethod
+ def forward(ctx, features, masks, kernel_size, group_size, scale_factor):
+ assert scale_factor >= 1
+ assert masks.size(1) == kernel_size * kernel_size * group_size
+ assert masks.size(-1) == features.size(-1) * scale_factor
+ assert masks.size(-2) == features.size(-2) * scale_factor
+ assert features.size(1) % group_size == 0
+ assert (kernel_size - 1) % 2 == 0 and kernel_size >= 1
+ ctx.kernel_size = kernel_size
+ ctx.group_size = group_size
+ ctx.scale_factor = scale_factor
+ ctx.feature_size = features.size()
+ ctx.mask_size = masks.size()
+
+ n, c, h, w = features.size()
+ output = features.new_zeros((n, c, h * scale_factor, w * scale_factor))
+ routput = features.new_zeros(output.size(), requires_grad=False)
+ rfeatures = features.new_zeros(features.size(), requires_grad=False)
+ rmasks = masks.new_zeros(masks.size(), requires_grad=False)
+ ext_module.carafe_forward(
+ features,
+ masks,
+ rfeatures,
+ routput,
+ rmasks,
+ output,
+ kernel_size=kernel_size,
+ group_size=group_size,
+ scale_factor=scale_factor)
+
+ if features.requires_grad or masks.requires_grad:
+ ctx.save_for_backward(features, masks, rfeatures)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ assert grad_output.is_cuda
+
+ features, masks, rfeatures = ctx.saved_tensors
+ kernel_size = ctx.kernel_size
+ group_size = ctx.group_size
+ scale_factor = ctx.scale_factor
+
+ rgrad_output = torch.zeros_like(grad_output, requires_grad=False)
+ rgrad_input_hs = torch.zeros_like(grad_output, requires_grad=False)
+ rgrad_input = torch.zeros_like(features, requires_grad=False)
+ rgrad_masks = torch.zeros_like(masks, requires_grad=False)
+ grad_input = torch.zeros_like(features, requires_grad=False)
+ grad_masks = torch.zeros_like(masks, requires_grad=False)
+ ext_module.carafe_backward(
+ grad_output.contiguous(),
+ rfeatures,
+ masks,
+ rgrad_output,
+ rgrad_input_hs,
+ rgrad_input,
+ rgrad_masks,
+ grad_input,
+ grad_masks,
+ kernel_size=kernel_size,
+ group_size=group_size,
+ scale_factor=scale_factor)
+ return grad_input, grad_masks, None, None, None
+
+
+carafe = CARAFEFunction.apply
+
+
+class CARAFE(Module):
+ """ CARAFE: Content-Aware ReAssembly of FEatures
+
+ Please refer to `CARAFE: Content-Aware ReAssembly of FEatures
+ <https://arxiv.org/abs/1905.02188>`_ for more details.
+
+ Args:
+ kernel_size (int): reassemble kernel size
+ group_size (int): reassemble group size
+ scale_factor (int): upsample ratio
+
+ Returns:
+ upsampled feature map
+ """
+
+ def __init__(self, kernel_size, group_size, scale_factor):
+ super(CARAFE, self).__init__()
+
+ assert isinstance(kernel_size, int) and isinstance(
+ group_size, int) and isinstance(scale_factor, int)
+ self.kernel_size = kernel_size
+ self.group_size = group_size
+ self.scale_factor = scale_factor
+
+ def forward(self, features, masks):
+ return carafe(features, masks, self.kernel_size, self.group_size,
+ self.scale_factor)
+
+
+@UPSAMPLE_LAYERS.register_module(name='carafe')
+class CARAFEPack(nn.Module):
+ """A unified package of CARAFE upsampler that contains: 1) channel
+ compressor 2) content encoder 3) CARAFE op.
+
+ Official implementation of ICCV 2019 paper
+ `CARAFE: Content-Aware ReAssembly of FEatures
+ <https://arxiv.org/abs/1905.02188>`_.
+
+ Args:
+ channels (int): input feature channels
+ scale_factor (int): upsample ratio
+ up_kernel (int): kernel size of CARAFE op
+ up_group (int): group size of CARAFE op
+ encoder_kernel (int): kernel size of content encoder
+ encoder_dilation (int): dilation of content encoder
+ compressed_channels (int): output channels of channels compressor
+
+ Returns:
+ upsampled feature map
+ """
+
+ def __init__(self,
+ channels,
+ scale_factor,
+ up_kernel=5,
+ up_group=1,
+ encoder_kernel=3,
+ encoder_dilation=1,
+ compressed_channels=64):
+ super(CARAFEPack, self).__init__()
+ self.channels = channels
+ self.scale_factor = scale_factor
+ self.up_kernel = up_kernel
+ self.up_group = up_group
+ self.encoder_kernel = encoder_kernel
+ self.encoder_dilation = encoder_dilation
+ self.compressed_channels = compressed_channels
+ self.channel_compressor = nn.Conv2d(channels, self.compressed_channels,
+ 1)
+ self.content_encoder = nn.Conv2d(
+ self.compressed_channels,
+ self.up_kernel * self.up_kernel * self.up_group *
+ self.scale_factor * self.scale_factor,
+ self.encoder_kernel,
+ padding=int((self.encoder_kernel - 1) * self.encoder_dilation / 2),
+ dilation=self.encoder_dilation,
+ groups=1)
+ self.init_weights()
+
+ def init_weights(self):
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ xavier_init(m, distribution='uniform')
+ normal_init(self.content_encoder, std=0.001)
+
+ def kernel_normalizer(self, mask):
+ mask = F.pixel_shuffle(mask, self.scale_factor)
+ n, mask_c, h, w = mask.size()
+ # use float division explicitly,
+ # to avoid inconsistency while exporting to onnx
+ mask_channel = int(mask_c / float(self.up_kernel**2))
+ mask = mask.view(n, mask_channel, -1, h, w)
+
+ mask = F.softmax(mask, dim=2, dtype=mask.dtype)
+ mask = mask.view(n, mask_c, h, w).contiguous()
+
+ return mask
+
+ def feature_reassemble(self, x, mask):
+ x = carafe(x, mask, self.up_kernel, self.up_group, self.scale_factor)
+ return x
+
+ def forward(self, x):
+ compressed_x = self.channel_compressor(x)
+ mask = self.content_encoder(compressed_x)
+ mask = self.kernel_normalizer(mask)
+
+ x = self.feature_reassemble(x, mask)
+ return x
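+
+
+# Illustrative usage sketch (added comment, not part of the original file);
+# the CARAFE op itself needs the compiled `_ext` extension:
+#   >>> upsampler = CARAFEPack(channels=256, scale_factor=2).cuda()
+#   >>> x = torch.randn(2, 256, 24, 24).cuda()
+#   >>> upsampler(x).shape    # (2, 256, 48, 48)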
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/cc_attention.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/cc_attention.py
new file mode 100644
index 0000000000000000000000000000000000000000..a42401eae5f73c5219b7db3bc941372be98292b9
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/cc_attention.py
@@ -0,0 +1,97 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from mmcv.cnn import PLUGIN_LAYERS, Scale
+
+
+def NEG_INF_DIAG(n, device):
+ """Returns a diagonal matrix of size [n, n].
+
+ The diagonal elements are all "-inf". This avoids counting the
+ overlapped elements in the Criss-Cross attention twice.
+ """
+ return torch.diag(torch.tensor(float('-inf')).to(device).repeat(n), 0)
+
+
+@PLUGIN_LAYERS.register_module()
+class CrissCrossAttention(nn.Module):
+ """Criss-Cross Attention Module.
+
+ .. note::
+ Before v1.3.13, we use a CUDA op. Since v1.3.13, we switch
+ to a pure PyTorch and equivalent implementation. For more
+ details, please refer to https://github.com/open-mmlab/mmcv/pull/1201.
+
+ Speed comparison for one forward pass
+
+ - Input size: [2,512,97,97]
+ - Device: 1 NVIDIA GeForce RTX 2080 Ti
+
+ +-----------------------+---------------+------------+---------------+
+ | |PyTorch version|CUDA version|Relative speed |
+ +=======================+===============+============+===============+
+ |with torch.no_grad() |0.00554402 s |0.0299619 s |5.4x |
+ +-----------------------+---------------+------------+---------------+
+ |no with torch.no_grad()|0.00562803 s |0.0301349 s |5.4x |
+ +-----------------------+---------------+------------+---------------+
+
+ Args:
+ in_channels (int): Channels of the input feature map.
+ """
+
+ def __init__(self, in_channels):
+ super().__init__()
+ self.query_conv = nn.Conv2d(in_channels, in_channels // 8, 1)
+ self.key_conv = nn.Conv2d(in_channels, in_channels // 8, 1)
+ self.value_conv = nn.Conv2d(in_channels, in_channels, 1)
+ self.gamma = Scale(0.)
+ self.in_channels = in_channels
+
+ def forward(self, x):
+ """forward function of Criss-Cross Attention.
+
+ Args:
+ x (torch.Tensor): Input feature with the shape of
+ (batch_size, in_channels, height, width).
+
+ Returns:
+ torch.Tensor: Output of the layer, with the shape of
+ (batch_size, in_channels, height, width)
+ """
+ B, C, H, W = x.size()
+ query = self.query_conv(x)
+ key = self.key_conv(x)
+ value = self.value_conv(x)
+ energy_H = torch.einsum('bchw,bciw->bwhi', query, key) + NEG_INF_DIAG(
+ H, query.device)
+ energy_H = energy_H.transpose(1, 2)
+ energy_W = torch.einsum('bchw,bchj->bhwj', query, key)
+ attn = F.softmax(
+ torch.cat([energy_H, energy_W], dim=-1), dim=-1) # [B,H,W,(H+W)]
+ out = torch.einsum('bciw,bhwi->bchw', value, attn[..., :H])
+ out += torch.einsum('bchj,bhwj->bchw', value, attn[..., H:])
+
+ out = self.gamma(out) + x
+ out = out.contiguous()
+
+ return out
+
+ def __repr__(self):
+ s = self.__class__.__name__
+ s += f'(in_channels={self.in_channels})'
+ return s
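+
+
+# Illustrative usage sketch (added comment, not part of the original file);
+# this module is pure PyTorch, so the check also runs on CPU:
+#   >>> cca = CrissCrossAttention(in_channels=64)
+#   >>> x = torch.randn(2, 64, 32, 32)
+#   >>> cca(x).shape    # (2, 64, 32, 32)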
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/contour_expand.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/contour_expand.py
new file mode 100644
index 0000000000000000000000000000000000000000..af975e32cc073004673bfdb24e55817ce1dd0edb
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/contour_expand.py
@@ -0,0 +1,62 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+import torch
+
+from ..utils import ext_loader
+
+ext_module = ext_loader.load_ext('_ext', ['contour_expand'])
+
+
+def contour_expand(kernel_mask, internal_kernel_label, min_kernel_area,
+ kernel_num):
+ """Expand kernel contours so that foreground pixels are assigned into
+ instances.
+
+ Args:
+ kernel_mask (np.array or torch.Tensor): The instance kernel mask with
+ size hxw.
+ internal_kernel_label (np.array or torch.Tensor): The instance internal
+ kernel label with size hxw.
+ min_kernel_area (int): The minimum kernel area.
+ kernel_num (int): The instance kernel number.
+
+ Returns:
+ list: The instance index map with size hxw.
+ """
+ assert isinstance(kernel_mask, (torch.Tensor, np.ndarray))
+ assert isinstance(internal_kernel_label, (torch.Tensor, np.ndarray))
+ assert isinstance(min_kernel_area, int)
+ assert isinstance(kernel_num, int)
+
+ if isinstance(kernel_mask, np.ndarray):
+ kernel_mask = torch.from_numpy(kernel_mask)
+ if isinstance(internal_kernel_label, np.ndarray):
+ internal_kernel_label = torch.from_numpy(internal_kernel_label)
+
+ if torch.__version__ == 'parrots':
+ if kernel_mask.shape[0] == 0 or internal_kernel_label.shape[0] == 0:
+ label = []
+ else:
+ label = ext_module.contour_expand(
+ kernel_mask,
+ internal_kernel_label,
+ min_kernel_area=min_kernel_area,
+ kernel_num=kernel_num)
+ label = label.tolist()
+ else:
+ label = ext_module.contour_expand(kernel_mask, internal_kernel_label,
+ min_kernel_area, kernel_num)
+ return label
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/convex_iou.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/convex_iou.py
new file mode 100644
index 0000000000000000000000000000000000000000..6b30c0e8e0c85cacea9ad7501096bc044e025a6e
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/convex_iou.py
@@ -0,0 +1,59 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from ..utils import ext_loader
+
+ext_module = ext_loader.load_ext('_ext', ['convex_iou', 'convex_giou'])
+
+
+def convex_giou(pointsets, polygons):
+ """Return generalized intersection-over-union (Jaccard index) between point
+ sets and polygons.
+
+ Args:
+ pointsets (torch.Tensor): It has shape (N, 18),
+ indicating (x1, y1, x2, y2, ..., x9, y9) for each row.
+ polygons (torch.Tensor): It has shape (N, 8),
+ indicating (x1, y1, x2, y2, x3, y3, x4, y4) for each row.
+
+ Returns:
+ tuple[torch.Tensor, torch.Tensor]: The first element is the gious
+ between point sets and polygons with the shape (N,). The second
+ element is the gradient of point sets with the shape (N, 18).
+ """
+ output = pointsets.new_zeros((pointsets.size(0), 19))
+ ext_module.convex_giou(pointsets, polygons, output)
+ convex_giou = output[:, -1]
+ points_grad = output[:, 0:-1]
+ return convex_giou, points_grad
+
+
+def convex_iou(pointsets, polygons):
+ """Return intersection-over-union (Jaccard index) between point sets and
+ polygons.
+
+ Args:
+ pointsets (torch.Tensor): It has shape (N, 18),
+ indicating (x1, y1, x2, y2, ..., x9, y9) for each row.
+ polygons (torch.Tensor): It has shape (K, 8),
+ indicating (x1, y1, x2, y2, x3, y3, x4, y4) for each row.
+
+ Returns:
+ torch.Tensor: Return the ious between point sets and polygons with the
+ shape (N, K).
+ """
+ N, K = pointsets.size(0), polygons.size(0)
+ ious = pointsets.new_zeros((N, K))
+ ext_module.convex_iou(pointsets, polygons, ious)
+ return ious
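+
+
+# Illustrative shape sketch (added comment, not part of the original file);
+# requires the compiled `_ext` extension:
+#   >>> pointsets = torch.rand(6, 18).cuda()    # 9 (x, y) points per set
+#   >>> polygons = torch.rand(4, 8).cuda()      # 4 quadrilaterals
+#   >>> convex_iou(pointsets, polygons).shape   # (6, 4)
+#   >>> giou, grad = convex_giou(pointsets, torch.rand(6, 8).cuda())
+#   >>> giou.shape, grad.shape                  # (6,), (6, 18)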
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/corner_pool.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/corner_pool.py
new file mode 100644
index 0000000000000000000000000000000000000000..aecd96bb9916db806b1ebe172aaf1b7dcf5235f1
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/corner_pool.py
@@ -0,0 +1,176 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+from torch import nn
+from torch.autograd import Function
+
+from ..utils import ext_loader
+
+ext_module = ext_loader.load_ext('_ext', [
+ 'top_pool_forward', 'top_pool_backward', 'bottom_pool_forward',
+ 'bottom_pool_backward', 'left_pool_forward', 'left_pool_backward',
+ 'right_pool_forward', 'right_pool_backward'
+])
+
+_mode_dict = {'top': 0, 'bottom': 1, 'left': 2, 'right': 3}
+
+
+class TopPoolFunction(Function):
+
+ @staticmethod
+ def symbolic(g, input):
+ output = g.op(
+ 'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['top']))
+ return output
+
+ @staticmethod
+ def forward(ctx, input):
+ output = ext_module.top_pool_forward(input)
+ ctx.save_for_backward(input)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ input, = ctx.saved_tensors
+ output = ext_module.top_pool_backward(input, grad_output)
+ return output
+
+
+class BottomPoolFunction(Function):
+
+ @staticmethod
+ def symbolic(g, input):
+ output = g.op(
+ 'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['bottom']))
+ return output
+
+ @staticmethod
+ def forward(ctx, input):
+ output = ext_module.bottom_pool_forward(input)
+ ctx.save_for_backward(input)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ input, = ctx.saved_tensors
+ output = ext_module.bottom_pool_backward(input, grad_output)
+ return output
+
+
+class LeftPoolFunction(Function):
+
+ @staticmethod
+ def symbolic(g, input):
+ output = g.op(
+ 'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['left']))
+ return output
+
+ @staticmethod
+ def forward(ctx, input):
+ output = ext_module.left_pool_forward(input)
+ ctx.save_for_backward(input)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ input, = ctx.saved_tensors
+ output = ext_module.left_pool_backward(input, grad_output)
+ return output
+
+
+class RightPoolFunction(Function):
+
+ @staticmethod
+ def symbolic(g, input):
+ output = g.op(
+ 'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['right']))
+ return output
+
+ @staticmethod
+ def forward(ctx, input):
+ output = ext_module.right_pool_forward(input)
+ ctx.save_for_backward(input)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ input, = ctx.saved_tensors
+ output = ext_module.right_pool_backward(input, grad_output)
+ return output
+
+
+class CornerPool(nn.Module):
+ """Corner Pooling.
+
+ Corner Pooling is a new type of pooling layer that helps a
+ convolutional network better localize corners of bounding boxes.
+
+    Please refer to `CornerNet: Detecting Objects as Paired Keypoints
+    <https://arxiv.org/abs/1808.01244>`_ for more details.
+
+ Code is modified from https://github.com/princeton-vl/CornerNet-Lite.
+
+ Args:
+ mode (str): Pooling orientation for the pooling layer
+
+ - 'bottom': Bottom Pooling
+ - 'left': Left Pooling
+ - 'right': Right Pooling
+ - 'top': Top Pooling
+
+ Returns:
+ Feature map after pooling.
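+
+    Example:
+        >>> # Illustrative sketch (not from the original file); it assumes a
+        >>> # PyTorch build where the pure-PyTorch `cummax` branch below is
+        >>> # taken, so the compiled extension is not required.
+        >>> pool = CornerPool('top')
+        >>> feat = torch.rand(1, 8, 16, 16)
+        >>> out = pool(feat)  # same shape as `feat`: (1, 8, 16, 16)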
+ """
+
+ pool_functions = {
+ 'bottom': BottomPoolFunction,
+ 'left': LeftPoolFunction,
+ 'right': RightPoolFunction,
+ 'top': TopPoolFunction,
+ }
+
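+    # Pooling toward a border is equivalent to a cumulative max along the
+    # height (dim=2) or width (dim=3) axis; 'top' and 'left' scan against the
+    # natural index order, so the tensor is flipped before and after cummax.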
+ cummax_dim_flip = {
+ 'bottom': (2, False),
+ 'left': (3, True),
+ 'right': (3, False),
+ 'top': (2, True),
+ }
+
+ def __init__(self, mode):
+ super(CornerPool, self).__init__()
+ assert mode in self.pool_functions
+ self.mode = mode
+ self.corner_pool = self.pool_functions[mode]
+
+ def forward(self, x):
+ if torch.__version__ != 'parrots' and torch.__version__ >= '1.5.0':
+ if torch.onnx.is_in_onnx_export():
+                assert torch.__version__ >= '1.7.0', \
+                    'When `cummax` serves as an intermediate component whose '\
+                    'outputs are used as inputs to other modules, the '\
+                    'PyTorch version must be >= 1.7.0; otherwise an error '\
+                    'like `RuntimeError: tuple appears in op that does not '\
+                    'forward tuples, unsupported kind: prim::PythonOp` '\
+                    'is raised.'
+
+ dim, flip = self.cummax_dim_flip[self.mode]
+ if flip:
+ x = x.flip(dim)
+ pool_tensor, _ = torch.cummax(x, dim=dim)
+ if flip:
+ pool_tensor = pool_tensor.flip(dim)
+ return pool_tensor
+ else:
+ return self.corner_pool.apply(x)
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/correlation.py b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/correlation.py
new file mode 100644
index 0000000000000000000000000000000000000000..f520636431baaeff4ba752955434b3b2f0aaee31
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/correlation.py
@@ -0,0 +1,209 @@
+# encoding=utf-8
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+from torch import Tensor, nn
+from torch.autograd import Function
+from torch.autograd.function import once_differentiable
+from torch.nn.modules.utils import _pair
+
+from ..utils import ext_loader
+
+ext_module = ext_loader.load_ext(
+ '_ext', ['correlation_forward', 'correlation_backward'])
+
+
+class CorrelationFunction(Function):
+
+ @staticmethod
+ def forward(ctx,
+ input1,
+ input2,
+ kernel_size=1,
+ max_displacement=1,
+ stride=1,
+ padding=1,
+ dilation=1,
+ dilation_patch=1):
+
+ ctx.save_for_backward(input1, input2)
+
+ kH, kW = ctx.kernel_size = _pair(kernel_size)
+ patch_size = max_displacement * 2 + 1
+ ctx.patch_size = patch_size
+ dH, dW = ctx.stride = _pair(stride)
+ padH, padW = ctx.padding = _pair(padding)
+ dilationH, dilationW = ctx.dilation = _pair(dilation)
+ dilation_patchH, dilation_patchW = ctx.dilation_patch = _pair(
+ dilation_patch)
+
+ output_size = CorrelationFunction._output_size(ctx, input1)
+
+ output = input1.new_zeros(output_size)
+
+ ext_module.correlation_forward(
+ input1,
+ input2,
+ output,
+ kH=kH,
+ kW=kW,
+ patchH=patch_size,
+ patchW=patch_size,
+ padH=padH,
+ padW=padW,
+ dilationH=dilationH,
+ dilationW=dilationW,
+ dilation_patchH=dilation_patchH,
+ dilation_patchW=dilation_patchW,
+ dH=dH,
+ dW=dW)
+
+ return output
+
+ @staticmethod
+ @once_differentiable
+ def backward(ctx, grad_output):
+ input1, input2 = ctx.saved_tensors
+
+ kH, kW = ctx.kernel_size
+ patch_size = ctx.patch_size
+ padH, padW = ctx.padding
+ dilationH, dilationW = ctx.dilation
+ dilation_patchH, dilation_patchW = ctx.dilation_patch
+ dH, dW = ctx.stride
+ grad_input1 = torch.zeros_like(input1)
+ grad_input2 = torch.zeros_like(input2)
+
+ ext_module.correlation_backward(
+ grad_output,
+ input1,
+ input2,
+ grad_input1,
+ grad_input2,
+ kH=kH,
+ kW=kW,
+ patchH=patch_size,
+ patchW=patch_size,
+ padH=padH,
+ padW=padW,
+ dilationH=dilationH,
+ dilationW=dilationW,
+ dilation_patchH=dilation_patchH,
+ dilation_patchW=dilation_patchW,
+ dH=dH,
+ dW=dW)
+ return grad_input1, grad_input2, None, None, None, None, None, None
+
+ @staticmethod
+ def _output_size(ctx, input1):
+ iH, iW = input1.size(2), input1.size(3)
+ batch_size = input1.size(0)
+ kH, kW = ctx.kernel_size
+ patch_size = ctx.patch_size
+ dH, dW = ctx.stride
+ padH, padW = ctx.padding
+ dilationH, dilationW = ctx.dilation
+ dilatedKH = (kH - 1) * dilationH + 1
+ dilatedKW = (kW - 1) * dilationW + 1
+
+ oH = int((iH + 2 * padH - dilatedKH) / dH + 1)
+ oW = int((iW + 2 * padW - dilatedKW) / dW + 1)
+
+ output_size = (batch_size, patch_size, patch_size, oH, oW)
+ return output_size
+
+
+class Correlation(nn.Module):
+ r"""Correlation operator
+
+ This correlation operator works for optical flow correlation computation.
+
+ There are two batched tensors with shape :math:`(N, C, H, W)`,
+    and the correlation output's shape is :math:`(N, max\_displacement \times
+    2 + 1, max\_displacement \times 2 + 1, H_{out}, W_{out})`
+
+ where
+
+ .. math::
+ H_{out} = \left\lfloor\frac{H_{in} + 2 \times padding -
+ dilation \times (kernel\_size - 1) - 1}
+ {stride} + 1\right\rfloor
+
+ .. math::
+ W_{out} = \left\lfloor\frac{W_{in} + 2 \times padding - dilation
+ \times (kernel\_size - 1) - 1}
+ {stride} + 1\right\rfloor
+
+ the correlation item :math:`(N_i, dy, dx)` is formed by taking the sliding
+ window convolution between input1 and shifted input2,
+
+ .. math::
+ Corr(N_i, dx, dy) =
+ \sum_{c=0}^{C-1}
+ input1(N_i, c) \star
+ \mathcal{S}(input2(N_i, c), dy, dx)
+
+    where :math:`\star` is the valid 2d sliding window convolution operator,
+    :math:`\mathcal{S}` means shifting the input features (out-of-boundary
+    positions are zero-padded), and :math:`dx, dy` are the shifting distances,
+    :math:`dx, dy \in [-max\_displacement \times dilation\_patch,
+    max\_displacement \times dilation\_patch]`.
+
+ Args:
+        kernel_size (int): The size of the sliding window, i.e. the local
+            neighborhood around each center point that is involved in the
+            correlation computation. Defaults to 1.
+        max_displacement (int): The radius used to compute the correlation
+            volume; the actual search range can be dilated by
+            ``dilation_patch``. Defaults to 1.
+ stride (int): The stride of the sliding blocks in the input spatial
+ dimensions. Defaults to 1.
+ padding (int): Zero padding added to all four sides of the input1.
+ Defaults to 0.
+        dilation (int): The spacing within the local neighborhood that is
+            involved in the correlation. Defaults to 1.
+        dilation_patch (int): The spacing between positions at which the
+            correlation is computed. Defaults to 1.
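+
+    Example:
+        >>> # Illustrative sketch; the compiled `_ext` correlation op must be
+        >>> # available for the device the inputs live on.
+        >>> corr = Correlation(max_displacement=4)
+        >>> input1 = torch.randn(2, 16, 32, 32)
+        >>> input2 = torch.randn(2, 16, 32, 32)
+        >>> out = corr(input1, input2)
+        >>> # patch size = 2 * 4 + 1 = 9, so `out` has shape (2, 9, 9, 32, 32)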
+ """
+
+ def __init__(self,
+ kernel_size: int = 1,
+ max_displacement: int = 1,
+ stride: int = 1,
+ padding: int = 0,
+ dilation: int = 1,
+ dilation_patch: int = 1) -> None:
+ super().__init__()
+ self.kernel_size = kernel_size
+ self.max_displacement = max_displacement
+ self.stride = stride
+ self.padding = padding
+ self.dilation = dilation
+ self.dilation_patch = dilation_patch
+
+ def forward(self, input1: Tensor, input2: Tensor) -> Tensor:
+ return CorrelationFunction.apply(input1, input2, self.kernel_size,
+ self.max_displacement, self.stride,
+ self.padding, self.dilation,
+ self.dilation_patch)
+
+ def __repr__(self) -> str:
+ s = self.__class__.__name__
+ s += f'(kernel_size={self.kernel_size}, '
+ s += f'max_displacement={self.max_displacement}, '
+ s += f'stride={self.stride}, '
+ s += f'padding={self.padding}, '
+ s += f'dilation={self.dilation}, '
+ s += f'dilation_patch={self.dilation_patch})'
+ return s
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/README.md b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..3bc02004017a0d607131b4de168b320c3beed23c
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/README.md
@@ -0,0 +1,170 @@
+# Code Structure of CUDA operators
+
+This folder contains all non-Python code for MMCV custom ops. Please follow the same architecture if you want to add new ops.
+
+## Directories Tree
+
+```folder
+.
+├── common
+│ ├── box_iou_rotated_utils.hpp
+│ ├── parrots_cpp_helper.hpp
+│ ├── parrots_cuda_helper.hpp
+│ ├── pytorch_cpp_helper.hpp
+│ ├── pytorch_cuda_helper.hpp
+│ ├── pytorch_device_registry.hpp
+│ └── cuda
+│ ├── common_cuda_helper.hpp
+│ ├── parrots_cudawarpfunction.cuh
+│ ├── ...
+│ └── ops_cuda_kernel.cuh
+├── onnxruntime
+│ ├── onnxruntime_register.h
+│ ├── onnxruntime_session_options_config_keys.h
+│ ├── ort_mmcv_utils.h
+│ ├── ...
+│ ├── onnx_ops.h
+│ └── cpu
+│ ├── onnxruntime_register.cpp
+│ ├── ...
+│ └── onnx_ops_impl.cpp
+├── parrots
+│ ├── ...
+│ ├── ops.cpp
+│ ├── ops_parrots.cpp
+│ └── ops_pytorch.h
+├── pytorch
+│ ├── info.cpp
+│ ├── pybind.cpp
+│ ├── ...
+│ ├── ops.cpp
+│ ├── cuda
+│ │ ├── ...
+│ │ └── ops_cuda.cu
+│ └── cpu
+│ ├── ...
+│ └── ops.cpp
+└── tensorrt
+ ├── trt_cuda_helper.cuh
+ ├── trt_plugin_helper.hpp
+ ├── trt_plugin.hpp
+ ├── trt_serialize.hpp
+ ├── ...
+ ├── trt_ops.hpp
+ └── plugins
+ ├── trt_cuda_helper.cu
+ ├── trt_plugin.cpp
+ ├── ...
+ ├── trt_ops.cpp
+ └── trt_ops_kernel.cu
+```
+
+## Components
+
+- `common`: This directory contains all tools and shared code.
+  - `cuda`: The CUDA kernels that can be shared by all backends. **HIP** kernels also live here since they have similar syntax.
+- `onnxruntime`: **ONNX Runtime** support for custom ops.
+  - `cpu`: CPU implementation of supported ops.
+- `parrots`: **Parrots** is a deep learning framework for model training and inference. Parrots custom ops are placed in this directory.
+- `pytorch`: **PyTorch** custom ops are supported by binding C++ to Python with **pybind11**. The op implementations and binding code are placed in this directory.
+  - `cuda`: This directory contains the CUDA kernel launchers, which feed tensor memory pointers to the CUDA kernels in `common/cuda`. The launchers provide a C++ interface to the CUDA implementation of the corresponding custom ops.
+  - `cpu`: This directory contains CPU implementations of the corresponding custom ops.
+- `tensorrt`: **TensorRT** support for custom ops.
+  - `plugins`: This directory contains the implementations of the supported custom ops. Some ops may also use the shared CUDA kernels in `common/cuda`.
+
+## How to add new PyTorch ops?
+
+1. (Optional) Add a shared kernel in `common` to support special hardware platforms.
+
+ ```c++
+ // src/common/cuda/new_ops_cuda_kernel.cuh
+
+   template <typename T>
+ __global__ void new_ops_forward_cuda_kernel(const T* input, T* output, ...) {
+ // forward here
+ }
+
+ ```
+
+ Add cuda kernel launcher in `pytorch/cuda`.
+
+ ```c++
+ // src/pytorch/cuda
+   #include <new_ops_cuda_kernel.cuh>
+
+ void NewOpsForwardCUDAKernelLauncher(Tensor input, Tensor output, ...){
+ // initialize
+ at::cuda::CUDAGuard device_guard(input.device());
+ cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+ ...
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+ input.scalar_type(), "new_ops_forward_cuda_kernel", ([&] {
+           new_ops_forward_cuda_kernel<scalar_t>
+               <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK, 0, stream>>>(
+                   input.data_ptr<scalar_t>(), output.data_ptr<scalar_t>(), ...);
+ }));
+ AT_CUDA_CHECK(cudaGetLastError());
+ }
+ ```
+
+2. Register implementation for different devices.
+
+ ```c++
+ // src/pytorch/cuda/cudabind.cpp
+ ...
+
+ Tensor new_ops_forward_cuda(Tensor input, Tensor output, ...){
+ // implement cuda forward here
+ // use `NewOpsForwardCUDAKernelLauncher` here
+ }
+ // declare interface here.
+ Tensor new_ops_forward_impl(Tensor input, Tensor output, ...);
+ // register the implementation for given device (CUDA here).
+ REGISTER_DEVICE_IMPL(new_ops_forward_impl, CUDA, new_ops_forward_cuda);
+ ```
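+
+   A CPU implementation can be registered through the same device registry.
+   The snippet below is only a sketch following the pattern above;
+   `new_ops_forward_cpu` is a hypothetical name, not an existing MMCV symbol.
+
+   ```c++
+   // src/pytorch/cpu/new_ops.cpp (hypothetical)
+   Tensor new_ops_forward_cpu(Tensor input, Tensor output, ...){
+     // implement cpu forward here
+   }
+   // register the implementation for the CPU device.
+   REGISTER_DEVICE_IMPL(new_ops_forward_impl, CPU, new_ops_forward_cpu);
+   ```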
+
+3. Add the op implementation in the `pytorch` directory, and select the implementation according to the device type.
+
+ ```c++
+ // src/pytorch/new_ops.cpp
+ Tensor new_ops_forward_impl(Tensor input, Tensor output, ...){
+ // dispatch the implementation according to the device type of input.
+ DISPATCH_DEVICE_IMPL(new_ops_forward_impl, input, output, ...);
+ }
+ ...
+
+ Tensor new_ops_forward(Tensor input, Tensor output, ...){
+ return new_ops_forward_impl(input, output, ...);
+ }
+ ```
+
+4. Bind the implementation in `pytorch/pybind.cpp`.
+
+ ```c++
+ // src/pytorch/pybind.cpp
+
+ ...
+
+ Tensor new_ops_forward(Tensor input, Tensor output, ...);
+
+ ...
+
+ // bind with pybind11
+ m.def("new_ops_forward", &new_ops_forward, "new_ops_forward",
+ py::arg("input"), py::arg("output"), ...);
+
+ ...
+
+ ```
+
+5. Build MMCV again. Enjoy the new ops in Python:
+
+ ```python
+ from ..utils import ext_loader
+ ext_module = ext_loader.load_ext('_ext', ['new_ops_forward'])
+
+ ...
+
+ ext_module.new_ops_forward(input, output, ...)
+
+ ```
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/box_iou_rotated_utils.hpp b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/box_iou_rotated_utils.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..243200e156f1384b625d6bac7fa4c68e533d9441
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/box_iou_rotated_utils.hpp
@@ -0,0 +1,347 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+// modified from
+// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_utils.h
+#pragma once
+#include <cassert>
+#include <cmath>
+
+#ifdef __CUDACC__
+// Designates functions callable from the host (CPU) and the device (GPU)
+#define HOST_DEVICE __host__ __device__
+#define HOST_DEVICE_INLINE HOST_DEVICE __forceinline__
+#else
+#include <algorithm>
+#define HOST_DEVICE
+#define HOST_DEVICE_INLINE HOST_DEVICE inline
+#endif
+
+namespace {
+
+template <typename T>
+struct RotatedBox {
+ T x_ctr, y_ctr, w, h, a;
+};
+
+template <typename T>
+struct Point {
+ T x, y;
+ HOST_DEVICE_INLINE Point(const T& px = 0, const T& py = 0) : x(px), y(py) {}
+ HOST_DEVICE_INLINE Point operator+(const Point& p) const {
+ return Point(x + p.x, y + p.y);
+ }
+ HOST_DEVICE_INLINE Point& operator+=(const Point& p) {
+ x += p.x;
+ y += p.y;
+ return *this;
+ }
+ HOST_DEVICE_INLINE Point operator-(const Point& p) const {
+ return Point(x - p.x, y - p.y);
+ }
+ HOST_DEVICE_INLINE Point operator*(const T coeff) const {
+ return Point(x * coeff, y * coeff);
+ }
+};
+
+template <typename T>
+HOST_DEVICE_INLINE T dot_2d(const Point<T>& A, const Point<T>& B) {
+ return A.x * B.x + A.y * B.y;
+}
+
+template <typename T>
+HOST_DEVICE_INLINE T cross_2d(const Point<T>& A, const Point<T>& B) {
+ return A.x * B.y - B.x * A.y;
+}
+
+template <typename T>
+HOST_DEVICE_INLINE void get_rotated_vertices(const RotatedBox<T>& box,
+                                             Point<T> (&pts)[4]) {
+ // M_PI / 180. == 0.01745329251
+ // double theta = box.a * 0.01745329251;
+ // MODIFIED
+ double theta = box.a;
+ T cosTheta2 = (T)cos(theta) * 0.5f;
+ T sinTheta2 = (T)sin(theta) * 0.5f;
+
+ // y: top --> down; x: left --> right
+ pts[0].x = box.x_ctr - sinTheta2 * box.h - cosTheta2 * box.w;
+ pts[0].y = box.y_ctr + cosTheta2 * box.h - sinTheta2 * box.w;
+ pts[1].x = box.x_ctr + sinTheta2 * box.h - cosTheta2 * box.w;
+ pts[1].y = box.y_ctr - cosTheta2 * box.h - sinTheta2 * box.w;
+ pts[2].x = 2 * box.x_ctr - pts[0].x;
+ pts[2].y = 2 * box.y_ctr - pts[0].y;
+ pts[3].x = 2 * box.x_ctr - pts[1].x;
+ pts[3].y = 2 * box.y_ctr - pts[1].y;
+}
+
+template <typename T>
+HOST_DEVICE_INLINE int get_intersection_points(const Point<T> (&pts1)[4],
+                                               const Point<T> (&pts2)[4],
+                                               Point<T> (&intersections)[24]) {
+ // Line vector
+ // A line from p1 to p2 is: p1 + (p2-p1)*t, t=[0,1]
+  Point<T> vec1[4], vec2[4];
+ for (int i = 0; i < 4; i++) {
+ vec1[i] = pts1[(i + 1) % 4] - pts1[i];
+ vec2[i] = pts2[(i + 1) % 4] - pts2[i];
+ }
+
+ // Line test - test all line combos for intersection
+ int num = 0; // number of intersections
+ for (int i = 0; i < 4; i++) {
+ for (int j = 0; j < 4; j++) {
+ // Solve for 2x2 Ax=b
+ T det = cross_2d(vec2[j], vec1[i]);
+
+ // This takes care of parallel lines
+ if (fabs(det) <= 1e-14) {
+ continue;
+ }
+
+ auto vec12 = pts2[j] - pts1[i];
+
+ T t1 = cross_2d(vec2[j], vec12) / det;
+ T t2 = cross_2d(vec1[i], vec12) / det;
+
+ if (t1 >= 0.0f && t1 <= 1.0f && t2 >= 0.0f && t2 <= 1.0f) {
+ intersections[num++] = pts1[i] + vec1[i] * t1;
+ }
+ }
+ }
+
+ // Check for vertices of rect1 inside rect2
+ {
+ const auto& AB = vec2[0];
+ const auto& DA = vec2[3];
+ auto ABdotAB = dot_2d(AB, AB);
+ auto ADdotAD = dot_2d(DA, DA);
+ for (int i = 0; i < 4; i++) {
+ // assume ABCD is the rectangle, and P is the point to be judged
+ // P is inside ABCD iff. P's projection on AB lies within AB
+ // and P's projection on AD lies within AD
+
+ auto AP = pts1[i] - pts2[0];
+
+ auto APdotAB = dot_2d(AP, AB);
+ auto APdotAD = -dot_2d(AP, DA);
+
+ if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) &&
+ (APdotAD <= ADdotAD)) {
+ intersections[num++] = pts1[i];
+ }
+ }
+ }
+
+ // Reverse the check - check for vertices of rect2 inside rect1
+ {
+ const auto& AB = vec1[0];
+ const auto& DA = vec1[3];
+ auto ABdotAB = dot_2d(AB, AB);
+ auto ADdotAD = dot_2d(DA, DA);
+ for (int i = 0; i < 4; i++) {
+ auto AP = pts2[i] - pts1[0];
+
+ auto APdotAB = dot_2d(AP, AB);
+ auto APdotAD = -dot_2d(AP, DA);
+
+ if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) &&
+ (APdotAD <= ADdotAD)) {
+ intersections[num++] = pts2[i];
+ }
+ }
+ }
+
+ return num;
+}
+
+template <typename T>
+HOST_DEVICE_INLINE int convex_hull_graham(const Point<T> (&p)[24],
+                                          const int& num_in, Point<T> (&q)[24],
+ bool shift_to_zero = false) {
+ assert(num_in >= 2);
+
+ // Step 1:
+ // Find point with minimum y
+ // if more than 1 points have the same minimum y,
+ // pick the one with the minimum x.
+ int t = 0;
+ for (int i = 1; i < num_in; i++) {
+ if (p[i].y < p[t].y || (p[i].y == p[t].y && p[i].x < p[t].x)) {
+ t = i;
+ }
+ }
+ auto& start = p[t]; // starting point
+
+ // Step 2:
+ // Subtract starting point from every points (for sorting in the next step)
+ for (int i = 0; i < num_in; i++) {
+ q[i] = p[i] - start;
+ }
+
+ // Swap the starting point to position 0
+ auto tmp = q[0];
+ q[0] = q[t];
+ q[t] = tmp;
+
+ // Step 3:
+ // Sort point 1 ~ num_in according to their relative cross-product values
+ // (essentially sorting according to angles)
+ // If the angles are the same, sort according to their distance to origin
+ T dist[24];
+ for (int i = 0; i < num_in; i++) {
+ dist[i] = dot_2d(q[i], q[i]);
+ }
+
+#ifdef __CUDACC__
+ // CUDA version
+ // In the future, we can potentially use thrust
+ // for sorting here to improve speed (though not guaranteed)
+ for (int i = 1; i < num_in - 1; i++) {
+ for (int j = i + 1; j < num_in; j++) {
+ T crossProduct = cross_2d(q[i], q[j]);
+ if ((crossProduct < -1e-6) ||
+ (fabs(crossProduct) < 1e-6 && dist[i] > dist[j])) {
+ auto q_tmp = q[i];
+ q[i] = q[j];
+ q[j] = q_tmp;
+ auto dist_tmp = dist[i];
+ dist[i] = dist[j];
+ dist[j] = dist_tmp;
+ }
+ }
+ }
+#else
+ // CPU version
+ std::sort(q + 1, q + num_in,
+            [](const Point<T>& A, const Point<T>& B) -> bool {
+ T temp = cross_2d(A, B);
+ if (fabs(temp) < 1e-6) {
+ return dot_2d(A, A) < dot_2d(B, B);
+ } else {
+ return temp > 0;
+ }
+ });
+ // compute distance to origin after sort, since the points are now different.
+ for (int i = 0; i < num_in; i++) {
+ dist[i] = dot_2d(q[i], q[i]);
+ }
+#endif
+
+ // Step 4:
+ // Make sure there are at least 2 points (that don't overlap with each other)
+ // in the stack
+ int k; // index of the non-overlapped second point
+ for (k = 1; k < num_in; k++) {
+ if (dist[k] > 1e-8) {
+ break;
+ }
+ }
+ if (k == num_in) {
+ // We reach the end, which means the convex hull is just one point
+ q[0] = p[t];
+ return 1;
+ }
+ q[1] = q[k];
+ int m = 2; // 2 points in the stack
+ // Step 5:
+ // Finally we can start the scanning process.
+ // When a non-convex relationship between the 3 points is found
+ // (either concave shape or duplicated points),
+ // we pop the previous point from the stack
+ // until the 3-point relationship is convex again, or
+ // until the stack only contains two points
+ for (int i = k + 1; i < num_in; i++) {
+ while (m > 1 && cross_2d(q[i] - q[m - 2], q[m - 1] - q[m - 2]) >= 0) {
+ m--;
+ }
+ q[m++] = q[i];
+ }
+
+ // Step 6 (Optional):
+ // In general sense we need the original coordinates, so we
+ // need to shift the points back (reverting Step 2)
+ // But if we're only interested in getting the area/perimeter of the shape
+ // We can simply return.
+ if (!shift_to_zero) {
+ for (int i = 0; i < m; i++) {
+ q[i] += start;
+ }
+ }
+
+ return m;
+}
+
+template <typename T>
+HOST_DEVICE_INLINE T polygon_area(const Point<T> (&q)[24], const int& m) {
+ if (m <= 2) {
+ return 0;
+ }
+
+ T area = 0;
+ for (int i = 1; i < m - 1; i++) {
+ area += fabs(cross_2d(q[i] - q[0], q[i + 1] - q[0]));
+ }
+
+ return area / 2.0;
+}
+
+template <typename T>
+HOST_DEVICE_INLINE T rotated_boxes_intersection(const RotatedBox<T>& box1,
+                                                const RotatedBox<T>& box2) {
+ // There are up to 4 x 4 + 4 + 4 = 24 intersections (including dups) returned
+ // from rotated_rect_intersection_pts
+  Point<T> intersectPts[24], orderedPts[24];
+
+  Point<T> pts1[4];
+  Point<T> pts2[4];
+ get_rotated_vertices(box1, pts1);
+ get_rotated_vertices(box2, pts2);
+
+ int num = get_intersection_points(pts1, pts2, intersectPts);
+
+ if (num <= 2) {
+ return 0.0;
+ }
+
+ // Convex Hull to order the intersection points in clockwise order and find
+ // the contour area.
+ int num_convex = convex_hull_graham(intersectPts, num, orderedPts, true);
+ return polygon_area(orderedPts, num_convex);
+}
+
+} // namespace
+
+template <typename T>
+HOST_DEVICE_INLINE T single_box_iou_rotated(T const* const box1_raw,
+ T const* const box2_raw,
+ const int mode_flag) {
+ // shift center to the middle point to achieve higher precision in result
+  RotatedBox<T> box1, box2;
+ auto center_shift_x = (box1_raw[0] + box2_raw[0]) / 2.0;
+ auto center_shift_y = (box1_raw[1] + box2_raw[1]) / 2.0;
+ box1.x_ctr = box1_raw[0] - center_shift_x;
+ box1.y_ctr = box1_raw[1] - center_shift_y;
+ box1.w = box1_raw[2];
+ box1.h = box1_raw[3];
+ box1.a = box1_raw[4];
+ box2.x_ctr = box2_raw[0] - center_shift_x;
+ box2.y_ctr = box2_raw[1] - center_shift_y;
+ box2.w = box2_raw[2];
+ box2.h = box2_raw[3];
+ box2.a = box2_raw[4];
+
+ const T area1 = box1.w * box1.h;
+ const T area2 = box2.w * box2.h;
+ if (area1 < 1e-14 || area2 < 1e-14) {
+ return 0.f;
+ }
+
+ const T intersection = rotated_boxes_intersection(box1, box2);
+ T baseS = 1.0;
+ if (mode_flag == 0) {
+ baseS = (area1 + area2 - intersection);
+ } else if (mode_flag == 1) {
+ baseS = area1;
+ }
+ const T iou = intersection / baseS;
+ return iou;
+}
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/active_rotated_filter_cuda_kernel.cuh b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/active_rotated_filter_cuda_kernel.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..0f7454032bc12d4304b923709b09d335daa3cd07
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/active_rotated_filter_cuda_kernel.cuh
@@ -0,0 +1,72 @@
+// encoding=utf-8
+// Copyright 2021 Huawei Technologies Co., Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// Modified from
+// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/orn/src/cuda/ActiveRotatingFilter_cuda.cu
+#ifndef ACTIVE_ROTATED_FILTER_CUDA_KERNEL_CUH
+#define ACTIVE_ROTATED_FILTER_CUDA_KERNEL_CUH
+
+#ifdef MMCV_USE_PARROTS
+#include "parrots_cuda_helper.hpp"
+#else
+#include "pytorch_cuda_helper.hpp"
+#endif
+
+template <typename scalar_t>
+__global__ void active_rotated_filter_forward_cuda_kernel(
+ const int nthreads, const scalar_t* weight_data, const int* indices_data,
+ const int num_input_planes, const int num_output_planes,
+ const int num_orientations, const int num_rotations, const int nEntry,
+ scalar_t* output_data) {
+ CUDA_1D_KERNEL_LOOP(index, nthreads) {
+ int l = index % nEntry;
+ int j = (index / nEntry) % num_input_planes;
+ int i = index / nEntry / num_input_planes;
+ int k;
+ scalar_t val = *(weight_data + index);
+ for (k = 0; k < num_rotations; k++) {
+ int idx = (int)(*(indices_data + l * num_rotations + k)) - 1;
+ scalar_t* target = output_data +
+ i * (num_rotations * num_input_planes * nEntry) +
+ k * (num_input_planes * nEntry) + j * (nEntry) + idx;
+ *target = val;
+ }
+ }
+}
+
+template <typename scalar_t>
+__global__ void active_rotated_filter_backward_cuda_kernel(
+ const int nthreads, const scalar_t* gradWeight_data,
+ const int* indices_data, const int num_input_planes,
+ const int num_output_planes, const int num_orientations,
+ const int num_rotations, const int nEntry, scalar_t* weight_data) {
+ CUDA_1D_KERNEL_LOOP(index, nthreads) {
+ int l = index % nEntry;
+ int j = (index / nEntry) % num_input_planes;
+ int i = index / nEntry / num_input_planes;
+ int k;
+ scalar_t* val = weight_data + index;
+ *val = 0;
+ scalar_t tmp = 0;
+ for (k = 0; k < num_rotations; k++) {
+ int idx = (int)(*(indices_data + l * num_rotations + k)) - 1;
+ scalar_t target =
+ *(gradWeight_data + i * (num_rotations * num_input_planes * nEntry) +
+ k * (num_input_planes * nEntry) + j * (nEntry) + idx);
+ tmp = tmp + target;
+ }
+ *val = tmp;
+ }
+}
+#endif // ACTIVE_ROTATED_FILTER_CUDA_KERNEL_CUH
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/assign_score_withk_cuda_kernel.cuh b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/assign_score_withk_cuda_kernel.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..2770fc4db71ddafc555778abfb7618c459f91110
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/assign_score_withk_cuda_kernel.cuh
@@ -0,0 +1,129 @@
+// encoding=utf-8
+// Copyright 2021 Huawei Technologies Co., Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef ASSIGN_SCORE_WITHK_CUDA_KERNEL_CUH
+#define ASSIGN_SCORE_WITHK_CUDA_KERNEL_CUH
+
+#ifdef MMCV_USE_PARROTS
+#include "parrots_cuda_helper.hpp"
+#else
+#include "pytorch_cuda_helper.hpp"
+#endif
+
+// input: points(B,N0,M,O), centers(B,N0,M,O), scores(B,N1,K,M), knn_idx(B,N1,K)
+// output: fout(B,O,N)
+// algo: fout(b,i,k,j) = s(b,i,k,m)*p(b,c(i),k,m,j) = s(b,i,k,m)*p(b,i(k),m,j)
+// i(k) = idx(b,i,k)
+// sum: fout(b,i,j) = fout(b,i,j) + s(b,i,k,m)*p(b,i,k,m,j)
+// avg: fout(b,i,j) = sum(fout(b,i,k,j)) / k
+// max: fout(b,i,j) = max(fout(b,i,k,j), sum(s(b,i,k,m)*p(b,i,k,m,j)))
+
+template <typename T>
+__global__ void assign_score_withk_forward_cuda_kernel(
+ const int B, const int N0, const int N1, const int M, const int K,
+ const int O, const int aggregate, const T* points, const T* centers,
+ const T* scores, const int64_t* knn_idx, T* output) {
+ // ----- parallel loop for B, N1, K and O ---------
+ CUDA_1D_KERNEL_LOOP(i, B * O * N1 * K) {
+ // ------- loop for M ----------
+ const int b = (int)(i / (O * N1 * K));
+ const int o = (int)(i % (O * N1 * K) / (N1 * K));
+ const int n = (int)(i % (N1 * K) / K);
+ const int k = (int)(i % K);
+ const int cn = (int)knn_idx[b * K * N1 + n * K +
+ 0]; // The first neighbor is the center point
+ const int kn = (int)knn_idx[b * K * N1 + n * K + k];
+ if (kn >= N0 ||
+ kn < 0) { // if index overflows, it is out of the neighborhood range
+ return;
+ }
+ assert(b < B);
+ assert(kn < N0);
+ assert(cn < N0);
+ assert(o < O);
+ assert(n < N1);
+ const int out_idx = b * N1 * O * K + o * N1 * K + n * K + k;
+ T val = output[out_idx];
+ for (int m = 0; m < M; m++) {
+ val += points[b * N0 * M * O + kn * M * O + m * O + o] *
+ scores[b * N1 * K * M + n * K * M + k * M + m] -
+ centers[b * N0 * M * O + cn * M * O + m * O + o] *
+ scores[b * N1 * K * M + n * K * M + k * M + m];
+ }
+ output[out_idx] = val;
+ }
+}
+
+template <typename T>
+__global__ void assign_score_withk_points_backward_cuda_kernel(
+ const int B, const int N0, const int N, const int M, const int K,
+ const int O, const int aggregate, const T* grad_out, const T* scores,
+ const int64_t* knn_idx, T* grad_points, T* grad_centers) {
+ // ----- parallel loop for B, M, O ---------
+ CUDA_1D_KERNEL_LOOP(i, B * M * O) {
+ int b = (int)(i / (M * O));
+ int m = (int)(i % (M * O) / O);
+ int o = (int)(i % O);
+
+ // ----- loop for N,K ---------
+ for (int n = 0; n < N; n++) {
+ for (int k = 0; k < K; k++) {
+ int kn = knn_idx[b * N * K + n * K + k];
+ int cn = knn_idx[b * N * K + n * K + 0];
+ if (kn >= N0 || kn < 0) { // if index overflows, it is out of the
+ // neighborhood range
+ continue;
+ }
+ atomicAdd(grad_points + b * N0 * M * O + kn * M * O + m * O + o,
+ scores[b * N * K * M + n * K * M + k * M + m] *
+ grad_out[b * O * N * K + o * N * K + n * K + k]);
+ atomicAdd(grad_centers + b * N0 * M * O + cn * M * O + m * O + o,
+ -scores[b * N * K * M + n * K * M + k * M + m] *
+ grad_out[b * O * N * K + o * N * K + n * K + k]);
+ }
+ }
+ }
+}
+
+template <typename T>
+__global__ void assign_score_withk_scores_backward_cuda_kernel(
+ const int B, const int N0, const int N, const int M, const int K,
+ const int O, const int aggregate, const T* grad_out, const T* points,
+ const T* centers, const int64_t* knn_idx, T* grad_scores) {
+ // ----- parallel loop for B, N, K, M ---------
+ CUDA_1D_KERNEL_LOOP(i, B * N * K * M) {
+ const int b = (int)(i / (N * M * K));
+ const int n = (int)(i % (N * M * K) / M / K);
+ const int k = (int)(i % (M * K) / M);
+ const int m = (int)(i % M);
+ const int cn = knn_idx[b * N * K + n * K + 0];
+ const int kn = knn_idx[b * N * K + n * K + k];
+ if (kn >= N0 ||
+ kn < 0) { // if index overflows, it is out of the neighborhood range
+ return;
+ }
+
+ // -------------- loop for O ------------------------
+ const int out_idx = b * N * K * M + n * K * M + k * M + m;
+ T val = grad_scores[out_idx];
+ for (int o = 0; o < O; o++) {
+ val += (points[b * N0 * M * O + kn * M * O + m * O + o] -
+ centers[b * N0 * M * O + cn * M * O + m * O + o]) *
+ grad_out[b * O * N * K + o * N * K + n * K + k];
+ }
+ grad_scores[out_idx] = val;
+ }
+}
+
+#endif // ASSIGN_SCORE_WITHK_CUDA_KERNEL_CUH
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/ball_query_cuda_kernel.cuh b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/ball_query_cuda_kernel.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..d003d46549f0f833c8df14b092d56fbfc837b3a2
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/ball_query_cuda_kernel.cuh
@@ -0,0 +1,71 @@
+// encoding=utf-8
+// Copyright 2021 Huawei Technologies Co., Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// Modified from
+// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/ball_query_gpu.cu
+#ifndef BALL_QUERY_CUDA_KERNEL_CUH
+#define BALL_QUERY_CUDA_KERNEL_CUH
+
+#ifdef MMCV_USE_PARROTS
+#include "parrots_cuda_helper.hpp"
+#else
+#include "pytorch_cuda_helper.hpp"
+#endif
+
+template <typename T>
+__global__ void ball_query_forward_cuda_kernel(int b, int n, int m,
+ float min_radius,
+ float max_radius, int nsample,
+ const T* new_xyz, const T* xyz,
+ int* idx) {
+ // new_xyz: (B, M, 3)
+ // xyz: (B, N, 3)
+ // output:
+ // idx: (B, M, nsample)
+ int bs_idx = blockIdx.y;
+ CUDA_1D_KERNEL_LOOP(pt_idx, m) {
+ if (bs_idx >= b) return;
+
+ new_xyz += bs_idx * m * 3 + pt_idx * 3;
+ xyz += bs_idx * n * 3;
+ idx += bs_idx * m * nsample + pt_idx * nsample;
+
+ float max_radius2 = max_radius * max_radius;
+ float min_radius2 = min_radius * min_radius;
+ T new_x = new_xyz[0];
+ T new_y = new_xyz[1];
+ T new_z = new_xyz[2];
+
+ int cnt = 0;
+ for (int k = 0; k < n; ++k) {
+ T x = xyz[k * 3 + 0];
+ T y = xyz[k * 3 + 1];
+ T z = xyz[k * 3 + 2];
+ T d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) +
+ (new_z - z) * (new_z - z);
+ if (d2 == 0 || (d2 >= min_radius2 && d2 < max_radius2)) {
+ if (cnt == 0) {
+ for (int l = 0; l < nsample; ++l) {
+ idx[l] = k;
+ }
+ }
+ idx[cnt] = k;
+ ++cnt;
+ if (cnt >= nsample) break;
+ }
+ }
+ }
+}
+
+#endif // BALL_QUERY_CUDA_KERNEL_CUH
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/bbox_overlaps_cuda_kernel.cuh b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/bbox_overlaps_cuda_kernel.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..55069b95b9bb23ee1dc2e71b192c4e1bc0b80b86
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/bbox_overlaps_cuda_kernel.cuh
@@ -0,0 +1,97 @@
+// encoding=utf-8
+// Copyright 2021 Huawei Technologies Co., Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef BBOX_OVERLAPS_CUDA_KERNEL_CUH
+#define BBOX_OVERLAPS_CUDA_KERNEL_CUH
+
+#ifdef MMCV_USE_PARROTS
+#include "parrots_cuda_helper.hpp"
+#else
+#include "pytorch_cuda_helper.hpp"
+#endif
+
+template <typename T>
+__global__ void bbox_overlaps_cuda_kernel(const T* bbox1, const T* bbox2,
+ T* ious, const int num_bbox1,
+ const int num_bbox2, const int mode,
+ const bool aligned,
+ const int offset) {
+ if (aligned) {
+ CUDA_1D_KERNEL_LOOP(index, num_bbox1) {
+ int b1 = index;
+ int b2 = index;
+
+ int base1 = b1 * 4;
+ T b1_x1 = bbox1[base1];
+ T b1_y1 = bbox1[base1 + 1];
+ T b1_x2 = bbox1[base1 + 2];
+ T b1_y2 = bbox1[base1 + 3];
+ T b1_area = (b1_x2 - b1_x1 + offset) * (b1_y2 - b1_y1 + offset);
+
+ int base2 = b2 * 4;
+ T b2_x1 = bbox2[base2];
+ T b2_y1 = bbox2[base2 + 1];
+ T b2_x2 = bbox2[base2 + 2];
+ T b2_y2 = bbox2[base2 + 3];
+ T b2_area = (b2_x2 - b2_x1 + offset) * (b2_y2 - b2_y1 + offset);
+
+ T left = fmaxf(b1_x1, b2_x1), right = fminf(b1_x2, b2_x2);
+ T top = fmaxf(b1_y1, b2_y1), bottom = fminf(b1_y2, b2_y2);
+ T width = fmaxf(right - left + offset, 0.f);
+ T height = fmaxf(bottom - top + offset, 0.f);
+ T interS = width * height;
+ T baseS = 1.0;
+ if (mode == 0) {
+ baseS = fmaxf(b1_area + b2_area - interS, T(offset));
+ } else if (mode == 1) {
+ baseS = fmaxf(b1_area, T(offset));
+ }
+ ious[index] = interS / baseS;
+ }
+ } else {
+ CUDA_1D_KERNEL_LOOP(index, num_bbox1 * num_bbox2) {
+ int b1 = index / num_bbox2;
+ int b2 = index % num_bbox2;
+
+ int base1 = b1 * 4;
+ T b1_x1 = bbox1[base1];
+ T b1_y1 = bbox1[base1 + 1];
+ T b1_x2 = bbox1[base1 + 2];
+ T b1_y2 = bbox1[base1 + 3];
+ T b1_area = (b1_x2 - b1_x1 + offset) * (b1_y2 - b1_y1 + offset);
+
+ int base2 = b2 * 4;
+ T b2_x1 = bbox2[base2];
+ T b2_y1 = bbox2[base2 + 1];
+ T b2_x2 = bbox2[base2 + 2];
+ T b2_y2 = bbox2[base2 + 3];
+ T b2_area = (b2_x2 - b2_x1 + offset) * (b2_y2 - b2_y1 + offset);
+
+ T left = fmaxf(b1_x1, b2_x1), right = fminf(b1_x2, b2_x2);
+ T top = fmaxf(b1_y1, b2_y1), bottom = fminf(b1_y2, b2_y2);
+ T width = fmaxf(right - left + offset, 0.f);
+ T height = fmaxf(bottom - top + offset, 0.f);
+ T interS = width * height;
+ T baseS = 1.0;
+ if (mode == 0) {
+ baseS = fmaxf(b1_area + b2_area - interS, T(offset));
+ } else if (mode == 1) {
+ baseS = fmaxf(b1_area, T(offset));
+ }
+ ious[index] = interS / baseS;
+ }
+ }
+}
+
+#endif // BBOX_OVERLAPS_CUDA_KERNEL_CUH
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/border_align_cuda_kernel.cuh b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/border_align_cuda_kernel.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..e89278533c851829177f86d91c68ff3e5452f06d
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/border_align_cuda_kernel.cuh
@@ -0,0 +1,213 @@
+// encoding=utf-8
+// Copyright 2021 Huawei Technologies Co., Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// modified from
+// https://github.com/Megvii-BaseDetection/cvpods/blob/master/cvpods/layers/csrc/border_align/border_align_kernel.cu.
+// the main difference: (1) use `argmax_idx` for fast computing of gradient
+// during the backward. (2) `wh` is directly computed by `boxes`, rather than
+// passing it as argument to forward or backward functions.
+
+#ifndef BORDER_ALIGN_CUDA_KERNEL_CUH
+#define BORDER_ALIGN_CUDA_KERNEL_CUH
+
+#include <float.h>
+#ifdef MMCV_WITH_TRT
+#include "common_cuda_helper.hpp"
+#else // MMCV_WITH_TRT
+#ifdef MMCV_USE_PARROTS
+#include "parrots_cuda_helper.hpp"
+#else // MMCV_USE_PARROTS
+#include "pytorch_cuda_helper.hpp"
+#endif // MMCV_USE_PARROTS
+#endif // MMCV_WITH_TRT
+
+enum BorderMode { Top = 0, Left = 1, Bottom = 2, Right = 3 };
+
+/*** Forward ***/
+template <typename T>
+__global__ void border_align_forward_cuda_kernel(
+ const int nthreads, const T* input, const T* boxes, T* output,
+ int* argmax_idx, const int channels, const int box_size, const int height,
+ const int width, const int pool_size) {
+ CUDA_1D_KERNEL_LOOP(index, nthreads) {
+ // (batch_idx, c_idx, box_idx) is an element paralleled for computing
+ // output, and `extreme_idx` is in range [0,3]
+ int batch_idx, c_idx, box_idx, extreme_idx, maxidx, *offset_argmax_idx;
+ const T *offset_box, *offset_input, *offset_box_x;
+ T *offset_output, box_width, box_height, stride, x_stride, y_stride, x, y,
+ val, maxval;
+
+ extreme_idx = threadIdx.y;
+ // shape (N, C, box_size, 4) for output
+ batch_idx = index / channels / box_size;
+ // shape (N, box_size, 4) for boxes
+ box_idx = index % box_size + batch_idx * box_size;
+ c_idx = (index / box_size) % channels;
+
+ offset_box = boxes + box_idx * 4;
+ box_width = *(offset_box + 2) - *offset_box;
+ box_height = *(offset_box + 3) - *(offset_box + 1);
+ offset_output = output + index * 4 + extreme_idx;
+ offset_argmax_idx = argmax_idx + index * 4 + extreme_idx;
+ // shape (N, 4C, h, w) for input.
+ // [0,C) for top feature, [C,2C) for left feature,
+ // [2C,3C) for bottom feature, [3C,4C) for right feature
+ offset_input =
+ input + (batch_idx * channels * 4 + extreme_idx * channels + c_idx) *
+ height * width;
+
+ // extreme_idx in [0,1] -> offset_box_x indexed at x1
+ // extreme_idx in [2,3] -> offset_box_x indexed at x2
+ offset_box_x = offset_box + extreme_idx / 2 * 2;
+
+ // (x1,y1) or (x2,y2) for (x,y)
+ x = *offset_box_x;
+ y = *(offset_box_x + 1);
+
+ switch (extreme_idx) {
+ // top
+ case BorderMode::Top:
+ stride = box_width / pool_size;
+ x_stride = stride;
+ y_stride = 0;
+ break;
+ // left
+ case BorderMode::Left:
+ stride = box_height / pool_size;
+ x_stride = 0;
+ y_stride = stride;
+ break;
+ // bottom
+ case BorderMode::Bottom:
+ stride = box_width / pool_size;
+ x_stride = -stride;
+ y_stride = 0;
+ break;
+ // right
+ case BorderMode::Right:
+ stride = box_height / pool_size;
+ x_stride = 0;
+ y_stride = -stride;
+ break;
+ }
+
+ // initialize maxval and maxidx with the start position (e.g. (x1,y1) or
+ // (x2,y2))
+ maxval = bilinear_interpolate(offset_input, height, width, y, x, index);
+ maxidx = 0;
+
+ // do max_pool along the border
+ for (int i = 1; i <= pool_size; i++) {
+ x += x_stride;
+ y += y_stride;
+ val = bilinear_interpolate(offset_input, height, width, y, x, index);
+ if (val > maxval) {
+ maxval = val;
+ maxidx = i;
+ }
+ }
+
+ // update output and argmax_idx
+ *offset_output = maxval;
+ *offset_argmax_idx = maxidx;
+ }
+}
+
+/*** Backward ***/
+template <typename T>
+__global__ void border_align_backward_cuda_kernel(
+ const int nthreads, const T* grad_output, const T* boxes,
+ const int* argmax_idx, T* grad_input, const int channels,
+ const int box_size, const int height, const int width,
+ const int pool_size) {
+ CUDA_1D_KERNEL_LOOP(index, nthreads) {
+ // (batch_idx, c_idx, box_idx) is an element paralleled for computing
+ // output, and `extreme_idx` is in range [0,3]
+ int batch_idx, c_idx, box_idx, extreme_idx;
+ const int* offset_argmax_idx;
+ const T *offset_grad_output, *offset_box, *offset_box_x;
+ T *offset_grad_input, box_width, box_height, stride, x_stride, y_stride, x,
+ y;
+
+ extreme_idx = threadIdx.y;
+ batch_idx = index / channels / box_size;
+ box_idx = index % box_size + batch_idx * box_size;
+ c_idx = (index / box_size) % channels;
+
+ offset_box = boxes + box_idx * 4;
+ box_width = *(offset_box + 2) - *offset_box;
+ box_height = *(offset_box + 3) - *(offset_box + 1);
+ offset_grad_output = grad_output + index * 4 + extreme_idx;
+ offset_argmax_idx = argmax_idx + index * 4 + extreme_idx;
+ // [0,C) for top feature grad, [C,2C) for left feature grad,
+ // [2C,3C) for bottom feature grad, [3C,4C) for right feature grad
+ offset_grad_input = grad_input + (batch_idx * channels * 4 +
+ extreme_idx * channels + c_idx) *
+ height * width;
+
+ // extreme_idx in [0,1] -> offset_box_x indexed at x1
+ // extreme_idx in [2,3] -> offset_box_x indexed at x2
+ offset_box_x = offset_box + extreme_idx / 2 * 2;
+
+ switch (extreme_idx) {
+ // top
+ case BorderMode::Top:
+ stride = box_width / pool_size;
+ x_stride = stride;
+ y_stride = 0;
+ break;
+ // left
+ case BorderMode::Left:
+ stride = box_height / pool_size;
+ x_stride = 0;
+ y_stride = stride;
+ break;
+ // bottom
+ case BorderMode::Bottom:
+ stride = box_width / pool_size;
+ x_stride = -stride;
+ y_stride = 0;
+ break;
+ // right
+ case BorderMode::Right:
+ stride = box_height / pool_size;
+ x_stride = 0;
+ y_stride = -stride;
+ break;
+ }
+
+ // get position (x,y) which has maximum value during forward
+ x = *offset_box_x;
+ y = *(offset_box_x + 1);
+ x += x_stride * (T)(*offset_argmax_idx);
+ y += y_stride * (T)(*offset_argmax_idx);
+
+ T w1, w2, w3, w4;
+ int x_low, x_high, y_low, y_high;
+ bilinear_interpolate_gradient(height, width, y, x, w1, w2, w3, w4, x_low,
+ x_high, y_low, y_high, index);
+
+ // update grad_output
+ atomicAdd(offset_grad_input + y_low * width + x_low,
+ *offset_grad_output * w1);
+ atomicAdd(offset_grad_input + y_low * width + x_high,
+ *offset_grad_output * w2);
+ atomicAdd(offset_grad_input + y_high * width + x_low,
+ *offset_grad_output * w3);
+ atomicAdd(offset_grad_input + y_high * width + x_high,
+ *offset_grad_output * w4);
+ }
+}
+
+#endif // BORDER_ALIGN_CUDA_KERNEL_CUH
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/box_iou_rotated_cuda.cuh b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/box_iou_rotated_cuda.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..abd47cd85437804310886de057b5a839a49481b2
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/box_iou_rotated_cuda.cuh
@@ -0,0 +1,81 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+// modified from
+// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu
+#ifndef BOX_IOU_ROTATED_CUDA_CUH
+#define BOX_IOU_ROTATED_CUDA_CUH
+
+#ifdef MMCV_USE_PARROTS
+#include "parrots_cuda_helper.hpp"
+#else
+#include "pytorch_cuda_helper.hpp"
+#endif
+#include "box_iou_rotated_utils.hpp"
+
+// 2D block with 32 * 16 = 512 threads per block
+const int BLOCK_DIM_X = 32;
+const int BLOCK_DIM_Y = 16;
+
+inline int divideUP(const int x, const int y) { return (((x) + (y)-1) / (y)); }
+
+template <typename T>
+__global__ void box_iou_rotated_cuda_kernel(
+ const int n_boxes1, const int n_boxes2, const T* dev_boxes1,
+ const T* dev_boxes2, T* dev_ious, const int mode_flag, const bool aligned) {
+ if (aligned) {
+ CUDA_1D_KERNEL_LOOP(index, n_boxes1) {
+ int b1 = index;
+ int b2 = index;
+
+ int base1 = b1 * 5;
+
+ float block_boxes1[5];
+ float block_boxes2[5];
+
+ block_boxes1[0] = dev_boxes1[base1 + 0];
+ block_boxes1[1] = dev_boxes1[base1 + 1];
+ block_boxes1[2] = dev_boxes1[base1 + 2];
+ block_boxes1[3] = dev_boxes1[base1 + 3];
+ block_boxes1[4] = dev_boxes1[base1 + 4];
+
+ int base2 = b2 * 5;
+
+ block_boxes2[0] = dev_boxes2[base2 + 0];
+ block_boxes2[1] = dev_boxes2[base2 + 1];
+ block_boxes2[2] = dev_boxes2[base2 + 2];
+ block_boxes2[3] = dev_boxes2[base2 + 3];
+ block_boxes2[4] = dev_boxes2[base2 + 4];
+
+ dev_ious[index] =
+ single_box_iou_rotated(block_boxes1, block_boxes2, mode_flag);
+ }
+ } else {
+ CUDA_1D_KERNEL_LOOP(index, n_boxes1 * n_boxes2) {
+ int b1 = index / n_boxes2;
+ int b2 = index % n_boxes2;
+
+ int base1 = b1 * 5;
+
+ float block_boxes1[5];
+ float block_boxes2[5];
+
+ block_boxes1[0] = dev_boxes1[base1 + 0];
+ block_boxes1[1] = dev_boxes1[base1 + 1];
+ block_boxes1[2] = dev_boxes1[base1 + 2];
+ block_boxes1[3] = dev_boxes1[base1 + 3];
+ block_boxes1[4] = dev_boxes1[base1 + 4];
+
+ int base2 = b2 * 5;
+
+ block_boxes2[0] = dev_boxes2[base2 + 0];
+ block_boxes2[1] = dev_boxes2[base2 + 1];
+ block_boxes2[2] = dev_boxes2[base2 + 2];
+ block_boxes2[3] = dev_boxes2[base2 + 3];
+ block_boxes2[4] = dev_boxes2[base2 + 4];
+
+ dev_ious[index] =
+ single_box_iou_rotated(block_boxes1, block_boxes2, mode_flag);
+ }
+ }
+}
+
+#endif
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/carafe_cuda_kernel.cuh b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/carafe_cuda_kernel.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..bcfceb0d68cf7c99e2e7a845b18f095fe37d8271
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/carafe_cuda_kernel.cuh
@@ -0,0 +1,345 @@
+// encoding=utf-8
+// Copyright 2021 Huawei Technologies Co., Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef CARAFE_CUDA_KERNEL_CUH
+#define CARAFE_CUDA_KERNEL_CUH
+
+#ifdef MMCV_USE_PARROTS
+#include "parrots_cuda_helper.hpp"
+#else
+#include "pytorch_cuda_helper.hpp"
+#endif
+
+#ifdef HIP_DIFF
+#define WARP_SIZE 64
+#else
+#define WARP_SIZE 32
+#endif
+#define THREADS_PER_PIXEL 32
+#define MAX_SHARED_MEMORY 49152
+#define MAX_SHARED_SCALAR_T 6144 // 49152 / 8 = 6144
+#define MAXIMIZE_KERNEL_SIZE true
+#define kTileDim 32
+#define kBlockRows 8
+#define FULL_MASK 0xffffffff
+
+inline int divideUP(const int x, const int y) { return (((x) + (y)-1) / (y)); }
+
+__device__ inline int Loc2Index(const int n, const int c, const int h,
+ const int w, const int channel_num,
+ const int height, const int width) {
+ int index = w + (h + (c + n * channel_num) * height) * width;
+ return index;
+}
+#ifndef HIP_DIFF
+/* TODO: move this to a common place */
+template <typename scalar_t>
+__device__ inline scalar_t min(scalar_t a, scalar_t b) {
+ return a < b ? a : b;
+}
+
+template <typename scalar_t>
+__device__ inline scalar_t max(scalar_t a, scalar_t b) {
+ return a > b ? a : b;
+}
+#endif
+template <typename scalar_t>
+__device__ __forceinline__ scalar_t warpReduceSum(scalar_t val) {
+ for (int offset = WARP_SIZE / 2; offset > 0; offset /= 2)
+#ifdef HIP_DIFF
+ val += __shfl_down(val, offset);
+#else
+ val += __shfl_down_sync(FULL_MASK, val, offset);
+#endif
+ return val;
+}
+
+template <>
+__device__ __forceinline__ phalf warpReduceSum(phalf val) {
+ for (int offset = WARP_SIZE / 2; offset > 0; offset /= 2)
+#ifdef HIP_DIFF
+ __PHALF(val) += __shfl_down(FULL_MASK, val, offset);
+#else
+ __PHALF(val) +=
+ __shfl_down_sync(FULL_MASK, static_cast<__half>(__PHALF(val)), offset);
+#endif
+ return val;
+}
+
+// Splits the original matrix into submatrices with size 32 * 32.
+// Each block transposes one submatrix by loading it into shared memory.
+// Reference https://devblogs.nvidia.com/efficient-matrix-transpose-cuda-cc/
+template <typename scalar_t>
+__global__ void BatchTranspose2DCUDAKernel(const int N, const int H,
+ const int W, const int dh,
+ const int dw,
+ const scalar_t *__restrict__ X,
+ scalar_t *__restrict__ Y) {
+ __shared__ scalar_t tile[kTileDim][kTileDim + 1];
+ const int n = blockIdx.x / (dh * dw);
+ const int k = blockIdx.x % (dh * dw);
+ const int r = k / dw;
+ const int c = k % dw;
+ const int offset = n * H * W;
+ int x = c * kTileDim + threadIdx.x;
+ int y = r * kTileDim + threadIdx.y;
+ if (x < W) {
+ for (int i = 0; threadIdx.y + i < kTileDim && y + i < H; i += kBlockRows) {
+ tile[threadIdx.y + i][threadIdx.x] = X[offset + (y + i) * W + x];
+ }
+ }
+ __syncthreads();
+ x = r * kTileDim + threadIdx.x;
+ y = c * kTileDim + threadIdx.y;
+ if (x < H) {
+ for (int i = 0; threadIdx.y + i < kTileDim && y + i < W; i += kBlockRows) {
+ Y[offset + (y + i) * H + x] = tile[threadIdx.x][threadIdx.y + i];
+ }
+ }
+}
+template <typename scalar_t>
+__global__ void CARAFEForward(
+ const int num_kernels, const scalar_t *__restrict__ bottom_data,
+ const scalar_t *__restrict__ bottom_masks, const int kernel_size,
+ const int group_size, const int scale_factor, const int channels,
+ const int down_height, const int down_width, const int height,
+ const int width, const int mask_channels, scalar_t *__restrict__ top_data) {
+#if MAXIMIZE_KERNEL_SIZE
+ __shared__ float shared_mask[MAX_SHARED_SCALAR_T * 2];
+#else
+ __shared__ scalar_t shared_mask[MAX_SHARED_SCALAR_T];
+#endif
+
+ int index = threadIdx.x + blockIdx.x * blockDim.x;
+ if (index > num_kernels - 1) {
+ return;
+ }
+ const int pixel_id = threadIdx.x / THREADS_PER_PIXEL;
+ const int split_id = threadIdx.x % THREADS_PER_PIXEL;
+ index = index / THREADS_PER_PIXEL;
+ const int pw = index % width;
+ const int ph = (index / width) % height;
+ const int n = index / width / height;
+
+ const int down_pw = pw / scale_factor;
+ const int down_ph = ph / scale_factor;
+
+ const int start_w = down_pw - (kernel_size - 1) / 2;
+ const int end_w = down_pw + (kernel_size - 1) / 2 + 1;
+ const int start_h = down_ph - (kernel_size - 1) / 2;
+ const int end_h = down_ph + (kernel_size - 1) / 2 + 1;
+ for (int c = split_id; c < mask_channels; c += THREADS_PER_PIXEL) {
+ int mask_index = Loc2Index(n, ph, pw, c, height, width, mask_channels);
+ shared_mask[c * WARP_SIZE + pixel_id] = bottom_masks[mask_index];
+ }
+ __syncthreads();
+
+ const int channels_per_group = ceilf(channels / (float)group_size);
+#pragma unroll
+ for (int c = split_id; c < channels; c += THREADS_PER_PIXEL) {
+ int mask_group = c / channels_per_group;
+ scalar_t output_val = 0;
+#pragma unroll
+ for (int iy = start_h; iy < end_h; iy++) {
+#pragma unroll
+ for (int ix = start_w; ix < end_w; ix++) {
+ if (iy < 0 || iy > down_height - 1 || ix < 0 || ix > down_width - 1) {
+ continue;
+ }
+ int mask_iy = iy - down_ph + (kernel_size - 1) / 2;
+ int mask_ix = ix - down_pw + (kernel_size - 1) / 2;
+ int mask_c =
+ (mask_group * kernel_size + mask_iy) * kernel_size + mask_ix;
+ int feat_index =
+ Loc2Index(n, iy, ix, c, down_height, down_width, channels);
+
+ output_val += bottom_data[feat_index] *
+ shared_mask[mask_c * WARP_SIZE + pixel_id];
+ }
+ }
+
+ int top_index = Loc2Index(n, ph, pw, c, height, width, channels);
+ top_data[top_index] = output_val;
+ }
+}
+
+template <typename scalar_t>
+__global__ void CARAFEBackward_Feature(
+ const int num_kernels, const scalar_t *__restrict__ top_diff,
+ const scalar_t *__restrict__ bottom_masks, const int kernel_size,
+ const int group_size, const int scale_factor, const int channels,
+ const int down_height, const int down_width, const int height,
+ const int width, const int mask_channels,
+ scalar_t *__restrict__ bottom_diff) {
+#if MAXIMIZE_KERNEL_SIZE
+ __shared__ float shared_mask[MAX_SHARED_SCALAR_T * 2];
+#else
+ __shared__ scalar_t shared_mask[MAX_SHARED_SCALAR_T];
+#endif
+
+ int index = threadIdx.x + blockIdx.x * blockDim.x;
+ if (index > num_kernels - 1) {
+ return;
+ }
+
+ const int pixel_id = threadIdx.x / THREADS_PER_PIXEL;
+ const int split_id = threadIdx.x % THREADS_PER_PIXEL;
+ // (n, c, ph, pw) is an element in the bottom_data
+ index = index / THREADS_PER_PIXEL;
+ const int pw = index % width;
+ const int ph = (index / width) % height;
+ const int n = index / width / height;
+
+ const int start_w = pw - (kernel_size - 1) * scale_factor / 2;
+ const int end_w = pw + (kernel_size - 1) * scale_factor / 2 + 1;
+ const int start_h = ph - (kernel_size - 1) * scale_factor / 2;
+ const int end_h = ph + (kernel_size - 1) * scale_factor / 2 + 1;
+ for (int c = split_id; c < mask_channels; c += THREADS_PER_PIXEL) {
+ const int mask_w = (c % kernel_size) * scale_factor;
+ const int mask_h = (c / kernel_size % kernel_size) * scale_factor;
+ const int mask_x = start_w + mask_w;
+ const int mask_y = start_h + mask_h;
+ if (mask_y < 0 || mask_y > height - 1 || mask_x < 0 || mask_x > width - 1) {
+ shared_mask[c * WARP_SIZE + pixel_id] = 0;
+ continue;
+ }
+ const int mask_group = c / (kernel_size * kernel_size);
+ const int mask_c = (2 * mask_group + 1) * kernel_size * kernel_size - c - 1;
+ int mask_index =
+ Loc2Index(n, mask_c, mask_y, mask_x, mask_channels, height, width);
+ shared_mask[c * WARP_SIZE + pixel_id] = bottom_masks[mask_index];
+ }
+ __syncthreads();
+ const int channels_per_group = ceilf(channels / (float)group_size);
+#pragma unroll
+ for (int c = split_id; c < channels; c += THREADS_PER_PIXEL) {
+ int mask_group = c / channels_per_group;
+ int top_index = Loc2Index(n, ph, pw, c, height, width, channels);
+ scalar_t output_val = 0;
+#pragma unroll
+ for (int iy = start_h; iy < end_h; iy += scale_factor) {
+#pragma unroll
+ for (int ix = start_w; ix < end_w; ix += scale_factor) {
+ if (iy < 0 || iy > height - 1 || ix < 0 || ix > width - 1) {
+ continue;
+ }
+ int mask_iy =
+ (iy - ph + (kernel_size - 1) * scale_factor / 2) / scale_factor;
+ int mask_ix =
+ (ix - pw + (kernel_size - 1) * scale_factor / 2) / scale_factor;
+ int mask_c =
+ (mask_group * kernel_size + mask_iy) * kernel_size + mask_ix;
+ int feat_index = Loc2Index(n, iy, ix, c, height, width, channels);
+ output_val +=
+ shared_mask[mask_c * WARP_SIZE + pixel_id] * top_diff[feat_index];
+ }
+ }
+ bottom_diff[top_index] = output_val;
+ }
+}
+
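+// FeatureSum: sums the high-resolution input over each scale_factor x scale_factor
+// window, producing one (height, width) value per channel and output location.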
+template <typename scalar_t>
+__global__ void FeatureSum(const int num_kernels,
+ const scalar_t *__restrict__ input_data,
+ const int scale_factor, const int channels,
+ const int height, const int width,
+ scalar_t *__restrict__ output_data) {
+ int index = threadIdx.x + blockIdx.x * blockDim.x;
+ if (index > num_kernels - 1) {
+ return;
+ }
+ const int split_id = threadIdx.x % THREADS_PER_PIXEL;
+ index = index / THREADS_PER_PIXEL;
+ const int pw = index % width;
+ const int ph = (index / width) % height;
+ const int n = index / width / height;
+ for (int c = split_id; c < channels; c += THREADS_PER_PIXEL) {
+ scalar_t output_val = 0;
+ for (int iy = ph * scale_factor; iy < (ph + 1) * scale_factor; iy++) {
+ for (int ix = pw * scale_factor; ix < (pw + 1) * scale_factor; ix++) {
+ int input_id = Loc2Index(n, iy, ix, c, height * scale_factor,
+ width * scale_factor, channels);
+ output_val += input_data[input_id];
+ }
+ }
+ const int output_id = Loc2Index(n, ph, pw, c, height, width, channels);
+ output_data[output_id] = output_val;
+ }
+}
+
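+// CARAFEBackward_Mask: gradient w.r.t. the reassembly masks; each warp accumulates
+// top_diff * bottom_data over its channel group and lane 0 writes the warp-reduced sum.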
+template <typename scalar_t>
+__global__ void CARAFEBackward_Mask(const int num_kernels,
+ const scalar_t *__restrict__ top_diff,
+ const scalar_t *__restrict__ bottom_data,
+ const int kernel_size, const int group_size,
+ const int scale_factor, const int channels,
+ const int down_height, const int down_width,
+ const int height, const int width,
+ const int mask_channels,
+ scalar_t *__restrict__ mask_diff) {
+ int index = threadIdx.x + blockIdx.x * blockDim.x;
+ if (index > num_kernels - 1) {
+ return;
+ }
+
+ const int lane_id = index % WARP_SIZE;
+ index = index / WARP_SIZE;
+ const int mask_c = index % mask_channels;
+ // (n, c, ph, pw) is an element in the bottom_data
+ index = index / mask_channels;
+ const int pw = index % width;
+ const int ph = (index / width) % height;
+ const int n = index / width / height;
+
+ const int down_pw = pw / scale_factor;
+ const int down_ph = ph / scale_factor;
+
+ const int mask_group = mask_c / (kernel_size * kernel_size);
+ const int mask_loc = mask_c % (kernel_size * kernel_size);
+
+ const int offset_x = mask_loc % kernel_size - (kernel_size - 1) / 2;
+ const int offset_y =
+ mask_loc / kernel_size % kernel_size - (kernel_size - 1) / 2;
+
+ const int down_x = down_pw + offset_x;
+ const int down_y = down_ph + offset_y;
+
+ scalar_t output_val = 0;
+
+ if (down_y >= 0 && down_y <= down_height - 1 && down_x >= 0 &&
+ down_x <= down_width - 1) {
+ const int channels_per_mask = ceilf(channels / (float)group_size);
+ const int start = channels_per_mask * mask_group;
+ const int end = min(channels_per_mask * (mask_group + 1), channels);
+ for (int c = start + lane_id; c < end; c += WARP_SIZE) {
+ int bottom_id =
+ Loc2Index(n, down_y, down_x, c, down_height, down_width, channels);
+ int top_id = Loc2Index(n, ph, pw, c, height, width, channels);
+ output_val += top_diff[top_id] * bottom_data[bottom_id];
+ }
+ }
+#ifdef HIP_DIFF
+ __syncthreads();
+#else
+ __syncwarp();
+#endif
+ output_val = warpReduceSum(output_val);
+ if (lane_id == 0) {
+ const int mask_id =
+ Loc2Index(n, ph, pw, mask_c, height, width, mask_channels);
+ mask_diff[mask_id] = output_val;
+ }
+}
+
+#endif // CARAFE_CUDA_KERNEL_CUH
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/carafe_naive_cuda_kernel.cuh b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/carafe_naive_cuda_kernel.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..1e3b3876083d81130d837cfba6209e1b97c4b64c
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/carafe_naive_cuda_kernel.cuh
@@ -0,0 +1,124 @@
+// encoding=utf-8
+// Copyright 2021 Huawei Technologies Co., Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef CARAFE_NAIVE_CUDA_KERNEL_CUH
+#define CARAFE_NAIVE_CUDA_KERNEL_CUH
+
+#ifdef MMCV_USE_PARROTS
+#include "parrots_cuda_helper.hpp"
+#else
+#include "pytorch_cuda_helper.hpp"
+#endif
+
+__device__ inline int Loc2Index(const int n, const int c, const int h,
+ const int w, const int channel_num,
+ const int height, const int width) {
+ int index = w + (h + (c + n * channel_num) * height) * width;
+ return index;
+}
+
+template <typename scalar_t>
+__global__ void carafe_naive_forward_cuda_kernel(
+ const int nthreads, const scalar_t *bottom_data,
+ const scalar_t *bottom_masks, scalar_t *top_data, const int kernel_size,
+ const int group_size, const int scale_factor, const int channels,
+ const int height, const int width) {
+ CUDA_1D_KERNEL_LOOP(index, nthreads) {
+ // (n, c, ph, pw) is an element in the bottom_data
+ int pw = index % width;
+ int ph = (index / width) % height;
+ int c = (index / width / height) % channels;
+ int n = index / width / height / channels;
+
+ int mask_channels = kernel_size * kernel_size * group_size;
+ int mask_group = c / (channels / group_size);
+
+ int down_pw = pw / scale_factor;
+ int down_ph = ph / scale_factor;
+ int down_width = width / scale_factor;
+ int down_height = height / scale_factor;
+ int start_w = down_pw - (kernel_size - 1) / 2;
+ int end_w = down_pw + (kernel_size - 1) / 2 + 1;
+ int start_h = down_ph - (kernel_size - 1) / 2;
+ int end_h = down_ph + (kernel_size - 1) / 2 + 1;
+
+ scalar_t output_val = 0;
+ for (int iy = start_h; iy < end_h; iy++) {
+ for (int ix = start_w; ix < end_w; ix++) {
+ if (iy < 0 || iy > down_height - 1 || ix < 0 || ix > down_width - 1) {
+ continue;
+ }
+ int mask_iy = iy - down_ph + (kernel_size - 1) / 2;
+ int mask_ix = ix - down_pw + (kernel_size - 1) / 2;
+ int mask_c =
+ (mask_group * kernel_size + mask_iy) * kernel_size + mask_ix;
+ int feat_index =
+ Loc2Index(n, c, iy, ix, channels, down_height, down_width);
+ int mask_index =
+ Loc2Index(n, mask_c, ph, pw, mask_channels, height, width);
+ output_val += bottom_data[feat_index] * bottom_masks[mask_index];
+ }
+ }
+ top_data[index] = output_val;
+ }
+}
+
+template <typename scalar_t>
+__global__ void carafe_naive_backward_cuda_kernel(
+ const int nthreads, const scalar_t *top_diff, const scalar_t *bottom_data,
+ const scalar_t *bottom_masks, scalar_t *bottom_diff, scalar_t *mask_diff,
+ const int kernel_size, const int group_size, const int scale_factor,
+ const int channels, const int height, const int width) {
+ CUDA_1D_KERNEL_LOOP(index, nthreads) {
+ // (n, c, ph, pw) is an element in the bottom_data
+ int pw = index % width;
+ int ph = (index / width) % height;
+ int c = (index / width / height) % channels;
+ int n = index / width / height / channels;
+
+ int mask_channels = kernel_size * kernel_size * group_size;
+ int mask_group = c / (channels / group_size);
+
+ int down_pw = pw / scale_factor;
+ int down_ph = ph / scale_factor;
+ int down_width = width / scale_factor;
+ int down_height = height / scale_factor;
+ int start_w = down_pw - (kernel_size - 1) / 2;
+ int end_w = down_pw + (kernel_size - 1) / 2 + 1;
+ int start_h = down_ph - (kernel_size - 1) / 2;
+ int end_h = down_ph + (kernel_size - 1) / 2 + 1;
+
+ for (int iy = start_h; iy < end_h; iy++) {
+ for (int ix = start_w; ix < end_w; ix++) {
+ if (iy < 0 || iy > down_height - 1 || ix < 0 || ix > down_width - 1) {
+ continue;
+ }
+ int mask_iy = iy - down_ph + (kernel_size - 1) / 2;
+ int mask_ix = ix - down_pw + (kernel_size - 1) / 2;
+ int mask_c =
+ (mask_group * kernel_size + mask_iy) * kernel_size + mask_ix;
+ int feat_index =
+ Loc2Index(n, c, iy, ix, channels, down_height, down_width);
+ int mask_index =
+ Loc2Index(n, mask_c, ph, pw, mask_channels, height, width);
+ atomicAdd(bottom_diff + feat_index,
+ bottom_masks[mask_index] * top_diff[index]);
+ atomicAdd(mask_diff + mask_index,
+ bottom_data[feat_index] * top_diff[index]);
+ }
+ }
+ }
+}
+
+#endif // CARAFE_NAIVE_CUDA_KERNEL_CUH
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/common_cuda_helper.hpp b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/common_cuda_helper.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..b12aa9a26a2cc162fd89f68ccc97e17749090a41
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/common_cuda_helper.hpp
@@ -0,0 +1,120 @@
+#ifndef COMMON_CUDA_HELPER
+#define COMMON_CUDA_HELPER
+
+#include <cuda.h>
+
+#define CUDA_1D_KERNEL_LOOP(i, n) \
+ for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
+ i += blockDim.x * gridDim.x)
+
+#define CUDA_2D_KERNEL_LOOP(i, n, j, m) \
+ for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
+ i += blockDim.x * gridDim.x) \
+ for (size_t j = blockIdx.y * blockDim.y + threadIdx.y; j < (m); \
+ j += blockDim.y * gridDim.y)
+
+#define CUDA_2D_KERNEL_BLOCK_LOOP(i, n, j, m) \
+ for (size_t i = blockIdx.x; i < (n); i += gridDim.x) \
+ for (size_t j = blockIdx.y; j < (m); j += gridDim.y)
+
+#define THREADS_PER_BLOCK 512
+
+inline int GET_BLOCKS(const int N, const int num_threads = THREADS_PER_BLOCK) {
+ int optimal_block_num = (N + num_threads - 1) / num_threads;
+ int max_block_num = 4096;
+ return min(optimal_block_num, max_block_num);
+}
+
+template <typename T>
+__device__ T bilinear_interpolate(const T* input, const int height,
+ const int width, T y, T x,
+ const int index /* index for debug only*/) {
+ // deal with cases that inverse elements are out of feature map boundary
+ if (y < -1.0 || y > height || x < -1.0 || x > width) return 0;
+
+ if (y <= 0) y = 0;
+ if (x <= 0) x = 0;
+
+ int y_low = (int)y;
+ int x_low = (int)x;
+ int y_high;
+ int x_high;
+
+ if (y_low >= height - 1) {
+ y_high = y_low = height - 1;
+ y = (T)y_low;
+ } else {
+ y_high = y_low + 1;
+ }
+
+ if (x_low >= width - 1) {
+ x_high = x_low = width - 1;
+ x = (T)x_low;
+ } else {
+ x_high = x_low + 1;
+ }
+
+ T ly = y - y_low;
+ T lx = x - x_low;
+ T hy = 1. - ly, hx = 1. - lx;
+ // do bilinear interpolation
+ T v1 = input[y_low * width + x_low];
+ T v2 = input[y_low * width + x_high];
+ T v3 = input[y_high * width + x_low];
+ T v4 = input[y_high * width + x_high];
+ T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
+
+ T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
+
+ return val;
+}
+
+template <typename T>
+__device__ void bilinear_interpolate_gradient(
+ const int height, const int width, T y, T x, T& w1, T& w2, T& w3, T& w4,
+ int& x_low, int& x_high, int& y_low, int& y_high,
+ const int index /* index for debug only*/) {
+ // deal with cases that inverse elements are out of feature map boundary
+ if (y < -1.0 || y > height || x < -1.0 || x > width) {
+ // empty
+ w1 = w2 = w3 = w4 = 0.;
+ x_low = x_high = y_low = y_high = -1;
+ return;
+ }
+
+ if (y <= 0) y = 0;
+ if (x <= 0) x = 0;
+
+ y_low = (int)y;
+ x_low = (int)x;
+
+ if (y_low >= height - 1) {
+ y_high = y_low = height - 1;
+ y = (T)y_low;
+ } else {
+ y_high = y_low + 1;
+ }
+
+ if (x_low >= width - 1) {
+ x_high = x_low = width - 1;
+ x = (T)x_low;
+ } else {
+ x_high = x_low + 1;
+ }
+
+ T ly = y - y_low;
+ T lx = x - x_low;
+ T hy = 1. - ly, hx = 1. - lx;
+
+ // reference in forward
+ // T v1 = input[y_low * width + x_low];
+ // T v2 = input[y_low * width + x_high];
+ // T v3 = input[y_high * width + x_low];
+ // T v4 = input[y_high * width + x_high];
+ // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
+
+ w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
+
+ return;
+}
+#endif // COMMON_CUDA_HELPER
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/convex_iou_cuda_kernel.cuh b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/convex_iou_cuda_kernel.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..98ce7cdb384810188cb6552de27cc237fd865b7e
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/convex_iou_cuda_kernel.cuh
@@ -0,0 +1,844 @@
+// encoding=utf-8
+// Copyright 2021 Huawei Technologies Co., Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef CONVEX_IOU_CUDA_KERNEL_CUH
+#define CONVEX_IOU_CUDA_KERNEL_CUH
+
+#ifdef MMCV_USE_PARROTS
+#include "parrots_cuda_helper.hpp"
+#else
+#include "pytorch_cuda_helper.hpp"
+#endif
+
+#define MAXN 100
+#define NMAX 512
+__device__ const double EPS = 1E-8;
+
+__device__ inline int sig(double d) { return (d > EPS) - (d < -EPS); }
+
+struct Point {
+ double x, y;
+ __device__ Point() {}
+ __device__ Point(double x, double y) : x(x), y(y) {}
+};
+
+__device__ inline bool point_same(Point& a, Point& b) {
+ return sig(a.x - b.x) == 0 && sig(a.y - b.y) == 0;
+}
+
+__device__ inline void swap1(Point* a, Point* b) {
+ Point temp;
+ temp.x = a->x;
+ temp.y = a->y;
+
+ a->x = b->x;
+ a->y = b->y;
+
+ b->x = temp.x;
+ b->y = temp.y;
+}
+
+__device__ inline void reverse1(Point* a, const int n) {
+ for (int i = 0; i < (n - 1) / 2.0; i++) {
+ Point* j = &(a[i]);
+ Point* k = &(a[n - 1 - i]);
+ swap1(j, k);
+ }
+}
+
+__device__ inline double cross(Point o, Point a, Point b) {
+ return (a.x - o.x) * (b.y - o.y) - (b.x - o.x) * (a.y - o.y);
+}
+
+__device__ inline double dis(Point a, Point b) {
+ return (a.x - b.x) * (a.x - b.x) + (a.y - b.y) * (a.y - b.y);
+}
+__device__ inline double area(Point* ps, int n) {
+ ps[n] = ps[0];
+ double res = 0;
+ for (int i = 0; i < n; i++) {
+ res += ps[i].x * ps[i + 1].y - ps[i].y * ps[i + 1].x;
+ }
+ return res / 2.0;
+}
+__device__ inline double polygon_area_grad(Point* ps, int n,
+ int* polygon_to_pred_index,
+ int n_pred, double* grad_C) {
+ ps[n] = ps[0];
+ double partion_grad[4 * 30 + 2];
+ double res = 0;
+ for (int i = 0; i < n; i++) {
+ res += ps[i].x * ps[i + 1].y - ps[i].y * ps[i + 1].x;
+ partion_grad[i * 4 + 2] = ps[i + 1].y;
+ partion_grad[i * 4 + 3] = -ps[i + 1].x;
+ if (i != n - 1) {
+ partion_grad[i * 4 + 4] = -ps[i].y;
+ partion_grad[i * 4 + 5] = ps[i].x;
+ } else {
+ partion_grad[0] = -ps[i].y;
+ partion_grad[1] = ps[i].x;
+ }
+ }
+ for (int i = 0; i < n; i++) {
+ for (int j = 0; j < n_pred; j++) {
+ if (i == polygon_to_pred_index[j]) {
+ grad_C[2 * polygon_to_pred_index[j + n_pred]] =
+ (partion_grad[i * 4] + partion_grad[i * 4 + 2]) / 2;
+ break;
+ }
+ }
+ for (int j = 0; j < n_pred; j++) {
+ if (i == polygon_to_pred_index[j]) {
+ grad_C[2 * polygon_to_pred_index[j + n_pred] + 1] =
+ (partion_grad[i * 4 + 1] + partion_grad[i * 4 + 1 + 2]) / 2;
+ break;
+ }
+ }
+ }
+
+ return res / 2.0;
+}
+
+__device__ inline int lineCross(Point a, Point b, Point c, Point d, Point& p,
+ double* cut_grad, int m, int n, int i) {
+ double s1, s2;
+ double s2_s1_2;
+ double ds1_dxc, ds1_dyc, ds2_dxd, ds2_dyd;
+ double dxp_dxc, dxp_dyc, dxp_dxd, dxp_dyd, dyp_dxc, dyp_dyc, dyp_dxd, dyp_dyd;
+ s1 = cross(a, b, c);
+ s2 = cross(a, b, d);
+
+ ds1_dxc = -(b.y - a.y);
+ ds1_dyc = b.x - a.x;
+ ds2_dxd = ds1_dxc;
+ ds2_dyd = ds1_dyc;
+ s2_s1_2 = (s2 - s1) * (s2 - s1);
+
+ if (sig(s1) == 0 && sig(s2) == 0) return 2;
+ if (sig(s2 - s1) == 0) return 0;
+
+ dxp_dxc =
+ ((s2 - d.x * ds1_dxc) * (s2 - s1) - (c.x * s2 - d.x * s1) * (-ds1_dxc)) /
+ (s2_s1_2);
+ dxp_dyc =
+ ((0 - d.x * ds1_dyc) * (s2 - s1) - (c.x * s2 - d.x * s1) * (-ds1_dyc)) /
+ (s2_s1_2);
+ dxp_dxd =
+ ((c.x * ds2_dxd - s1) * (s2 - s1) - (c.x * s2 - d.x * s1) * (ds2_dxd)) /
+ (s2_s1_2);
+ dxp_dyd =
+ ((c.x * ds2_dyd - 0) * (s2 - s1) - (c.x * s2 - d.x * s1) * (ds2_dyd)) /
+ (s2_s1_2);
+
+ dyp_dxc =
+ ((0 - d.y * ds1_dxc) * (s2 - s1) - (c.y * s2 - d.y * s1) * (-ds1_dxc)) /
+ (s2_s1_2);
+ dyp_dyc =
+ ((s2 - d.y * ds1_dyc) * (s2 - s1) - (c.y * s2 - d.y * s1) * (-ds1_dyc)) /
+ (s2_s1_2);
+ dyp_dxd =
+ ((c.y * ds2_dxd - 0) * (s2 - s1) - (c.y * s2 - d.y * s1) * (ds2_dxd)) /
+ (s2_s1_2);
+ dyp_dyd =
+ ((c.y * ds2_dyd - s1) * (s2 - s1) - (c.y * s2 - d.y * s1) * (ds2_dyd)) /
+ (s2_s1_2);
+
+ p.x = (c.x * s2 - d.x * s1) / (s2 - s1);
+ p.y = (c.y * s2 - d.y * s1) / (s2 - s1);
+ if (i == n - 1) {
+ cut_grad[4 * n * m + 4 * i] = dxp_dxc; // + dyp_dxc;
+ cut_grad[4 * n * m + 4 * i + 1] = dyp_dxc;
+ cut_grad[4 * n * m + 4 * i + 2] = dxp_dyc; // + dyp_dyc;
+ cut_grad[4 * n * m + 4 * i + 3] = dyp_dyc;
+ cut_grad[4 * n * m + 0] = dxp_dxd; // + dyp_dxd;
+ cut_grad[4 * n * m + 1] = dyp_dxd;
+ cut_grad[4 * n * m + 2] = dxp_dyd; // + dyp_dyd;
+ cut_grad[4 * n * m + 3] = dyp_dyd;
+ } else {
+ cut_grad[4 * n * m + 4 * i] = dxp_dxc; // + dyp_dxc;
+ cut_grad[4 * n * m + 4 * i + 1] = dyp_dxc;
+ cut_grad[4 * n * m + 4 * i + 2] = dxp_dyc; // + dyp_dyc;
+ cut_grad[4 * n * m + 4 * i + 3] = dyp_dyc;
+ cut_grad[4 * n * m + 4 * (i + 1)] = dxp_dxd; // + dyp_dxd;
+ cut_grad[4 * n * m + 4 * (i + 1) + 1] = dyp_dxd;
+ cut_grad[4 * n * m + 4 * (i + 1) + 2] = dxp_dyd; // + dyp_dyd;
+ cut_grad[4 * n * m + 4 * (i + 1) + 3] = dyp_dyd;
+ }
+
+ return 1;
+}
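+// Clips polygon p against the half-plane to the left of directed line a->b (one
+// Sutherland-Hodgman step) and records the Jacobian of the surviving vertices in cut_grad.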
+__device__ inline void polygon_cut(Point* p, int& n, Point a, Point b,
+ double* cut_grad) {
+ Point pp[MAXN];
+ double ccur_grad[MAXN] = {};
+ int m = 0;
+ p[n] = p[0];
+ int k = n;
+ for (int i = 0; i < n; i++) {
+ if (sig(cross(a, b, p[i])) > 0) {
+ pp[m] = p[i];
+ ccur_grad[4 * n * m + 4 * i] = 1.0;
+ ccur_grad[4 * n * m + 4 * i + 3] = 1.0;
+ m++;
+ }
+ if (sig(cross(a, b, p[i])) != sig(cross(a, b, p[i + 1]))) {
+ lineCross(a, b, p[i], p[i + 1], pp[m], ccur_grad, m, n, i);
+ m++;
+ }
+ }
+
+ n = 0;
+ for (int i = 0; i < m; i++) {
+ if (!i || !(point_same(pp[i], pp[i - 1]))) {
+ p[n] = pp[i];
+ for (int j = 0; j < 4 * k; j++) {
+ cut_grad[4 * k * n + j] = ccur_grad[4 * k * i + j];
+ }
+ n++;
+ }
+ }
+
+ while (n > 1 && point_same(p[n - 1], p[0])) n--;
+}
+
+__device__ inline double intersectArea(Point a, Point b, Point c, Point d,
+ double* grad_AB, int order,
+ int convex_n) {
+ Point o(0, 0);
+ int res_flag = 0;
+ int s1 = sig(cross(o, a, b));
+ int s2 = sig(cross(o, c, d));
+ if (s1 == 0 || s2 == 0) return 0.0;
+ if (s1 == -1) {
+ Point* i = &a;
+ Point* j = &b;
+ swap1(i, j);
+ res_flag = 1;
+ }
+ if (s2 == -1) {
+ Point* i = &c;
+ Point* j = &d;
+ swap1(i, j);
+ }
+ Point p[10] = {o, a, b};
+ int n = 3, n0 = 3, n1, n2, n3;
+ double cut_grad1[MAXN] = {};
+ double cut_grad2[MAXN] = {};
+ double cut_grad3[MAXN] = {};
+ double p1_p_grad[10][10] = {};
+ double p2_p1_grad[10][10] = {};
+ double p3_p2_grad[10][10] = {};
+
+ double p3_p1_grad[10][10] = {};
+ double p3_p_grad[10][10] = {};
+
+ // 1
+ polygon_cut(p, n, o, c, cut_grad1);
+ n1 = n;
+ for (int i = 0; i < n; i++) {
+ for (int j = 0; j < 4 * n0; j++) {
+ if (!(j % 2)) {
+ p1_p_grad[2 * i][j / 2] = cut_grad1[4 * n0 * i + j];
+ } else {
+ p1_p_grad[2 * i + 1][j / 2] = cut_grad1[4 * n0 * i + j];
+ }
+ }
+ }
+
+ // 2
+ polygon_cut(p, n, c, d, cut_grad2);
+ n2 = n;
+ for (int i = 0; i < n; i++) {
+ for (int j = 0; j < 4 * n1; j++) {
+ if (!(j % 2)) {
+ p2_p1_grad[2 * i][j / 2] = cut_grad2[4 * n1 * i + j];
+ } else {
+ p2_p1_grad[2 * i + 1][j / 2] = cut_grad2[4 * n1 * i + j];
+ }
+ }
+ }
+ // 3
+ polygon_cut(p, n, d, o, cut_grad3);
+ n3 = n;
+ for (int i = 0; i < n; i++) {
+ for (int j = 0; j < 4 * n2; j++) {
+ if (!(j % 2)) {
+ p3_p2_grad[2 * i][j / 2] = cut_grad3[4 * n2 * i + j];
+ } else {
+ p3_p2_grad[2 * i + 1][j / 2] = cut_grad3[4 * n2 * i + j];
+ }
+ }
+ }
+
+ // mul
+ // p3_p2(n3 * n2) * p2_p1(n2 * n1) = p3_p1 (n3 * n1)
+ for (int i = 0; i < 2 * n3; i++) {
+ for (int j = 0; j < 2 * n1; j++) {
+ double sum = 0.0;
+ for (int m = 0; m < 2 * n2; m++) {
+ sum = sum + p3_p2_grad[i][m] * p2_p1_grad[m][j];
+ }
+ p3_p1_grad[i][j] = sum;
+ }
+ }
+
+ // p3_p1 (n3 * n1) * p1_p (n1 * n0) = p3_p (n3 * n0)
+ for (int i = 0; i < 2 * n3; i++) {
+ for (int j = 0; j < 2 * n0; j++) {
+ double sum = 0.0;
+ for (int m = 0; m < 2 * n1; m++) {
+ sum = sum + p3_p1_grad[i][m] * p1_p_grad[m][j];
+ }
+ p3_p_grad[i][j] = sum;
+ }
+ }
+
+ // calculate S_grad
+ int polygon_index_box_index[20];
+ double grad_polygon[20];
+ double S_grad[6];
+
+ for (int i = 0; i < n3; i++) {
+ polygon_index_box_index[i] = i;
+ polygon_index_box_index[i + n3] = i;
+ }
+
+ double res =
+ polygon_area_grad(p, n3, polygon_index_box_index, n3, grad_polygon);
+
+ if (s1 * s2 == -1) {
+ for (int j = 0; j < 2 * 3; j++) {
+ double sum = 0.0;
+ for (int m = 0; m < 2 * n3; m++) {
+ sum = sum - grad_polygon[m] * p3_p_grad[m][j];
+ }
+ S_grad[j] = sum;
+ }
+
+ if (order != convex_n - 1) {
+ if (res_flag) {
+ grad_AB[2 * order] += S_grad[4];
+ grad_AB[2 * order + 1] += S_grad[5];
+ grad_AB[2 * order + 2] += S_grad[2];
+ grad_AB[2 * order + 3] += S_grad[3];
+
+ } else {
+ grad_AB[2 * order] += S_grad[2];
+ grad_AB[2 * order + 1] += S_grad[3];
+ grad_AB[2 * order + 2] += S_grad[4];
+ grad_AB[2 * order + 3] += S_grad[5];
+ }
+ } else {
+ if (res_flag) {
+ grad_AB[2 * order] += S_grad[4];
+ grad_AB[2 * order + 1] += S_grad[5];
+ grad_AB[0] += S_grad[2];
+ grad_AB[1] += S_grad[3];
+
+ } else {
+ grad_AB[2 * order] += S_grad[2];
+ grad_AB[2 * order + 1] += S_grad[3];
+ grad_AB[0] += S_grad[4];
+ grad_AB[1] += S_grad[5];
+ }
+ }
+ res = -res;
+ } else {
+ for (int j = 0; j < 2 * 3; j++) {
+ double sum = 0.0;
+ for (int m = 0; m < 2 * n3; m++) {
+ sum = sum + grad_polygon[m] * p3_p_grad[m][j];
+ }
+ S_grad[j] = sum;
+ }
+
+ if (order != convex_n - 1) {
+ if (res_flag) {
+ grad_AB[2 * order] += S_grad[4];
+ grad_AB[2 * order + 1] += S_grad[5];
+ grad_AB[2 * order + 2] += S_grad[2];
+ grad_AB[2 * order + 3] += S_grad[3];
+ } else {
+ grad_AB[2 * order] += S_grad[2];
+ grad_AB[2 * order + 1] += S_grad[3];
+ grad_AB[2 * order + 2] += S_grad[4];
+ grad_AB[2 * order + 3] += S_grad[5];
+ }
+ } else {
+ if (res_flag) {
+ grad_AB[2 * order] += S_grad[4];
+ grad_AB[2 * order + 1] += S_grad[5];
+ grad_AB[0] += S_grad[2];
+ grad_AB[1] += S_grad[3];
+ } else {
+ grad_AB[2 * order] += S_grad[2];
+ grad_AB[2 * order + 1] += S_grad[3];
+ grad_AB[0] += S_grad[4];
+ grad_AB[1] += S_grad[5];
+ }
+ }
+ }
+ return res;
+}
+
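+// Intersection area of two convex polygons: sum of signed triangle-triangle intersections
+// over all edge pairs, with gradients accumulated into grad_AB.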
+__device__ inline double intersectAreaO(Point* ps1, int n1, Point* ps2, int n2,
+ double* grad_AB) {
+ if (area(ps1, n1) < 0) reverse1(ps1, n1);
+ if (area(ps2, n2) < 0) reverse1(ps2, n2);
+ ps1[n1] = ps1[0];
+ ps2[n2] = ps2[0];
+ double res = 0;
+ for (int i = 0; i < n1; i++) {
+ for (int j = 0; j < n2; j++) {
+ res +=
+ intersectArea(ps1[i], ps1[i + 1], ps2[j], ps2[j + 1], grad_AB, i, n1);
+ }
+ }
+ return res;
+}
+
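+// Jarvis march (gift wrapping): rewrites in_poly with its convex-hull vertices and updates n_poly.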
+__device__ inline void Jarvis(Point* in_poly, int& n_poly) {
+ Point p_max, p_k;
+ int max_index, k_index;
+ int Stack[NMAX] = {}, top1, top2;
+ double sign;
+ Point right_point[10], left_point[10];
+
+ for (int i = 0; i < n_poly; i++) {
+ if (in_poly[i].y < in_poly[0].y ||
+ in_poly[i].y == in_poly[0].y && in_poly[i].x < in_poly[0].x) {
+ Point* j = &(in_poly[0]);
+ Point* k = &(in_poly[i]);
+ swap1(j, k);
+ }
+ if (i == 0) {
+ p_max = in_poly[0];
+ max_index = 0;
+ }
+ if (in_poly[i].y > p_max.y ||
+ in_poly[i].y == p_max.y && in_poly[i].x > p_max.x) {
+ p_max = in_poly[i];
+ max_index = i;
+ }
+ }
+
+ if (max_index == 0) {
+ max_index = 1;
+ p_max = in_poly[max_index];
+ }
+
+ k_index = 0, Stack[0] = 0, top1 = 0;
+ while (k_index != max_index) {
+ p_k = p_max;
+ k_index = max_index;
+ for (int i = 1; i < n_poly; i++) {
+ sign = cross(in_poly[Stack[top1]], in_poly[i], p_k);
+ if ((sign > 0) || ((sign == 0) && (dis(in_poly[Stack[top1]], in_poly[i]) >
+ dis(in_poly[Stack[top1]], p_k)))) {
+ p_k = in_poly[i];
+ k_index = i;
+ }
+ }
+ top1++;
+ Stack[top1] = k_index;
+ }
+ for (int i = 0; i <= top1; i++) right_point[i] = in_poly[Stack[i]];
+
+ k_index = 0, Stack[0] = 0, top2 = 0;
+
+ while (k_index != max_index) {
+ p_k = p_max;
+ k_index = max_index;
+ for (int i = 1; i < n_poly; i++) {
+ sign = cross(in_poly[Stack[top2]], in_poly[i], p_k);
+ if ((sign < 0) || (sign == 0) && (dis(in_poly[Stack[top2]], in_poly[i]) >
+ dis(in_poly[Stack[top2]], p_k))) {
+ p_k = in_poly[i];
+ k_index = i;
+ }
+ }
+ top2++;
+ Stack[top2] = k_index;
+ }
+ for (int i = top2 - 1; i >= 0; i--) left_point[i] = in_poly[Stack[i]];
+
+ for (int i = 0; i < top1 + top2; i++) {
+ if (i <= top1) {
+ in_poly[i] = right_point[i];
+ } else {
+ in_poly[i] = left_point[top2 - (i - top1)];
+ }
+ }
+ n_poly = top1 + top2;
+}
+
+__device__ inline double intersectAreaPoly(Point* ps1, int n1, Point* ps2,
+ int n2, double* grad_C) {
+ Point polygon[MAXN];
+ int n = n1 + n2, n_poly = 0;
+ for (int i = 0; i < n1; i++) {
+ for (int j = 0; j < n - n1; j++) {
+ if (point_same(ps1[i], ps2[j])) {
+ for (int k = j; k < n - n1 - 1; k++) {
+ ps2[k] = ps2[k + 1];
+ }
+ n2--;
+ break;
+ }
+ }
+ }
+ n_poly = n1 + n2;
+ for (int i = 0; i < n_poly; i++) {
+ if (i < n1) {
+ polygon[i] = ps1[i];
+ } else {
+ polygon[i] = ps2[i - n1];
+ }
+ }
+
+ Jarvis(polygon, n_poly);
+
+ int polygon_to_pred_index[18] = {-1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1};
+ int n_pred = 0;
+ for (int i = 0; i < n_poly; i++) {
+ for (int j = 0; j < n1; j++) {
+ if (polygon[i].x == ps1[j].x && polygon[i].y == ps1[j].y) {
+ polygon_to_pred_index[n_pred] = i;
+ polygon_to_pred_index[n_pred + n1] = j;
+ n_pred += 1;
+ break;
+ }
+ }
+ }
+ if (n_pred == 0) {
+ double polygon_area = fabs(area(polygon, n_poly));
+ for (int i = 0; i < 18; i++) {
+ grad_C[i] = 0.0;
+ }
+ return polygon_area;
+ } else {
+ double polygon_area =
+ polygon_area_grad(polygon, n_poly, polygon_to_pred_index, n1, grad_C);
+ if (polygon_area < 0) {
+ for (int i = 0; i < 18; i++) {
+ grad_C[i] = -grad_C[i];
+ }
+ }
+ return fabs(polygon_area);
+ }
+}
+
+// convex_find and get the polygon_index_box_index
+__device__ inline void Jarvis_and_index(Point* in_poly, int& n_poly,
+ int* points_to_convex_ind) {
+ int n_input = n_poly;
+ Point input_poly[20];
+ for (int i = 0; i < n_input; i++) {
+ input_poly[i].x = in_poly[i].x;
+ input_poly[i].y = in_poly[i].y;
+ }
+ Point p_max, p_k;
+ int max_index, k_index;
+ int Stack[20], top1, top2;
+ double sign;
+ Point right_point[10], left_point[10];
+
+ for (int i = 0; i < n_poly; i++) {
+ if (in_poly[i].y < in_poly[0].y ||
+ in_poly[i].y == in_poly[0].y && in_poly[i].x < in_poly[0].x) {
+ Point* j = &(in_poly[0]);
+ Point* k = &(in_poly[i]);
+ swap1(j, k);
+ }
+ if (i == 0) {
+ p_max = in_poly[0];
+ max_index = 0;
+ }
+ if (in_poly[i].y > p_max.y ||
+ in_poly[i].y == p_max.y && in_poly[i].x > p_max.x) {
+ p_max = in_poly[i];
+ max_index = i;
+ }
+ }
+ if (max_index == 0) {
+ max_index = 1;
+ p_max = in_poly[max_index];
+ }
+
+ k_index = 0, Stack[0] = 0, top1 = 0;
+ while (k_index != max_index) {
+ p_k = p_max;
+ k_index = max_index;
+ for (int i = 1; i < n_poly; i++) {
+ sign = cross(in_poly[Stack[top1]], in_poly[i], p_k);
+ if ((sign > 0) || ((sign == 0) && (dis(in_poly[Stack[top1]], in_poly[i]) >
+ dis(in_poly[Stack[top1]], p_k)))) {
+ p_k = in_poly[i];
+ k_index = i;
+ }
+ }
+ top1++;
+ Stack[top1] = k_index;
+ }
+ for (int i = 0; i <= top1; i++) {
+ right_point[i] = in_poly[Stack[i]];
+ }
+
+ k_index = 0, Stack[0] = 0, top2 = 0;
+
+ while (k_index != max_index) {
+ p_k = p_max;
+ k_index = max_index;
+ for (int i = 1; i < n_poly; i++) {
+ sign = cross(in_poly[Stack[top2]], in_poly[i], p_k);
+ if ((sign < 0) || (sign == 0) && (dis(in_poly[Stack[top2]], in_poly[i]) >
+ dis(in_poly[Stack[top2]], p_k))) {
+ p_k = in_poly[i];
+ k_index = i;
+ }
+ }
+ top2++;
+ Stack[top2] = k_index;
+ }
+
+ for (int i = top2 - 1; i >= 0; i--) {
+ left_point[i] = in_poly[Stack[i]];
+ }
+
+ for (int i = 0; i < top1 + top2; i++) {
+ if (i <= top1) {
+ in_poly[i] = right_point[i];
+ } else {
+ in_poly[i] = left_point[top2 - (i - top1)];
+ }
+ }
+ n_poly = top1 + top2;
+ for (int i = 0; i < n_poly; i++) {
+ for (int j = 0; j < n_input; j++) {
+ if (point_same(in_poly[i], input_poly[j])) {
+ points_to_convex_ind[i] = j;
+ break;
+ }
+ }
+ }
+}
+
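+// Convex GIoU (rot_giou below) between the convex hull of the 9 predicted points and the
+// 4-point gt box; analytic gradients for the predicted points are written to point_grad.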
+template <typename T>
+__device__ inline float devrIoU(T const* const p, T const* const q,
+ T* point_grad, const int idx) {
+ Point ps1[MAXN], ps2[MAXN];
+
+ Point convex[MAXN];
+ for (int i = 0; i < 9; i++) {
+ convex[i].x = (double)p[i * 2];
+ convex[i].y = (double)p[i * 2 + 1];
+ }
+ int n_convex = 9;
+ int points_to_convex_ind[9] = {-1, -1, -1, -1, -1, -1, -1, -1, -1};
+ Jarvis_and_index(convex, n_convex, points_to_convex_ind);
+
+ int n1 = n_convex;
+ int n2 = 4;
+
+ for (int i = 0; i < n1; i++) {
+ ps1[i].x = (double)convex[i].x;
+ ps1[i].y = (double)convex[i].y;
+ }
+
+ for (int i = 0; i < n2; i++) {
+ ps2[i].x = (double)q[i * 2];
+ ps2[i].y = (double)q[i * 2 + 1];
+ }
+
+ int polygon_index_box_index[18];
+ for (int i = 0; i < n1; i++) {
+ polygon_index_box_index[i] = i;
+ polygon_index_box_index[i + n1] = i;
+ }
+
+ double grad_A[18] = {};
+ double grad_AB[18] = {};
+ double grad_C[18] = {};
+
+ double inter_area = intersectAreaO(ps1, n1, ps2, n2, grad_AB);
+ double S_pred =
+ polygon_area_grad(ps1, n1, polygon_index_box_index, n1, grad_A);
+ if (S_pred < 0) {
+ for (int i = 0; i < n_convex * 2; i++) {
+ grad_A[i] = -grad_A[i];
+ }
+ }
+ double union_area = fabs(S_pred) + fabs(area(ps2, n2)) - inter_area;
+
+ double iou = inter_area / union_area;
+ double polygon_area = intersectAreaPoly(ps1, n1, ps2, n2, grad_C);
+
+ // printf("%d:live\n", idx);
+ double rot_giou = iou - (polygon_area - union_area) / polygon_area;
+
+ float grad_point_temp[18] = {};
+
+ for (int i = 0; i < n_convex; i++) {
+ int grad_point = points_to_convex_ind[i];
+ grad_point_temp[2 * grad_point] =
+ (float)((union_area + inter_area) / (union_area * union_area) *
+ grad_AB[2 * i] -
+ iou / union_area * grad_A[2 * i] -
+ 1 / polygon_area * (grad_AB[2 * i] - grad_A[2 * i]) -
+ (union_area) / polygon_area / polygon_area * grad_C[2 * i]);
+ grad_point_temp[2 * grad_point + 1] =
+ (float)((union_area + inter_area) / (union_area * union_area) *
+ grad_AB[2 * i + 1] -
+ iou / union_area * grad_A[2 * i + 1] -
+ 1 / polygon_area * (grad_AB[2 * i + 1] - grad_A[2 * i + 1]) -
+ (union_area) / polygon_area / polygon_area * grad_C[2 * i + 1]);
+ }
+
+ for (int i = 0; i < 9; i++) {
+ point_grad[2 * i] = grad_point_temp[2 * i];
+ point_grad[2 * i + 1] = grad_point_temp[2 * i + 1];
+ }
+ return (float)rot_giou;
+}
+
+template <typename T>
+__global__ void convex_giou_cuda_kernel(const int ex_n_boxes,
+ const int gt_n_boxes, const T* ex_boxes,
+ const T* gt_boxes, T* point_grad) {
+ CUDA_1D_KERNEL_LOOP(index, ex_n_boxes) {
+ const T* cur_box = ex_boxes + index * 18;
+ const T* cur_gt_box = gt_boxes + index * 8;
+ T* cur_grad = point_grad + index * 19;
+ T giou = devrIoU(cur_box, cur_gt_box, cur_grad, threadIdx.x);
+ cur_grad[18] = giou;
+ }
+}
+
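+// Lighter-weight overloads below (no gradient bookkeeping), used by the plain convex IoU kernel.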
+__device__ inline int lineCross(Point a, Point b, Point c, Point d, Point& p) {
+ double s1, s2;
+ s1 = cross(a, b, c);
+ s2 = cross(a, b, d);
+ if (sig(s1) == 0 && sig(s2) == 0) return 2;
+ if (sig(s2 - s1) == 0) return 0;
+ p.x = (c.x * s2 - d.x * s1) / (s2 - s1);
+ p.y = (c.y * s2 - d.y * s1) / (s2 - s1);
+ return 1;
+}
+
+__device__ inline void polygon_cut(Point* p, int& n, Point a, Point b) {
+ Point pp[MAXN];
+ int m = 0;
+ p[n] = p[0];
+ for (int i = 0; i < n; i++) {
+ if (sig(cross(a, b, p[i])) > 0) {
+ pp[m] = p[i];
+ m++;
+ }
+ if (sig(cross(a, b, p[i])) != sig(cross(a, b, p[i + 1]))) {
+ lineCross(a, b, p[i], p[i + 1], pp[m]);
+ m++;
+ }
+ }
+ n = 0;
+ for (int i = 0; i < m; i++) {
+ if (!i || !(point_same(pp[i], pp[i - 1]))) {
+ p[n] = pp[i];
+ n++;
+ }
+ }
+
+ while (n > 1 && point_same(p[n - 1], p[0])) n--;
+}
+
+__device__ inline double intersectArea(Point a, Point b, Point c, Point d) {
+ Point o(0, 0);
+ int s1 = sig(cross(o, a, b));
+ int s2 = sig(cross(o, c, d));
+ if (s1 == 0 || s2 == 0) return 0.0;
+ if (s1 == -1) {
+ Point* i = &a;
+ Point* j = &b;
+ swap1(i, j);
+ }
+ if (s2 == -1) {
+ Point* i = &c;
+ Point* j = &d;
+ swap1(i, j);
+ }
+ Point p[10] = {o, a, b};
+ int n = 3;
+
+ polygon_cut(p, n, o, c);
+ polygon_cut(p, n, c, d);
+ polygon_cut(p, n, d, o);
+ double res = area(p, n);
+ if (s1 * s2 == -1) res = -res;
+ return res;
+}
+__device__ inline double intersectAreaO(Point* ps1, int n1, Point* ps2,
+ int n2) {
+ if (area(ps1, n1) < 0) reverse1(ps1, n1);
+ if (area(ps2, n2) < 0) reverse1(ps2, n2);
+ ps1[n1] = ps1[0];
+ ps2[n2] = ps2[0];
+ double res = 0;
+ for (int i = 0; i < n1; i++) {
+ for (int j = 0; j < n2; j++) {
+ res += intersectArea(ps1[i], ps1[i + 1], ps2[j], ps2[j + 1]);
+ }
+ }
+ return res;
+}
+
+template <typename T>
+__device__ inline float devrIoU(T const* const p, T const* const q) {
+ Point ps1[MAXN], ps2[MAXN];
+ Point convex[MAXN];
+ for (int i = 0; i < 9; i++) {
+ convex[i].x = (double)p[i * 2];
+ convex[i].y = (double)p[i * 2 + 1];
+ }
+ int n_convex = 9;
+ int points_to_convex_ind[9] = {-1, -1, -1, -1, -1, -1, -1, -1, -1};
+ Jarvis_and_index(convex, n_convex, points_to_convex_ind);
+ int n1 = n_convex;
+ for (int i = 0; i < n1; i++) {
+ ps1[i].x = (double)convex[i].x;
+ ps1[i].y = (double)convex[i].y;
+ }
+ int n2 = 4;
+ for (int i = 0; i < n2; i++) {
+ ps2[i].x = (double)q[i * 2];
+ ps2[i].y = (double)q[i * 2 + 1];
+ }
+ double inter_area = intersectAreaO(ps1, n1, ps2, n2);
+ double S_pred = area(ps1, n1);
+ double union_area = fabs(S_pred) + fabs(area(ps2, n2)) - inter_area;
+ double iou = inter_area / union_area;
+ return (float)iou;
+}
+
+template <typename T>
+__global__ void convex_iou_cuda_kernel(const int ex_n_boxes,
+ const int gt_n_boxes, const T* ex_boxes,
+ const T* gt_boxes, T* iou) {
+ CUDA_1D_KERNEL_LOOP(index, ex_n_boxes) {
+ const T* cur_box = ex_boxes + index * 18;
+ for (int i = 0; i < gt_n_boxes; i++) {
+ iou[index * gt_n_boxes + i] = devrIoU(cur_box, gt_boxes + i * 8);
+ }
+ }
+}
+#endif // CONVEX_IOU_CUDA_KERNEL_CUH
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/correlation_cuda.cuh b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/correlation_cuda.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..c4674fab776a409cd4a0019c85b2f39699e6dc29
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/correlation_cuda.cuh
@@ -0,0 +1,244 @@
+// encoding=utf-8
+// Copyright 2021 Huawei Technologies Co., Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// Modified from
+// https://github.com/ClementPinard/Pytorch-Correlation-extension/blob/master/Correlation_Module/correlation_cuda_kernel.cu
+// Original licence: Under MIT License
+
+#ifndef CORRELATION_CUDA
+#define CORRELATION_CUDA
+
+#ifdef MMCV_USE_PARROTS
+#include "parrots_cuda_helper.hpp"
+#else
+#include "pytorch_cuda_helper.hpp"
+#endif
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+// Using <torch/extension.h> is recommended in the official documentation in
+// https://pytorch.org/tutorials/advanced/cpp_extension.html#writing-the-c-op.
+// However, we use <torch/types.h> for compatibility with CUDA 9.0
+// Read https://github.com/pytorch/extension-cpp/issues/35 for more details.
+#include <torch/types.h>
+
+#include <iostream>
+#include <vector>
+
+using namespace torch;
+
+#define TensorAcc4R PackedTensorAccessor32<scalar_t, 4, RestrictPtrTraits>
+#define TensorAcc5R PackedTensorAccessor32<scalar_t, 5, RestrictPtrTraits>
+#define WITHIN_BOUNDS(x, y, H, W) (x >= 0 && x < H && y >= 0 && y < W)
+
+#define THREADS_FORWARD 32
+#define THREADS_BACKWARD 16
+
+template <typename scalar_t>
+__global__ void correlation_forward_cuda_kernel(
+ const TensorAcc4R rInput1, const TensorAcc4R rInput2, TensorAcc5R output,
+ int kH, int kW, int patchH, int patchW, int padH, int padW, int dilationH,
+ int dilationW, int dilation_patchH, int dilation_patchW, int dH, int dW) {
+ const int iH = rInput1.size(1);
+ const int iW = rInput1.size(2);
+ const int C = rInput1.size(3);
+
+ const int n = blockIdx.x;
+ const int h = blockIdx.y;
+ const int w = blockIdx.z;
+ const int thread = threadIdx.x;
+
+ const int start_i = -padH + h * dH;
+ const int start_j = -padW + w * dW;
+
+ const int patchRadH = dilation_patchH * (patchH - 1) / 2;
+ const int patchRadW = dilation_patchW * (patchW - 1) / 2;
+
+ __shared__ scalar_t prod_sum[THREADS_FORWARD];
+
+ for (int ph = 0; ph < patchH; ++ph) {
+ int ph_dilated = ph * dilation_patchH - patchRadH;
+ for (int pw = 0; pw < patchW; ++pw) {
+ int pw_dilated = pw * dilation_patchW - patchRadW;
+ prod_sum[thread] = 0;
+ for (int i = 0; i < kH; ++i) {
+ int i1 = start_i + i * dilationH;
+ int i2 = i1 + ph_dilated;
+ if
+ WITHIN_BOUNDS(i1, i2, iH, iH) {
+ for (int j = 0; j < kW; ++j) {
+ int j1 = start_j + j * dilationW;
+ int j2 = j1 + pw_dilated;
+ if
+ WITHIN_BOUNDS(j1, j2, iW, iW) {
+ for (int c = thread; c < C; c += THREADS_FORWARD) {
+ scalar_t v1 = rInput1[n][i1][j1][c];
+ scalar_t v2 = rInput2[n][i2][j2][c];
+ prod_sum[thread] += v1 * v2;
+ }
+ }
+ }
+ }
+ }
+ // accumulate
+ __syncthreads();
+ if (thread == 0) {
+ scalar_t reduce_sum = 0;
+ for (int index = 0; index < THREADS_FORWARD; ++index) {
+ reduce_sum += prod_sum[index];
+ }
+ output[n][ph][pw][h][w] = reduce_sum;
+ }
+ }
+ }
+}
+
+template <typename scalar_t>
+__global__ void correlation_backward_cuda_kernel_input1(
+ const TensorAcc5R grad_output, const TensorAcc4R input2,
+ TensorAcc4R grad_input1, const int kH, const int kW, const int patchH,
+ const int patchW, const int padH, const int padW, const int dilationH,
+ const int dilationW, const int dilation_patchH, const int dilation_patchW,
+ const int dH, const int dW, const int batch) {
+ const int iH = input2.size(2);
+ const int iW = input2.size(3);
+
+ const int H = grad_output.size(3);
+ const int W = grad_output.size(4);
+
+ const int patchRadH = (patchH - 1) / 2;
+ const int patchRadW = (patchW - 1) / 2;
+
+ const int n = batch;
+ const int c = blockIdx.x;
+ const int h = blockIdx.y;
+ const int w = blockIdx.z;
+ const int ph_off = threadIdx.x;
+ const int pw_off = threadIdx.y;
+
+ const int h_2 = h + padH;
+ const int w_2 = w + padW;
+ const int min_h = h_2 - kH * dilationH;
+ const int min_w = w_2 - kW * dilationW;
+
+ __shared__ scalar_t prod_sum[THREADS_BACKWARD][THREADS_BACKWARD];
+ prod_sum[ph_off][pw_off] = 0;
+
+ for (int ph = ph_off; ph < patchH; ph += THREADS_BACKWARD) {
+ int i1 = h + dilation_patchH * (ph - patchRadH);
+ for (int pw = pw_off; pw < patchW; pw += THREADS_BACKWARD) {
+ int j1 = w + dilation_patchW * (pw - patchRadW);
+ if (WITHIN_BOUNDS(i1, j1, iH, iW)) {
+ scalar_t val = input2[n][c][i1][j1];
+ for (int h_3 = h_2; h_3 > min_h; h_3 -= dilationH) {
+ int i2 = (h_3) / dH;
+ if (i2 * dH != h_3) continue;
+ for (int w_3 = w_2; w_3 > min_w; w_3 -= dilationW) {
+ int j2 = (w_3) / dW;
+ if (j2 * dW != w_3) continue;
+ if
+ WITHIN_BOUNDS(i2, j2, H, W) {
+ prod_sum[ph_off][pw_off] +=
+ grad_output[n][ph][pw][i2][j2] * val;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ __syncthreads();
+
+ if (ph_off == 0 && pw_off == 0) {
+ scalar_t reduce_sum = 0;
+ for (int ph = 0; ph < THREADS_BACKWARD; ++ph) {
+ for (int pw = 0; pw < THREADS_BACKWARD; ++pw) {
+ reduce_sum += prod_sum[ph][pw];
+ }
+ }
+ grad_input1[n][c][h][w] = reduce_sum;
+ }
+}
+
+template <typename scalar_t>
+__global__ void correlation_backward_cuda_kernel_input2(
+ const TensorAcc5R grad_output, const TensorAcc4R input1,
+ TensorAcc4R grad_input2, int kH, int kW, int patchH, int patchW, int padH,
+ int padW, int dilationH, int dilationW, int dilation_patchH,
+ int dilation_patchW, int dH, int dW, int batch) {
+ const int iH = input1.size(2);
+ const int iW = input1.size(3);
+
+ const int patchRadH = (patchH - 1) / 2;
+ const int patchRadW = (patchW - 1) / 2;
+
+ const int H = grad_output.size(3);
+ const int W = grad_output.size(4);
+
+ const int dilatedKH = kH * dilationH;
+ const int dilatedKW = kW * dilationW;
+
+ const int n = batch;
+ const int c = blockIdx.x;
+ const int h = blockIdx.y;
+ const int w = blockIdx.z;
+ const int ph_off = threadIdx.x;
+ const int pw_off = threadIdx.y;
+
+ __shared__ scalar_t prod_sum[THREADS_BACKWARD][THREADS_BACKWARD];
+ prod_sum[ph_off][pw_off] = 0;
+
+ for (int ph = ph_off; ph < patchH; ph += THREADS_BACKWARD) {
+ int i1 = h - dilation_patchH * (ph - patchRadH);
+ for (int pw = pw_off; pw < patchW; pw += THREADS_BACKWARD) {
+ int j1 = w - dilation_patchW * (pw - patchRadW);
+ if
+ WITHIN_BOUNDS(i1, j1, iH, iW) {
+ scalar_t val = input1[n][c][i1][j1];
+
+ const int h_2 = i1 + padH;
+ const int w_2 = j1 + padW;
+ const int min_h = h_2 - dilatedKH;
+ const int min_w = w_2 - dilatedKW;
+
+ for (int h_3 = h_2; h_3 > min_h; h_3 -= dilationH) {
+ int i2 = (h_3) / dH;
+ if (i2 * dH != h_3) continue;
+ for (int w_3 = w_2; w_3 > min_w; w_3 -= dilationW) {
+ int j2 = (w_3) / dW;
+ if (j2 * dW != w_3) continue;
+ if
+ WITHIN_BOUNDS(i2, j2, H, W) {
+ prod_sum[ph_off][pw_off] +=
+ grad_output[n][ph][pw][i2][j2] * val;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ __syncthreads();
+
+ if (ph_off == 0 && pw_off == 0) {
+ scalar_t reduce_sum = 0;
+ for (int ph = 0; ph < THREADS_BACKWARD; ++ph) {
+ for (int pw = 0; pw < THREADS_BACKWARD; ++pw) {
+ reduce_sum += prod_sum[ph][pw];
+ }
+ }
+ grad_input2[n][c][h][w] = reduce_sum;
+ }
+}
+#endif
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/deform_conv_cuda_kernel.cuh b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/deform_conv_cuda_kernel.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..6b4d1bbd85bad1b87ee5d6b8a3cd3b29e3cbc411
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/deform_conv_cuda_kernel.cuh
@@ -0,0 +1,367 @@
+/*!
+ ******************* BEGIN Caffe Copyright Notice and Disclaimer
+ *****************
+ *
+ * COPYRIGHT
+ *
+ * All contributions by the University of California:
+ * Copyright (c) 2014-2017 The Regents of the University of California (Regents)
+ * All rights reserved.
+ *
+ * All other contributions:
+ * Copyright (c) 2014-2017, the respective contributors
+ * All rights reserved.
+ *
+ * Caffe uses a shared copyright model: each contributor holds copyright over
+ * their contributions to Caffe. The project versioning records all such
+ * contribution and copyright details. If a contributor wants to further mark
+ * their specific copyright on a particular contribution, they should indicate
+ * their copyright solely in the commit message of the change when it is
+ * committed.
+ *
+ * LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ *AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ *IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+ *FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ *DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ *SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ *OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * CONTRIBUTION AGREEMENT
+ *
+ * By contributing to the BVLC/caffe repository through pull-request, comment,
+ * or otherwise, the contributor releases their content to the
+ * license and copyright terms herein.
+ *
+ ***************** END Caffe Copyright Notice and Disclaimer
+ *********************
+ *
+ * Copyright (c) 2018 Microsoft
+ * Licensed under The MIT License [see LICENSE for details]
+ * \file modulated_deformable_im2col.cuh
+ * \brief Function definitions of converting an image to
+ * column matrix based on kernel, padding, dilation, and offset.
+ * These functions are mainly used in deformable convolution operators.
+ * \ref: https://arxiv.org/abs/1703.06211
+ * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu, Dazhi Cheng
+ */
+
+// modified from
+// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu
+
+#ifndef DEFORM_CONV_CUDA_KERNEL_CUH
+#define DEFORM_CONV_CUDA_KERNEL_CUH
+
+#include <float.h>
+#ifdef MMCV_WITH_TRT
+#include "common_cuda_helper.hpp"
+#else // MMCV_WITH_TRT
+#ifdef MMCV_USE_PARROTS
+#include "parrots_cuda_helper.hpp"
+#else // MMCV_USE_PARROTS
+#include "pytorch_cuda_helper.hpp"
+#endif // MMCV_USE_PARROTS
+#endif // MMCV_WITH_TRT
+
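+// Bilinear sampling of input at fractional (h, w); returns 0 outside the feature map.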
+template <typename T>
+__device__ T deformable_im2col_bilinear(const T *input, const int data_width,
+ const int height, const int width, T h,
+ T w) {
+ if (h <= -1 || height <= h || w <= -1 || width <= w) {
+ return 0;
+ }
+
+ int h_low = floorf(h);
+ int w_low = floorf(w);
+ int h_high = h_low + 1;
+ int w_high = w_low + 1;
+
+ T lh = h - h_low;
+ T lw = w - w_low;
+ T hh = 1 - lh, hw = 1 - lw;
+
+ T v1 = 0;
+ if (h_low >= 0 && w_low >= 0) v1 = input[h_low * data_width + w_low];
+ T v2 = 0;
+ if (h_low >= 0 && w_high <= width - 1)
+ v2 = input[h_low * data_width + w_high];
+ T v3 = 0;
+ if (h_high <= height - 1 && w_low >= 0)
+ v3 = input[h_high * data_width + w_low];
+ T v4 = 0;
+ if (h_high <= height - 1 && w_high <= width - 1)
+ v4 = input[h_high * data_width + w_high];
+
+ T w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
+
+ T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
+ return val;
+}
+
+template <typename T>
+__device__ T get_gradient_weight(T argmax_h, T argmax_w, const int h,
+ const int w, const int height,
+ const int width) {
+ if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 ||
+ argmax_w >= width) {
+ // empty
+ return 0;
+ }
+
+ int argmax_h_low = floorf(argmax_h);
+ int argmax_w_low = floorf(argmax_w);
+ int argmax_h_high = argmax_h_low + 1;
+ int argmax_w_high = argmax_w_low + 1;
+
+ T weight = 0;
+ if (h == argmax_h_low && w == argmax_w_low)
+ weight = (h + 1 - argmax_h) * (w + 1 - argmax_w);
+ if (h == argmax_h_low && w == argmax_w_high)
+ weight = (h + 1 - argmax_h) * (argmax_w + 1 - w);
+ if (h == argmax_h_high && w == argmax_w_low)
+ weight = (argmax_h + 1 - h) * (w + 1 - argmax_w);
+ if (h == argmax_h_high && w == argmax_w_high)
+ weight = (argmax_h + 1 - h) * (argmax_w + 1 - w);
+ return weight;
+}
+
+template <typename T>
+__device__ T get_coordinate_weight(T argmax_h, T argmax_w, const int height,
+ const int width, const T *im_data,
+ const int data_width, const int bp_dir) {
+ if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 ||
+ argmax_w >= width) {
+ // empty
+ return 0;
+ }
+
+ int argmax_h_low = floorf(argmax_h);
+ int argmax_w_low = floorf(argmax_w);
+ int argmax_h_high = argmax_h_low + 1;
+ int argmax_w_high = argmax_w_low + 1;
+
+ T weight = 0;
+
+ if (bp_dir == 0) {
+ if (argmax_h_low >= 0 && argmax_w_low >= 0)
+ weight += -1 * (argmax_w_low + 1 - argmax_w) *
+ im_data[argmax_h_low * data_width + argmax_w_low];
+ if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
+ weight += -1 * (argmax_w - argmax_w_low) *
+ im_data[argmax_h_low * data_width + argmax_w_high];
+ if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
+ weight += (argmax_w_low + 1 - argmax_w) *
+ im_data[argmax_h_high * data_width + argmax_w_low];
+ if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
+ weight += (argmax_w - argmax_w_low) *
+ im_data[argmax_h_high * data_width + argmax_w_high];
+ } else if (bp_dir == 1) {
+ if (argmax_h_low >= 0 && argmax_w_low >= 0)
+ weight += -1 * (argmax_h_low + 1 - argmax_h) *
+ im_data[argmax_h_low * data_width + argmax_w_low];
+ if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
+ weight += (argmax_h_low + 1 - argmax_h) *
+ im_data[argmax_h_low * data_width + argmax_w_high];
+ if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
+ weight += -1 * (argmax_h - argmax_h_low) *
+ im_data[argmax_h_high * data_width + argmax_w_low];
+ if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
+ weight += (argmax_h - argmax_h_low) *
+ im_data[argmax_h_high * data_width + argmax_w_high];
+ }
+
+ return weight;
+}
+
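+// Deformable im2col: each thread fills the kernel_h x kernel_w column entries for one
+// (channel, output location), sampling offset-shifted positions with bilinear interpolation.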
+template <typename T>
+__global__ void deformable_im2col_gpu_kernel(
+ const int n, const T *data_im, const T *data_offset, const int height,
+ const int width, const int kernel_h, const int kernel_w, const int pad_h,
+ const int pad_w, const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int channel_per_deformable_group, const int batch_size,
+ const int num_channels, const int deformable_group, const int height_col,
+ const int width_col, T *data_col) {
+ CUDA_1D_KERNEL_LOOP(index, n) {
+ // 'index' is an index of the output matrix
+ const int w_col = index % width_col;
+ const int h_col = (index / width_col) % height_col;
+ const int b_col = (index / width_col / height_col) % batch_size;
+ const int c_im = (index / width_col / height_col) / batch_size;
+ const int c_col = c_im * kernel_h * kernel_w;
+
+ // compute deformable group index
+ const int deformable_group_index = c_im / channel_per_deformable_group;
+
+ const int h_in = h_col * stride_h - pad_h;
+ const int w_in = w_col * stride_w - pad_w;
+ T *data_col_ptr =
+ data_col +
+ ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col;
+ const T *data_im_ptr =
+ data_im + (b_col * num_channels + c_im) * height * width;
+ const T *data_offset_ptr =
+ data_offset + (b_col * deformable_group + deformable_group_index) * 2 *
+ kernel_h * kernel_w * height_col * width_col;
+
+ for (int i = 0; i < kernel_h; ++i) {
+ for (int j = 0; j < kernel_w; ++j) {
+ const int data_offset_h_ptr =
+ ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col;
+ const int data_offset_w_ptr =
+ ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col +
+ w_col;
+ const T offset_h = data_offset_ptr[data_offset_h_ptr];
+ const T offset_w = data_offset_ptr[data_offset_w_ptr];
+ T val = static_cast<T>(0);
+ const T h_im = h_in + i * dilation_h + offset_h;
+ const T w_im = w_in + j * dilation_w + offset_w;
+ if (h_im > -1 && w_im > -1 && h_im < height && w_im < width)
+ val = deformable_im2col_bilinear(data_im_ptr, width, height, width,
+ h_im, w_im);
+ *data_col_ptr = val;
+ data_col_ptr += batch_size * height_col * width_col;
+ }
+ }
+ }
+}
+
+template <typename T>
+__global__ void deformable_col2im_gpu_kernel(
+ const int n, const T *data_col, const T *data_offset, const int channels,
+ const int height, const int width, const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int channel_per_deformable_group, const int batch_size,
+ const int deformable_group, const int height_col, const int width_col,
+ T *grad_im) {
+ CUDA_1D_KERNEL_LOOP(index, n) {
+ const int j = (index / width_col / height_col / batch_size) % kernel_w;
+ const int i =
+ (index / width_col / height_col / batch_size / kernel_w) % kernel_h;
+ const int c =
+ index / width_col / height_col / batch_size / kernel_w / kernel_h;
+ // compute the start and end of the output
+
+ const int deformable_group_index = c / channel_per_deformable_group;
+
+ int w_out = index % width_col;
+ int h_out = (index / width_col) % height_col;
+ int b = (index / width_col / height_col) % batch_size;
+ int w_in = w_out * stride_w - pad_w;
+ int h_in = h_out * stride_h - pad_h;
+
+ const T *data_offset_ptr =
+ data_offset + (b * deformable_group + deformable_group_index) * 2 *
+ kernel_h * kernel_w * height_col * width_col;
+ const int data_offset_h_ptr =
+ ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out;
+ const int data_offset_w_ptr =
+ ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out;
+ const T offset_h = data_offset_ptr[data_offset_h_ptr];
+ const T offset_w = data_offset_ptr[data_offset_w_ptr];
+ const T cur_inv_h_data = h_in + i * dilation_h + offset_h;
+ const T cur_inv_w_data = w_in + j * dilation_w + offset_w;
+
+ const T cur_top_grad = data_col[index];
+ const int cur_h = (int)cur_inv_h_data;
+ const int cur_w = (int)cur_inv_w_data;
+ for (int dy = -2; dy <= 2; dy++) {
+ for (int dx = -2; dx <= 2; dx++) {
+ if (cur_h + dy >= 0 && cur_h + dy < height && cur_w + dx >= 0 &&
+ cur_w + dx < width && abs(cur_inv_h_data - (cur_h + dy)) < 1 &&
+ abs(cur_inv_w_data - (cur_w + dx)) < 1) {
+ int cur_bottom_grad_pos =
+ ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx;
+ T weight = get_gradient_weight(cur_inv_h_data, cur_inv_w_data,
+ cur_h + dy, cur_w + dx, height, width);
+ atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad);
+ }
+ }
+ }
+ }
+}
+
+template <typename T>
+__global__ void deformable_col2im_coord_gpu_kernel(
+ const int n, const T *data_col, const T *data_im, const T *data_offset,
+ const int channels, const int height, const int width, const int kernel_h,
+ const int kernel_w, const int pad_h, const int pad_w, const int stride_h,
+ const int stride_w, const int dilation_h, const int dilation_w,
+ const int channel_per_deformable_group, const int batch_size,
+ const int offset_channels, const int deformable_group, const int height_col,
+ const int width_col, T *grad_offset) {
+ CUDA_1D_KERNEL_LOOP(index, n) {
+ T val = 0;
+ int w = index % width_col;
+ int h = (index / width_col) % height_col;
+ int c = (index / width_col / height_col) % offset_channels;
+ int b = (index / width_col / height_col) / offset_channels;
+ // compute the start and end of the output
+
+ const int deformable_group_index = c / (2 * kernel_h * kernel_w);
+ const int col_step = kernel_h * kernel_w;
+ int cnt = 0;
+ const T *data_col_ptr = data_col + deformable_group_index *
+ channel_per_deformable_group *
+ batch_size * width_col * height_col;
+ const T *data_im_ptr =
+ data_im + (b * deformable_group + deformable_group_index) *
+ channel_per_deformable_group / kernel_h / kernel_w *
+ height * width;
+ const T *data_offset_ptr =
+ data_offset + (b * deformable_group + deformable_group_index) * 2 *
+ kernel_h * kernel_w * height_col * width_col;
+
+ const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w;
+
+ for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group;
+ col_c += col_step) {
+ const int col_pos =
+ (((col_c * batch_size + b) * height_col) + h) * width_col + w;
+ const int bp_dir = offset_c % 2;
+
+ int j = (col_pos / width_col / height_col / batch_size) % kernel_w;
+ int i =
+ (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h;
+ int w_out = col_pos % width_col;
+ int h_out = (col_pos / width_col) % height_col;
+ int w_in = w_out * stride_w - pad_w;
+ int h_in = h_out * stride_h - pad_h;
+ const int data_offset_h_ptr =
+ (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out);
+ const int data_offset_w_ptr =
+ (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col +
+ w_out);
+ const T offset_h = data_offset_ptr[data_offset_h_ptr];
+ const T offset_w = data_offset_ptr[data_offset_w_ptr];
+ T inv_h = h_in + i * dilation_h + offset_h;
+ T inv_w = w_in + j * dilation_w + offset_w;
+ if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width)
+ inv_h = inv_w = -2;
+ const T weight = get_coordinate_weight(inv_h, inv_w, height, width,
+ data_im_ptr + cnt * height * width,
+ width, bp_dir);
+ val += weight * data_col_ptr[col_pos];
+ cnt += 1;
+ }
+
+ grad_offset[index] = val;
+ }
+}
+
+#endif // DEFORM_CONV_CUDA_KERNEL_CUH
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/deform_roi_pool_cuda_kernel.cuh b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/deform_roi_pool_cuda_kernel.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..850c338bb74e2ce9b8aca6e53863887e4d25675a
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/deform_roi_pool_cuda_kernel.cuh
@@ -0,0 +1,199 @@
+// encoding=utf-8
+// Copyright 2021 Huawei Technologies Co., Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef DEFORM_ROI_POOL_CUDA_KERNEL_CUH
+#define DEFORM_ROI_POOL_CUDA_KERNEL_CUH
+
+#ifdef MMCV_USE_PARROTS
+#include "parrots_cuda_helper.hpp"
+#else
+#include "pytorch_cuda_helper.hpp"
+#endif
+
+template <typename T>
+__global__ void deform_roi_pool_forward_cuda_kernel(
+ const int nthreads, const T* input, const T* rois, const T* offset,
+ T* output, const int pooled_height, const int pooled_width,
+ const T spatial_scale, const int sampling_ratio, const T gamma,
+ const int channels, const int height, const int width) {
+ CUDA_1D_KERNEL_LOOP(index, nthreads) {
+ // (n, c, ph, pw) is an element in the pooled output
+ int pw = index % pooled_width;
+ int ph = (index / pooled_width) % pooled_height;
+ int c = (index / pooled_width / pooled_height) % channels;
+ int n = index / pooled_width / pooled_height / channels;
+
+ const T* offset_rois = rois + n * 5;
+ int roi_batch_ind = offset_rois[0];
+
+    // Do not use rounding; this implementation detail is critical
+ T roi_start_w = offset_rois[1] * spatial_scale - 0.5;
+ T roi_start_h = offset_rois[2] * spatial_scale - 0.5;
+ T roi_end_w = offset_rois[3] * spatial_scale - 0.5;
+ T roi_end_h = offset_rois[4] * spatial_scale - 0.5;
+
+ T roi_width = roi_end_w - roi_start_w;
+ T roi_height = roi_end_h - roi_start_h;
+
+    T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
+    T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
+
+ const T* offset_input =
+ input + (roi_batch_ind * channels + c) * height * width;
+
+ // We use roi_bin_grid to sample the grid and mimic integral
+ int roi_bin_grid_h =
+ (sampling_ratio > 0)
+ ? sampling_ratio
+            : static_cast<int>(ceilf(roi_height / pooled_height));
+ int roi_bin_grid_w =
+ (sampling_ratio > 0)
+ ? sampling_ratio
+            : static_cast<int>(ceilf(roi_width / pooled_width));
+
+ // Compute roi offset
+ if (offset != NULL) {
+ const T* offset_cur_w = offset + n * pooled_width * pooled_height * 2 +
+ ph * pooled_width + pw;
+ T offset_roi_w = gamma * roi_width * offset_cur_w[0];
+ T offset_roi_h =
+ gamma * roi_height * offset_cur_w[pooled_width * pooled_height];
+ roi_start_w += offset_roi_w;
+ roi_start_h += offset_roi_h;
+ }
+
+ // We do average pooling inside a bin
+ const T count = max(roi_bin_grid_h * roi_bin_grid_w, 1);
+ T output_val = 0.;
+ for (int iy = 0; iy < roi_bin_grid_h; iy++) {
+ const T y = roi_start_h + ph * bin_size_h +
+                  static_cast<T>(iy + .5f) * bin_size_h /
+                      static_cast<T>(roi_bin_grid_h);
+ for (int ix = 0; ix < roi_bin_grid_w; ix++) {
+ const T x = roi_start_w + pw * bin_size_w +
+                    static_cast<T>(ix + .5f) * bin_size_w /
+                        static_cast<T>(roi_bin_grid_w);
+ T val = bilinear_interpolate(offset_input, height, width, y, x, index);
+ output_val += val;
+ }
+ }
+ output[index] = output_val / count;
+ }
+}
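+
+// Illustrative host-side launch for the kernel above (hypothetical sketch;
+// the real dispatch lives in the deform_roi_pool op's .cu file and may
+// differ). It assumes the GET_BLOCKS / THREADS_PER_BLOCK helpers from the
+// included cuda helper header:
+//   int output_size = num_rois * channels * pooled_height * pooled_width;
+//   deform_roi_pool_forward_cuda_kernel<float>
+//       <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK, 0, stream>>>(
+//           output_size, input, rois, offset, output, pooled_height,
+//           pooled_width, spatial_scale, sampling_ratio, gamma, channels,
+//           height, width);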
+
+template <typename T>
+__global__ void deform_roi_pool_backward_cuda_kernel(
+ const int nthreads, const T* grad_output, const T* input, const T* rois,
+ const T* offset, T* grad_input, T* grad_offset, const int pooled_height,
+ const int pooled_width, const T spatial_scale, const int sampling_ratio,
+ const T gamma, const int channels, const int height, const int width) {
+ CUDA_1D_KERNEL_LOOP(index, nthreads) {
+ // (n, c, ph, pw) is an element in the pooled output
+ int pw = index % pooled_width;
+ int ph = (index / pooled_width) % pooled_height;
+ int c = (index / pooled_width / pooled_height) % channels;
+ int n = index / pooled_width / pooled_height / channels;
+
+ const T* offset_rois = rois + n * 5;
+ int roi_batch_ind = offset_rois[0];
+ const T* offset_input =
+ input + ((roi_batch_ind * channels + c) * height * width);
+ T* offset_grad_input =
+ grad_input + ((roi_batch_ind * channels + c) * height * width);
+
+    // Do not use rounding; this implementation detail is critical
+ T roi_start_w = offset_rois[1] * spatial_scale - 0.5;
+ T roi_start_h = offset_rois[2] * spatial_scale - 0.5;
+ T roi_end_w = offset_rois[3] * spatial_scale - 0.5;
+ T roi_end_h = offset_rois[4] * spatial_scale - 0.5;
+
+ T roi_width = roi_end_w - roi_start_w;
+ T roi_height = roi_end_h - roi_start_h;
+
+    T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
+    T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
+
+ // We use roi_bin_grid to sample the grid and mimic integral
+ int roi_bin_grid_h =
+ (sampling_ratio > 0)
+ ? sampling_ratio
+            : static_cast<int>(ceilf(roi_height / pooled_height));
+ int roi_bin_grid_w =
+ (sampling_ratio > 0)
+ ? sampling_ratio
+            : static_cast<int>(ceilf(roi_width / pooled_width));
+
+ // Compute roi offset
+ if (offset != NULL) {
+ const T* offset_cur_w = offset + n * pooled_width * pooled_height * 2 +
+ ph * pooled_width + pw;
+ T offset_roi_w = gamma * roi_width * offset_cur_w[0];
+ T offset_roi_h =
+ gamma * roi_height * offset_cur_w[pooled_width * pooled_height];
+ roi_start_w += offset_roi_w;
+ roi_start_h += offset_roi_h;
+ }
+
+ // We do average (integral) pooling inside a bin
+ const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4
+ const T grad_output_this_bin = grad_output[index] / count;
+
+ for (int iy = 0; iy < roi_bin_grid_h; iy++) {
+ const T y = roi_start_h + ph * bin_size_h +
+                  static_cast<T>(iy + .5f) * bin_size_h /
+                      static_cast<T>(roi_bin_grid_h);
+ for (int ix = 0; ix < roi_bin_grid_w; ix++) {
+ const T x = roi_start_w + pw * bin_size_w +
+                    static_cast<T>(ix + .5f) * bin_size_w /
+                        static_cast<T>(roi_bin_grid_w);
+
+ T w1, w2, w3, w4;
+ int x_low, x_high, y_low, y_high;
+ bilinear_interpolate_gradient(height, width, y, x, w1, w2, w3, w4,
+ x_low, x_high, y_low, y_high, index);
+
+ if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) {
+ atomicAdd(offset_grad_input + y_low * width + x_low,
+ grad_output_this_bin * w1);
+ atomicAdd(offset_grad_input + y_low * width + x_high,
+ grad_output_this_bin * w2);
+ atomicAdd(offset_grad_input + y_high * width + x_low,
+ grad_output_this_bin * w3);
+ atomicAdd(offset_grad_input + y_high * width + x_high,
+ grad_output_this_bin * w4);
+ if (offset != NULL) {
+ T input_00 = offset_input[y_low * width + x_low];
+ T input_10 = offset_input[y_low * width + x_high];
+ T input_01 = offset_input[y_high * width + x_low];
+ T input_11 = offset_input[y_high * width + x_high];
+ T ogx = gamma * roi_width * grad_output_this_bin *
+ (input_11 * (y - y_low) + input_10 * (y_high - y) +
+ input_01 * (y_low - y) + input_00 * (y - y_high));
+ T ogy = gamma * roi_height * grad_output_this_bin *
+ (input_11 * (x - x_low) + input_01 * (x_high - x) +
+ input_10 * (x_low - x) + input_00 * (x - x_high));
+ atomicAdd(grad_offset + n * pooled_width * pooled_height * 2 +
+ ph * pooled_width + pw,
+ ogx);
+ atomicAdd(grad_offset + n * pooled_width * pooled_height * 2 +
+ pooled_width * pooled_height + ph * pooled_width + pw,
+ ogy);
+ }
+ }
+ }
+ }
+ }
+}
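+
+// Note on the offset gradients above: the sampling point is shifted by
+// gamma * roi_width * offset_x (and gamma * roi_height * offset_y), so by the
+// chain rule ogx = gamma * roi_width * grad_output_this_bin * d(bilinear)/dx,
+// where the sum over input_00..input_11 is the analytic x-derivative of
+// bilinear interpolation at (y, x); ogy is the analogous y-derivative.
+// atomicAdd is required because every channel and every grid sample of a bin
+// accumulates into the same two grad_offset entries.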
+
+#endif // DEFORM_ROI_POOL_CUDA_KERNEL_CUH
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/furthest_point_sample_cuda_kernel.cuh b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/furthest_point_sample_cuda_kernel.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..7242536b47995d93a885170749084a97f2c490f9
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/furthest_point_sample_cuda_kernel.cuh
@@ -0,0 +1,165 @@
+// encoding=utf-8
+// Copyright 2021 Huawei Technologies Co., Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef FURTHEST_POINT_SAMPLE_CUDA_KERNEL_CUH
+#define FURTHEST_POINT_SAMPLE_CUDA_KERNEL_CUH
+
+#ifdef MMCV_USE_PARROTS
+#include "parrots_cuda_helper.hpp"
+#else
+#include "pytorch_cuda_helper.hpp"
+#endif
+
+__device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i,
+ int idx1, int idx2) {
+ const float v1 = dists[idx1], v2 = dists[idx2];
+ const int i1 = dists_i[idx1], i2 = dists_i[idx2];
+ dists[idx1] = max(v1, v2);
+ dists_i[idx1] = v2 > v1 ? i2 : i1;
+}
+
+template <unsigned int block_size>
+__global__ void furthest_point_sampling_forward_cuda_kernel(
+ int b, int n, int m, const float *__restrict__ dataset,
+ float *__restrict__ temp, int *__restrict__ idxs) {
+ // dataset: (B, N, 3)
+ // tmp: (B, N)
+ // output:
+ // idx: (B, M)
+
+ if (m <= 0) return;
+ __shared__ float dists[block_size];
+ __shared__ int dists_i[block_size];
+
+ int batch_index = blockIdx.x;
+ dataset += batch_index * n * 3;
+ temp += batch_index * n;
+ idxs += batch_index * m;
+
+ int tid = threadIdx.x;
+ const int stride = block_size;
+
+ int old = 0;
+ if (threadIdx.x == 0) idxs[0] = old;
+
+ __syncthreads();
+ for (int j = 1; j < m; j++) {
+ int besti = 0;
+ float best = -1;
+ float x1 = dataset[old * 3 + 0];
+ float y1 = dataset[old * 3 + 1];
+ float z1 = dataset[old * 3 + 2];
+ for (int k = tid; k < n; k += stride) {
+ float x2, y2, z2;
+ x2 = dataset[k * 3 + 0];
+ y2 = dataset[k * 3 + 1];
+ z2 = dataset[k * 3 + 2];
+ // float mag = (x2 * x2) + (y2 * y2) + (z2 * z2);
+ // if (mag <= 1e-3)
+ // continue;
+
+ float d =
+ (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1);
+ float d2 = min(d, temp[k]);
+ temp[k] = d2;
+ besti = d2 > best ? k : besti;
+ best = d2 > best ? d2 : best;
+ }
+ dists[tid] = best;
+ dists_i[tid] = besti;
+ __syncthreads();
+
+#pragma unroll
+ for (int block_size_thres = 1024; block_size_thres >= 2;
+ block_size_thres >>= 1) {
+ const int tid_thres = block_size_thres / 2;
+ if (block_size >= block_size_thres && tid < tid_thres) {
+ __update(dists, dists_i, tid, tid + tid_thres);
+ }
+ __syncthreads();
+ }
+
+ old = dists_i[0];
+ if (tid == 0) idxs[j] = old;
+ }
+}
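+
+// The #pragma unroll loop above is a standard shared-memory tree reduction:
+// at each step the lower half of the active threads folds in the upper half
+// via __update (keeping the larger distance and its index), so after
+// log2(block_size) steps dists_i[0] holds the index of the point farthest
+// from the already-selected set, which becomes the next sample. E.g. with
+// block_size = 1024 the steps compare tid with tid+512, tid+256, ..., tid+1.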
+
+// Modified from
+// https://github.com/qiqihaer/3DSSD-pytorch/blob/master/lib/pointnet2/src/sampling_gpu.cu
+template <unsigned int block_size>
+__global__ void furthest_point_sampling_with_dist_forward_cuda_kernel(
+ int b, int n, int m, const float *__restrict__ dataset,
+ float *__restrict__ temp, int *__restrict__ idxs) {
+ // dataset: (B, N, N)
+ // tmp: (B, N)
+ // output:
+ // idx: (B, M)
+
+ if (m <= 0) return;
+ __shared__ float dists[block_size];
+ __shared__ int dists_i[block_size];
+
+ int batch_index = blockIdx.x;
+ dataset += batch_index * n * n;
+ temp += batch_index * n;
+ idxs += batch_index * m;
+
+ int tid = threadIdx.x;
+ const int stride = block_size;
+
+ int old = 0;
+ if (threadIdx.x == 0) idxs[0] = old;
+
+ __syncthreads();
+ for (int j = 1; j < m; j++) {
+ int besti = 0;
+ float best = -1;
+ // float x1 = dataset[old * 3 + 0];
+ // float y1 = dataset[old * 3 + 1];
+ // float z1 = dataset[old * 3 + 2];
+ for (int k = tid; k < n; k += stride) {
+ // float x2, y2, z2;
+ // x2 = dataset[k * 3 + 0];
+ // y2 = dataset[k * 3 + 1];
+ // z2 = dataset[k * 3 + 2];
+
+ // float d = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) *
+ // (z2 - z1);
+ float d = dataset[old * n + k];
+
+ float d2 = min(d, temp[k]);
+ temp[k] = d2;
+ besti = d2 > best ? k : besti;
+ best = d2 > best ? d2 : best;
+ }
+ dists[tid] = best;
+ dists_i[tid] = besti;
+ __syncthreads();
+
+#pragma unroll
+ for (int block_size_thres = 1024; block_size_thres >= 2;
+ block_size_thres >>= 1) {
+ const int tid_thres = block_size_thres / 2;
+ if (block_size >= block_size_thres && tid < tid_thres) {
+ __update(dists, dists_i, tid, tid + tid_thres);
+ }
+ __syncthreads();
+ }
+
+ old = dists_i[0];
+ if (tid == 0) idxs[j] = old;
+ }
+}
+
+#endif // FURTHEST_POINT_SAMPLE_CUDA_KERNEL_CUH
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/gather_points_cuda_kernel.cuh b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/gather_points_cuda_kernel.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..cabcd4a6528270a31409a69048e6f4faa9d392ea
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/gather_points_cuda_kernel.cuh
@@ -0,0 +1,71 @@
+// encoding=utf-8
+// Copyright 2021 Huawei Technologies Co., Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef GATHER_POINTS_CUDA_KERNEL_CUH
+#define GATHER_POINTS_CUDA_KERNEL_CUH
+
+#ifdef MMCV_USE_PARROTS
+#include "parrots_cuda_helper.hpp"
+#else
+#include "pytorch_cuda_helper.hpp"
+#endif
+
+#define TOTAL_THREADS 1024
+
+template <typename T>
+__global__ void gather_points_forward_cuda_kernel(int b, int c, int n, int m,
+ const T *points,
+ const int *__restrict__ idx,
+ T *out) {
+ // points: (B, C, N)
+ // idx: (B, M)
+ // output:
+ // out: (B, C, M)
+
+ int bs_idx = blockIdx.z;
+ int c_idx = blockIdx.y;
+ CUDA_1D_KERNEL_LOOP(pt_idx, m) {
+ if (bs_idx >= b || c_idx >= c) return;
+
+ out += bs_idx * c * m + c_idx * m + pt_idx;
+ idx += bs_idx * m + pt_idx;
+ points += bs_idx * c * n + c_idx * n;
+ out[0] = points[idx[0]];
+ }
+}
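+
+// Indexing example: a thread with (bs_idx = 2, c_idx = 1, pt_idx = 3) copies
+// points[2][1][idx[2][3]] into out[2][1][3]; the backward kernel below routes
+// the gradient back along the same index with atomicAdd, since several output
+// points may gather from the same input point.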
+
+template <typename T>
+__global__ void gather_points_backward_cuda_kernel(int b, int c, int n, int m,
+ const T *grad_out,
+ const int *__restrict__ idx,
+ T *grad_points) {
+ // grad_out: (B, C, M)
+ // idx: (B, M)
+ // output:
+ // grad_points: (B, C, N)
+
+ int bs_idx = blockIdx.z;
+ int c_idx = blockIdx.y;
+ CUDA_1D_KERNEL_LOOP(pt_idx, m) {
+ if (bs_idx >= b || c_idx >= c) return;
+
+ grad_out += bs_idx * c * m + c_idx * m + pt_idx;
+ idx += bs_idx * m + pt_idx;
+ grad_points += bs_idx * c * n + c_idx * n;
+
+ atomicAdd(grad_points + idx[0], grad_out[0]);
+ }
+}
+
+#endif // GATHER_POINTS_CUDA_KERNEL_CUH
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/group_points_cuda_kernel.cuh b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/group_points_cuda_kernel.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..8a28149f09a714e6afc3cea98ff74b4472a409c6
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/group_points_cuda_kernel.cuh
@@ -0,0 +1,78 @@
+// encoding=utf-8
+// Copyright 2021 Huawei Technologies Co., Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// Modified from
+// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/group_points_gpu.cu
+#ifndef GROUP_POINTS_CUDA_KERNEL_CUH
+#define GROUP_POINTS_CUDA_KERNEL_CUH
+
+#ifdef MMCV_USE_PARROTS
+#include "parrots_cuda_helper.hpp"
+#else
+#include "pytorch_cuda_helper.hpp"
+#endif
+
+template <typename T>
+__global__ void group_points_forward_cuda_kernel(int b, int c, int n,
+ int npoints, int nsample,
+ const T *points,
+ const int *__restrict__ idx,
+ T *out) {
+ // points: (B, C, N)
+ // idx: (B, npoints, nsample)
+ // output:
+ // out: (B, C, npoints, nsample)
+ int bs_idx = blockIdx.z;
+ int c_idx = blockIdx.y;
+ CUDA_1D_KERNEL_LOOP(index, npoints * nsample) {
+ if (bs_idx >= b || c_idx >= c) return;
+
+ int pt_idx = index / nsample;
+ int sample_idx = index % nsample;
+
+ idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx;
+ int in_idx = bs_idx * c * n + c_idx * n + idx[0];
+ int out_idx = bs_idx * c * npoints * nsample + c_idx * npoints * nsample +
+ pt_idx * nsample + sample_idx;
+
+ out[out_idx] = points[in_idx];
+ }
+}
+
+template <typename T>
+__global__ void group_points_backward_cuda_kernel(int b, int c, int n,
+ int npoints, int nsample,
+ const T *grad_out,
+ const int *__restrict__ idx,
+ T *grad_points) {
+ // grad_out: (B, C, npoints, nsample)
+ // idx: (B, npoints, nsample)
+ // output:
+ // grad_points: (B, C, N)
+ int bs_idx = blockIdx.z;
+ int c_idx = blockIdx.y;
+ CUDA_1D_KERNEL_LOOP(index, npoints * nsample) {
+ int pt_idx = index / nsample;
+ if (bs_idx >= b || c_idx >= c) return;
+
+ int sample_idx = index % nsample;
+ grad_out += bs_idx * c * npoints * nsample + c_idx * npoints * nsample +
+ pt_idx * nsample + sample_idx;
+ idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx;
+
+ atomicAdd(grad_points + bs_idx * c * n + c_idx * n + idx[0], grad_out[0]);
+ }
+}
+
+#endif // GROUP_POINTS_CUDA_KERNEL_CUH
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/iou3d_cuda_kernel.cuh b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/iou3d_cuda_kernel.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..d6a04d575e1f43314656f7046aee3b491ba92e53
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/iou3d_cuda_kernel.cuh
@@ -0,0 +1,383 @@
+// encoding=utf-8
+// Copyright 2021 Huawei Technologies Co., Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef IOU3D_CUDA_KERNEL_CUH
+#define IOU3D_CUDA_KERNEL_CUH
+
+#ifdef MMCV_USE_PARROTS
+#include "parrots_cuda_helper.hpp"
+#else
+#include "pytorch_cuda_helper.hpp"
+#endif
+
+const int THREADS_PER_BLOCK_IOU3D = 16;
+const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8;
+__device__ const float EPS = 1e-8;
+
+struct Point {
+ float x, y;
+ __device__ Point() {}
+ __device__ Point(double _x, double _y) { x = _x, y = _y; }
+
+ __device__ void set(float _x, float _y) {
+ x = _x;
+ y = _y;
+ }
+
+ __device__ Point operator+(const Point &b) const {
+ return Point(x + b.x, y + b.y);
+ }
+
+ __device__ Point operator-(const Point &b) const {
+ return Point(x - b.x, y - b.y);
+ }
+};
+
+__device__ inline float cross(const Point &a, const Point &b) {
+ return a.x * b.y - a.y * b.x;
+}
+
+__device__ inline float cross(const Point &p1, const Point &p2,
+ const Point &p0) {
+ return (p1.x - p0.x) * (p2.y - p0.y) - (p2.x - p0.x) * (p1.y - p0.y);
+}
+
+__device__ int check_rect_cross(const Point &p1, const Point &p2,
+ const Point &q1, const Point &q2) {
+ int ret = min(p1.x, p2.x) <= max(q1.x, q2.x) &&
+ min(q1.x, q2.x) <= max(p1.x, p2.x) &&
+ min(p1.y, p2.y) <= max(q1.y, q2.y) &&
+ min(q1.y, q2.y) <= max(p1.y, p2.y);
+ return ret;
+}
+
+__device__ inline int check_in_box2d(const float *box, const Point &p) {
+ // params: box (5) [x1, y1, x2, y2, angle]
+ const float MARGIN = 1e-5;
+
+ float center_x = (box[0] + box[2]) / 2;
+ float center_y = (box[1] + box[3]) / 2;
+ float angle_cos = cos(-box[4]),
+ angle_sin =
+ sin(-box[4]); // rotate the point in the opposite direction of box
+ float rot_x =
+ (p.x - center_x) * angle_cos - (p.y - center_y) * angle_sin + center_x;
+ float rot_y =
+ (p.x - center_x) * angle_sin + (p.y - center_y) * angle_cos + center_y;
+
+ return (rot_x > box[0] - MARGIN && rot_x < box[2] + MARGIN &&
+ rot_y > box[1] - MARGIN && rot_y < box[3] + MARGIN);
+}
+
+__device__ inline int intersection(const Point &p1, const Point &p0,
+ const Point &q1, const Point &q0,
+ Point &ans_point) {
+ // fast exclusion
+ if (check_rect_cross(p0, p1, q0, q1) == 0) return 0;
+
+ // check cross standing
+ float s1 = cross(q0, p1, p0);
+ float s2 = cross(p1, q1, p0);
+ float s3 = cross(p0, q1, q0);
+ float s4 = cross(q1, p1, q0);
+
+ if (!(s1 * s2 > 0 && s3 * s4 > 0)) return 0;
+
+ // calculate intersection of two lines
+ float s5 = cross(q1, p1, p0);
+ if (fabs(s5 - s1) > EPS) {
+ ans_point.x = (s5 * q0.x - s1 * q1.x) / (s5 - s1);
+ ans_point.y = (s5 * q0.y - s1 * q1.y) / (s5 - s1);
+
+ } else {
+ float a0 = p0.y - p1.y, b0 = p1.x - p0.x, c0 = p0.x * p1.y - p1.x * p0.y;
+ float a1 = q0.y - q1.y, b1 = q1.x - q0.x, c1 = q0.x * q1.y - q1.x * q0.y;
+ float D = a0 * b1 - a1 * b0;
+
+ ans_point.x = (b0 * c1 - b1 * c0) / D;
+ ans_point.y = (a1 * c0 - a0 * c1) / D;
+ }
+
+ return 1;
+}
+
+__device__ inline void rotate_around_center(const Point ¢er,
+ const float angle_cos,
+ const float angle_sin, Point &p) {
+ float new_x =
+ (p.x - center.x) * angle_cos - (p.y - center.y) * angle_sin + center.x;
+ float new_y =
+ (p.x - center.x) * angle_sin + (p.y - center.y) * angle_cos + center.y;
+ p.set(new_x, new_y);
+}
+
+__device__ inline int point_cmp(const Point &a, const Point &b,
+ const Point ¢er) {
+ return atan2(a.y - center.y, a.x - center.x) >
+ atan2(b.y - center.y, b.x - center.x);
+}
+
+__device__ inline float box_overlap(const float *box_a, const float *box_b) {
+ // params: box_a (5) [x1, y1, x2, y2, angle]
+ // params: box_b (5) [x1, y1, x2, y2, angle]
+
+ float a_x1 = box_a[0], a_y1 = box_a[1], a_x2 = box_a[2], a_y2 = box_a[3],
+ a_angle = box_a[4];
+ float b_x1 = box_b[0], b_y1 = box_b[1], b_x2 = box_b[2], b_y2 = box_b[3],
+ b_angle = box_b[4];
+
+ Point center_a((a_x1 + a_x2) / 2, (a_y1 + a_y2) / 2);
+ Point center_b((b_x1 + b_x2) / 2, (b_y1 + b_y2) / 2);
+
+ Point box_a_corners[5];
+ box_a_corners[0].set(a_x1, a_y1);
+ box_a_corners[1].set(a_x2, a_y1);
+ box_a_corners[2].set(a_x2, a_y2);
+ box_a_corners[3].set(a_x1, a_y2);
+
+ Point box_b_corners[5];
+ box_b_corners[0].set(b_x1, b_y1);
+ box_b_corners[1].set(b_x2, b_y1);
+ box_b_corners[2].set(b_x2, b_y2);
+ box_b_corners[3].set(b_x1, b_y2);
+
+ // get oriented corners
+ float a_angle_cos = cos(a_angle), a_angle_sin = sin(a_angle);
+ float b_angle_cos = cos(b_angle), b_angle_sin = sin(b_angle);
+
+ for (int k = 0; k < 4; k++) {
+ rotate_around_center(center_a, a_angle_cos, a_angle_sin, box_a_corners[k]);
+ rotate_around_center(center_b, b_angle_cos, b_angle_sin, box_b_corners[k]);
+ }
+
+ box_a_corners[4] = box_a_corners[0];
+ box_b_corners[4] = box_b_corners[0];
+
+ // get intersection of lines
+ Point cross_points[16];
+ Point poly_center;
+ int cnt = 0, flag = 0;
+
+ poly_center.set(0, 0);
+ for (int i = 0; i < 4; i++) {
+ for (int j = 0; j < 4; j++) {
+ flag = intersection(box_a_corners[i + 1], box_a_corners[i],
+ box_b_corners[j + 1], box_b_corners[j],
+ cross_points[cnt]);
+ if (flag) {
+ poly_center = poly_center + cross_points[cnt];
+ cnt++;
+ }
+ }
+ }
+
+ // check corners
+ for (int k = 0; k < 4; k++) {
+ if (check_in_box2d(box_a, box_b_corners[k])) {
+ poly_center = poly_center + box_b_corners[k];
+ cross_points[cnt] = box_b_corners[k];
+ cnt++;
+ }
+ if (check_in_box2d(box_b, box_a_corners[k])) {
+ poly_center = poly_center + box_a_corners[k];
+ cross_points[cnt] = box_a_corners[k];
+ cnt++;
+ }
+ }
+
+ poly_center.x /= cnt;
+ poly_center.y /= cnt;
+
+ // sort the points of polygon
+ Point temp;
+ for (int j = 0; j < cnt - 1; j++) {
+ for (int i = 0; i < cnt - j - 1; i++) {
+ if (point_cmp(cross_points[i], cross_points[i + 1], poly_center)) {
+ temp = cross_points[i];
+ cross_points[i] = cross_points[i + 1];
+ cross_points[i + 1] = temp;
+ }
+ }
+ }
+
+ // get the overlap areas
+ float area = 0;
+ for (int k = 0; k < cnt - 1; k++) {
+ area += cross(cross_points[k] - cross_points[0],
+ cross_points[k + 1] - cross_points[0]);
+ }
+
+ return fabs(area) / 2.0;
+}
+
+__device__ inline float iou_bev(const float *box_a, const float *box_b) {
+ // params: box_a (5) [x1, y1, x2, y2, angle]
+ // params: box_b (5) [x1, y1, x2, y2, angle]
+ float sa = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1]);
+ float sb = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1]);
+ float s_overlap = box_overlap(box_a, box_b);
+ return s_overlap / fmaxf(sa + sb - s_overlap, EPS);
+}
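+
+// box_overlap / iou_bev compute rotated-box IoU in bird's-eye view: rotate the
+// corners of both boxes, collect every edge-edge intersection plus every
+// corner lying inside the other box, sort those vertices by angle around
+// their centroid (point_cmp), and sum triangle areas with the cross product
+// (shoelace formula). iou_bev divides the overlap by the union area, with EPS
+// guarding against division by zero for degenerate boxes.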
+
+__global__ void iou3d_boxes_overlap_bev_forward_cuda_kernel(
+ const int num_a, const float *boxes_a, const int num_b,
+ const float *boxes_b, float *ans_overlap) {
+ CUDA_2D_KERNEL_LOOP(b_idx, num_b, a_idx, num_a) {
+ if (a_idx >= num_a || b_idx >= num_b) {
+ return;
+ }
+ const float *cur_box_a = boxes_a + a_idx * 5;
+ const float *cur_box_b = boxes_b + b_idx * 5;
+ float s_overlap = box_overlap(cur_box_a, cur_box_b);
+ ans_overlap[a_idx * num_b + b_idx] = s_overlap;
+ }
+}
+
+__global__ void iou3d_boxes_iou_bev_forward_cuda_kernel(const int num_a,
+ const float *boxes_a,
+ const int num_b,
+ const float *boxes_b,
+ float *ans_iou) {
+ CUDA_2D_KERNEL_LOOP(b_idx, num_b, a_idx, num_a) {
+ if (a_idx >= num_a || b_idx >= num_b) {
+ return;
+ }
+
+ const float *cur_box_a = boxes_a + a_idx * 5;
+ const float *cur_box_b = boxes_b + b_idx * 5;
+ float cur_iou_bev = iou_bev(cur_box_a, cur_box_b);
+ ans_iou[a_idx * num_b + b_idx] = cur_iou_bev;
+ }
+}
+
+__global__ void nms_forward_cuda_kernel(const int boxes_num,
+ const float nms_overlap_thresh,
+ const float *boxes,
+ unsigned long long *mask) {
+ // params: boxes (N, 5) [x1, y1, x2, y2, ry]
+ // params: mask (N, N/THREADS_PER_BLOCK_NMS)
+ const int blocks =
+ (boxes_num + THREADS_PER_BLOCK_NMS - 1) / THREADS_PER_BLOCK_NMS;
+ CUDA_2D_KERNEL_BLOCK_LOOP(col_start, blocks, row_start, blocks) {
+ // if (row_start > col_start) return;
+
+ const int row_size = fminf(boxes_num - row_start * THREADS_PER_BLOCK_NMS,
+ THREADS_PER_BLOCK_NMS);
+ const int col_size = fminf(boxes_num - col_start * THREADS_PER_BLOCK_NMS,
+ THREADS_PER_BLOCK_NMS);
+
+ __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 5];
+
+ if (threadIdx.x < col_size) {
+ block_boxes[threadIdx.x * 5 + 0] =
+ boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 0];
+ block_boxes[threadIdx.x * 5 + 1] =
+ boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 1];
+ block_boxes[threadIdx.x * 5 + 2] =
+ boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 2];
+ block_boxes[threadIdx.x * 5 + 3] =
+ boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 3];
+ block_boxes[threadIdx.x * 5 + 4] =
+ boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 4];
+ }
+ __syncthreads();
+
+ if (threadIdx.x < row_size) {
+ const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x;
+ const float *cur_box = boxes + cur_box_idx * 5;
+
+ int i = 0;
+ unsigned long long t = 0;
+ int start = 0;
+ if (row_start == col_start) {
+ start = threadIdx.x + 1;
+ }
+ for (i = start; i < col_size; i++) {
+ if (iou_bev(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
+ t |= 1ULL << i;
+ }
+ }
+ const int col_blocks =
+ (boxes_num + THREADS_PER_BLOCK_NMS - 1) / THREADS_PER_BLOCK_NMS;
+ mask[cur_box_idx * col_blocks + col_start] = t;
+ }
+ }
+}
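+
+// Each row of `mask` is a bitset: bit i of mask[box][col] is set when `box`
+// overlaps the i-th box of column block `col` above nms_overlap_thresh.
+// A typical host-side consumption is a greedy sequential pass, sketched here
+// for illustration only (the actual logic lives in the op's host code):
+//   std::vector<int> keep;
+//   std::vector<unsigned long long> remv(col_blocks, 0);
+//   for (int i = 0; i < boxes_num; i++) {
+//     int block = i / THREADS_PER_BLOCK_NMS, bit = i % THREADS_PER_BLOCK_NMS;
+//     if (!(remv[block] & (1ULL << bit))) {  // box i is not suppressed yet
+//       keep.push_back(i);
+//       for (int j = block; j < col_blocks; j++)
+//         remv[j] |= mask[i * col_blocks + j];
+//     }
+//   }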
+
+__device__ inline float iou_normal(float const *const a, float const *const b) {
+ float left = fmaxf(a[0], b[0]), right = fminf(a[2], b[2]);
+ float top = fmaxf(a[1], b[1]), bottom = fminf(a[3], b[3]);
+ float width = fmaxf(right - left, 0.f), height = fmaxf(bottom - top, 0.f);
+ float interS = width * height;
+ float Sa = (a[2] - a[0]) * (a[3] - a[1]);
+ float Sb = (b[2] - b[0]) * (b[3] - b[1]);
+ return interS / fmaxf(Sa + Sb - interS, EPS);
+}
+
+__global__ void nms_normal_forward_cuda_kernel(const int boxes_num,
+ const float nms_overlap_thresh,
+ const float *boxes,
+ unsigned long long *mask) {
+ // params: boxes (N, 5) [x1, y1, x2, y2, ry]
+ // params: mask (N, N/THREADS_PER_BLOCK_NMS)
+
+ const int blocks =
+ (boxes_num + THREADS_PER_BLOCK_NMS - 1) / THREADS_PER_BLOCK_NMS;
+ CUDA_2D_KERNEL_BLOCK_LOOP(col_start, blocks, row_start, blocks) {
+ // if (row_start > col_start) return;
+
+ const int row_size = fminf(boxes_num - row_start * THREADS_PER_BLOCK_NMS,
+ THREADS_PER_BLOCK_NMS);
+ const int col_size = fminf(boxes_num - col_start * THREADS_PER_BLOCK_NMS,
+ THREADS_PER_BLOCK_NMS);
+
+ __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 5];
+
+ if (threadIdx.x < col_size) {
+ block_boxes[threadIdx.x * 5 + 0] =
+ boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 0];
+ block_boxes[threadIdx.x * 5 + 1] =
+ boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 1];
+ block_boxes[threadIdx.x * 5 + 2] =
+ boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 2];
+ block_boxes[threadIdx.x * 5 + 3] =
+ boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 3];
+ block_boxes[threadIdx.x * 5 + 4] =
+ boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 4];
+ }
+ __syncthreads();
+
+ if (threadIdx.x < row_size) {
+ const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x;
+ const float *cur_box = boxes + cur_box_idx * 5;
+
+ int i = 0;
+ unsigned long long t = 0;
+ int start = 0;
+ if (row_start == col_start) {
+ start = threadIdx.x + 1;
+ }
+ for (i = start; i < col_size; i++) {
+ if (iou_normal(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
+ t |= 1ULL << i;
+ }
+ }
+ const int col_blocks =
+ (boxes_num + THREADS_PER_BLOCK_NMS - 1) / THREADS_PER_BLOCK_NMS;
+ mask[cur_box_idx * col_blocks + col_start] = t;
+ }
+ }
+}
+
+#endif // IOU3D_CUDA_KERNEL_CUH
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/knn_cuda_kernel.cuh b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/knn_cuda_kernel.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..30889e728093195955a52b078e1ff433f8b0c4a5
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/knn_cuda_kernel.cuh
@@ -0,0 +1,105 @@
+// encoding=utf-8
+// Copyright 2021 Huawei Technologies Co., Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// Modified from
+// https://github.com/CVMI-Lab/PAConv/tree/main/scene_seg/lib/pointops/src/knnquery_heap
+#ifndef KNN_CUDA_KERNEL_CUH
+#define KNN_CUDA_KERNEL_CUH
+
+#ifdef MMCV_USE_PARROTS
+#include "parrots_cuda_helper.hpp"
+#else
+#include "pytorch_cuda_helper.hpp"
+#endif
+
+inline __device__ void swap_float(float *x, float *y) {
+ float tmp = *x;
+ *x = *y;
+ *y = tmp;
+}
+
+inline __device__ void swap_int(int *x, int *y) {
+ int tmp = *x;
+ *x = *y;
+ *y = tmp;
+}
+
+__device__ void reheap(float *dist, int *idx, int k) {
+ int root = 0;
+ int child = root * 2 + 1;
+ while (child < k) {
+ if (child + 1 < k && dist[child + 1] > dist[child]) child++;
+ if (dist[root] > dist[child]) return;
+ swap_float(&dist[root], &dist[child]);
+ swap_int(&idx[root], &idx[child]);
+ root = child;
+ child = root * 2 + 1;
+ }
+}
+
+__device__ void heap_sort(float *dist, int *idx, int k) {
+ int i;
+ for (i = k - 1; i > 0; i--) {
+ swap_float(&dist[0], &dist[i]);
+ swap_int(&idx[0], &idx[i]);
+ reheap(dist, idx, i);
+ }
+}
+
+// input: xyz (b, n, 3) new_xyz (b, m, 3)
+// output: idx (b, m, nsample) dist2 (b, m, nsample)
+template <typename T>
+__global__ void knn_forward_cuda_kernel(int b, int n, int m, int nsample,
+ const T *xyz, const T *new_xyz,
+ int *__restrict__ idx, T *dist2) {
+ int bs_idx = blockIdx.y;
+ CUDA_1D_KERNEL_LOOP(pt_idx, m) {
+ if (bs_idx >= b) return;
+
+ new_xyz += bs_idx * m * 3 + pt_idx * 3;
+ xyz += bs_idx * n * 3;
+ idx += bs_idx * m * nsample + pt_idx * nsample;
+ dist2 += bs_idx * m * nsample + pt_idx * nsample;
+
+ T new_x = new_xyz[0];
+ T new_y = new_xyz[1];
+ T new_z = new_xyz[2];
+
+ float best_dist[100];
+ int best_idx[100];
+ for (int i = 0; i < nsample; i++) {
+ best_dist[i] = 1e10;
+ best_idx[i] = 0;
+ }
+ for (int i = 0; i < n; i++) {
+ T x = xyz[i * 3 + 0];
+ T y = xyz[i * 3 + 1];
+ T z = xyz[i * 3 + 2];
+ T d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) +
+ (new_z - z) * (new_z - z);
+ if (d2 < best_dist[0]) {
+ best_dist[0] = d2;
+ best_idx[0] = i;
+ reheap(best_dist, best_idx, nsample);
+ }
+ }
+ heap_sort(best_dist, best_idx, nsample);
+ for (int i = 0; i < nsample; i++) {
+ idx[i] = best_idx[i];
+ dist2[i] = best_dist[i];
+ }
+ }
+}
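+
+// best_dist / best_idx form a max-heap over the current nsample nearest
+// candidates: best_dist[0] is the worst of them, so a new point is accepted
+// only if it beats that entry, after which reheap restores the heap property.
+// heap_sort then emits the neighbors in ascending distance order. Note the
+// fixed local arrays of size 100 cap nsample at 100 for this kernel.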
+
+#endif // KNN_CUDA_KERNEL_CUH
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/masked_conv2d_cuda_kernel.cuh b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/masked_conv2d_cuda_kernel.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..7331e59aa687e12d0c326458fbd452bbf308436b
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/masked_conv2d_cuda_kernel.cuh
@@ -0,0 +1,75 @@
+// encoding=utf-8
+// Copyright 2021 Huawei Technologies Co., Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef MASKED_CONV2D_CUDA_KERNEL_CUH
+#define MASKED_CONV2D_CUDA_KERNEL_CUH
+
+#ifdef MMCV_USE_PARROTS
+#include "parrots_cuda_helper.hpp"
+#else
+#include "pytorch_cuda_helper.hpp"
+#endif
+
+template <typename scalar_t>
+__global__ void MaskedIm2colForward(const int n, const scalar_t *data_im,
+ const int height, const int width,
+ const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w,
+ const int64_t *mask_h_idx,
+ const int64_t *mask_w_idx,
+ const int mask_cnt, scalar_t *data_col) {
+ // mask_cnt * channels
+ CUDA_1D_KERNEL_LOOP(index, n) {
+ const int m_index = index % mask_cnt;
+ const int h_col = mask_h_idx[m_index];
+ const int w_col = mask_w_idx[m_index];
+ const int c_im = index / mask_cnt;
+ const int c_col = c_im * kernel_h * kernel_w;
+ const int h_offset = h_col - pad_h;
+ const int w_offset = w_col - pad_w;
+ scalar_t *data_col_ptr = data_col + c_col * mask_cnt + m_index;
+ for (int i = 0; i < kernel_h; ++i) {
+ int h_im = h_offset + i;
+ for (int j = 0; j < kernel_w; ++j) {
+ int w_im = w_offset + j;
+ if (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) {
+ *data_col_ptr =
+ (scalar_t)data_im[(c_im * height + h_im) * width + w_im];
+ } else {
+ *data_col_ptr = 0.0;
+ }
+ data_col_ptr += mask_cnt;
+ }
+ }
+ }
+}
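+
+// Layout example: data_col is a (channels * kernel_h * kernel_w) x mask_cnt
+// matrix with one column per masked output location. A thread handling
+// channel c_im and mask entry m_index fills the kernel_h * kernel_w rows that
+// belong to c_im in column m_index, writing 0 for taps falling outside the
+// image. MaskedCol2imForward below scatters each entry of a
+// (channels x mask_cnt) buffer back to the (h, w) position recorded in
+// mask_h_idx / mask_w_idx.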
+
+template <typename scalar_t>
+__global__ void MaskedCol2imForward(const int n, const scalar_t *data_col,
+ const int height, const int width,
+ const int channels,
+ const int64_t *mask_h_idx,
+ const int64_t *mask_w_idx,
+ const int mask_cnt, scalar_t *data_im) {
+ CUDA_1D_KERNEL_LOOP(index, n) {
+ const int m_index = index % mask_cnt;
+ const int h_im = mask_h_idx[m_index];
+ const int w_im = mask_w_idx[m_index];
+ const int c_im = index / mask_cnt;
+ // compute the start and end of the output
+ data_im[(c_im * height + h_im) * width + w_im] = data_col[index];
+ }
+}
+
+#endif // MASKED_CONV2D_CUDA_KERNEL_CUH
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/min_area_polygons_cuda.cuh b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/min_area_polygons_cuda.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..633b7b91b28beaeb547dc293a6d3e386e8a9e44c
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/min_area_polygons_cuda.cuh
@@ -0,0 +1,313 @@
+// encoding=utf-8
+// Copyright 2021 Huawei Technologies Co., Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef MIN_AREA_POLYGONS_CUDA_KERNEL_CUH
+#define MIN_AREA_POLYGONS_CUDA_KERNEL_CUH
+
+#ifdef MMCV_USE_PARROTS
+#include "parrots_cuda_helper.hpp"
+#else
+#include "pytorch_cuda_helper.hpp"
+#endif
+
+#define MAXN 20
+__device__ const float PI = 3.1415926;
+
+struct Point {
+ float x, y;
+ __device__ Point() {}
+ __device__ Point(float x, float y) : x(x), y(y) {}
+};
+
+__device__ inline void swap1(Point *a, Point *b) {
+ Point temp;
+ temp.x = a->x;
+ temp.y = a->y;
+
+ a->x = b->x;
+ a->y = b->y;
+
+ b->x = temp.x;
+ b->y = temp.y;
+}
+__device__ inline float cross(Point o, Point a, Point b) {
+ return (a.x - o.x) * (b.y - o.y) - (b.x - o.x) * (a.y - o.y);
+}
+
+__device__ inline float dis(Point a, Point b) {
+ return (a.x - b.x) * (a.x - b.x) + (a.y - b.y) * (a.y - b.y);
+}
+__device__ inline void minBoundingRect(Point *ps, int n_points, float *minbox) {
+ float convex_points[2][MAXN];
+ for (int j = 0; j < n_points; j++) {
+ convex_points[0][j] = ps[j].x;
+ }
+ for (int j = 0; j < n_points; j++) {
+ convex_points[1][j] = ps[j].y;
+ }
+
+ Point edges[MAXN];
+ float edges_angles[MAXN];
+ float unique_angles[MAXN];
+ int n_edges = n_points - 1;
+ int n_unique = 0;
+ int unique_flag = 0;
+
+ for (int i = 0; i < n_edges; i++) {
+ edges[i].x = ps[i + 1].x - ps[i].x;
+ edges[i].y = ps[i + 1].y - ps[i].y;
+ }
+ for (int i = 0; i < n_edges; i++) {
+ edges_angles[i] = atan2((double)edges[i].y, (double)edges[i].x);
+ if (edges_angles[i] >= 0) {
+ edges_angles[i] = fmod((double)edges_angles[i], (double)PI / 2);
+ } else {
+ edges_angles[i] =
+ edges_angles[i] - (int)(edges_angles[i] / (PI / 2) - 1) * (PI / 2);
+ }
+ }
+ unique_angles[0] = edges_angles[0];
+ n_unique += 1;
+ for (int i = 1; i < n_edges; i++) {
+ for (int j = 0; j < n_unique; j++) {
+ if (edges_angles[i] == unique_angles[j]) {
+ unique_flag += 1;
+ }
+ }
+ if (unique_flag == 0) {
+ unique_angles[n_unique] = edges_angles[i];
+ n_unique += 1;
+ unique_flag = 0;
+ } else {
+ unique_flag = 0;
+ }
+ }
+
+ float minarea = 1e12;
+ for (int i = 0; i < n_unique; i++) {
+ float R[2][2];
+ float rot_points[2][MAXN];
+ R[0][0] = cos(unique_angles[i]);
+ R[0][1] = -sin(unique_angles[i]);
+ R[1][0] = sin(unique_angles[i]);
+ R[1][1] = cos(unique_angles[i]);
+ // R x Points
+ for (int m = 0; m < 2; m++) {
+ for (int n = 0; n < n_points; n++) {
+ float sum = 0.0;
+ for (int k = 0; k < 2; k++) {
+ sum = sum + R[m][k] * convex_points[k][n];
+ }
+ rot_points[m][n] = sum;
+ }
+ }
+
+ // xmin;
+ float xmin, ymin, xmax, ymax;
+ xmin = 1e12;
+ for (int j = 0; j < n_points; j++) {
+ if (isinf(rot_points[0][j]) || isnan(rot_points[0][j])) {
+ continue;
+ } else {
+ if (rot_points[0][j] < xmin) {
+ xmin = rot_points[0][j];
+ }
+ }
+ }
+ // ymin
+ ymin = 1e12;
+ for (int j = 0; j < n_points; j++) {
+ if (isinf(rot_points[1][j]) || isnan(rot_points[1][j])) {
+ continue;
+ } else {
+ if (rot_points[1][j] < ymin) {
+ ymin = rot_points[1][j];
+ }
+ }
+ }
+ // xmax
+ xmax = -1e12;
+ for (int j = 0; j < n_points; j++) {
+ if (isinf(rot_points[0][j]) || isnan(rot_points[0][j])) {
+ continue;
+ } else {
+ if (rot_points[0][j] > xmax) {
+ xmax = rot_points[0][j];
+ }
+ }
+ }
+ // ymax
+ ymax = -1e12;
+ for (int j = 0; j < n_points; j++) {
+ if (isinf(rot_points[1][j]) || isnan(rot_points[1][j])) {
+ continue;
+ } else {
+ if (rot_points[1][j] > ymax) {
+ ymax = rot_points[1][j];
+ }
+ }
+ }
+ float area = (xmax - xmin) * (ymax - ymin);
+ if (area < minarea) {
+ minarea = area;
+ minbox[0] = unique_angles[i];
+ minbox[1] = xmin;
+ minbox[2] = ymin;
+ minbox[3] = xmax;
+ minbox[4] = ymax;
+ }
+ }
+}
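+
+// minBoundingRect follows the classic rotating-directions idea: for every
+// unique edge angle of the convex hull (folded into [0, pi/2)), rotate all
+// hull points by that angle, take the axis-aligned bounding box of the
+// rotated points, and keep the rotation with the smallest area. minbox holds
+// {angle, xmin, ymin, xmax, ymax} in the rotated frame; Findminbox below maps
+// the four corners back with the transposed rotation matrix.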
+
+// convex_find
+__device__ inline void Jarvis(Point *in_poly, int &n_poly) {
+ int n_input = n_poly;
+ Point input_poly[20];
+ for (int i = 0; i < n_input; i++) {
+ input_poly[i].x = in_poly[i].x;
+ input_poly[i].y = in_poly[i].y;
+ }
+ Point p_max, p_k;
+ int max_index, k_index;
+ int Stack[20], top1, top2;
+ // float sign;
+ double sign;
+ Point right_point[10], left_point[10];
+
+ for (int i = 0; i < n_poly; i++) {
+ if (in_poly[i].y < in_poly[0].y ||
+ in_poly[i].y == in_poly[0].y && in_poly[i].x < in_poly[0].x) {
+ Point *j = &(in_poly[0]);
+ Point *k = &(in_poly[i]);
+ swap1(j, k);
+ }
+ if (i == 0) {
+ p_max = in_poly[0];
+ max_index = 0;
+ }
+ if (in_poly[i].y > p_max.y ||
+ in_poly[i].y == p_max.y && in_poly[i].x > p_max.x) {
+ p_max = in_poly[i];
+ max_index = i;
+ }
+ }
+ if (max_index == 0) {
+ max_index = 1;
+ p_max = in_poly[max_index];
+ }
+
+ k_index = 0, Stack[0] = 0, top1 = 0;
+ while (k_index != max_index) {
+ p_k = p_max;
+ k_index = max_index;
+ for (int i = 1; i < n_poly; i++) {
+ sign = cross(in_poly[Stack[top1]], in_poly[i], p_k);
+ if ((sign > 0) || ((sign == 0) && (dis(in_poly[Stack[top1]], in_poly[i]) >
+ dis(in_poly[Stack[top1]], p_k)))) {
+ p_k = in_poly[i];
+ k_index = i;
+ }
+ }
+ top1++;
+ Stack[top1] = k_index;
+ }
+
+ for (int i = 0; i <= top1; i++) {
+ right_point[i] = in_poly[Stack[i]];
+ }
+
+ k_index = 0, Stack[0] = 0, top2 = 0;
+
+ while (k_index != max_index) {
+ p_k = p_max;
+ k_index = max_index;
+ for (int i = 1; i < n_poly; i++) {
+ sign = cross(in_poly[Stack[top2]], in_poly[i], p_k);
+ if ((sign < 0) || (sign == 0) && (dis(in_poly[Stack[top2]], in_poly[i]) >
+ dis(in_poly[Stack[top2]], p_k))) {
+ p_k = in_poly[i];
+ k_index = i;
+ }
+ }
+ top2++;
+ Stack[top2] = k_index;
+ }
+
+ for (int i = top2 - 1; i >= 0; i--) {
+ left_point[i] = in_poly[Stack[i]];
+ }
+
+ for (int i = 0; i < top1 + top2; i++) {
+ if (i <= top1) {
+ in_poly[i] = right_point[i];
+ } else {
+ in_poly[i] = left_point[top2 - (i - top1)];
+ }
+ }
+ n_poly = top1 + top2;
+}
+
+template <typename T>
+__device__ inline void Findminbox(T const *const p, T *minpoints) {
+ Point ps1[MAXN];
+ Point convex[MAXN];
+ for (int i = 0; i < 9; i++) {
+ convex[i].x = p[i * 2];
+ convex[i].y = p[i * 2 + 1];
+ }
+ int n_convex = 9;
+ Jarvis(convex, n_convex);
+ int n1 = n_convex;
+ for (int i = 0; i < n1; i++) {
+ ps1[i].x = convex[i].x;
+ ps1[i].y = convex[i].y;
+ }
+ ps1[n1].x = convex[0].x;
+ ps1[n1].y = convex[0].y;
+
+ float minbbox[5] = {0};
+ minBoundingRect(ps1, n1 + 1, minbbox);
+ float angle = minbbox[0];
+ float xmin = minbbox[1];
+ float ymin = minbbox[2];
+ float xmax = minbbox[3];
+ float ymax = minbbox[4];
+ float R[2][2];
+
+ R[0][0] = cos(angle);
+ R[0][1] = -sin(angle);
+ R[1][0] = sin(angle);
+ R[1][1] = cos(angle);
+
+ minpoints[0] = xmax * R[0][0] + ymin * R[1][0];
+ minpoints[1] = xmax * R[0][1] + ymin * R[1][1];
+ minpoints[2] = xmin * R[0][0] + ymin * R[1][0];
+ minpoints[3] = xmin * R[0][1] + ymin * R[1][1];
+ minpoints[4] = xmin * R[0][0] + ymax * R[1][0];
+ minpoints[5] = xmin * R[0][1] + ymax * R[1][1];
+ minpoints[6] = xmax * R[0][0] + ymax * R[1][0];
+ minpoints[7] = xmax * R[0][1] + ymax * R[1][1];
+}
+
+template <typename T>
+__global__ void min_area_polygons_cuda_kernel(const int ex_n_boxes,
+ const T *ex_boxes, T *minbox) {
+ CUDA_1D_KERNEL_LOOP(index, ex_n_boxes) {
+ const T *cur_box = ex_boxes + index * 18;
+ T *cur_min_box = minbox + index * 8;
+ Findminbox(cur_box, cur_min_box);
+ }
+}
+
+#endif // MIN_AREA_POLYGONS_CUDA_KERNEL_CUH
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/modulated_deform_conv_cuda_kernel.cuh b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/modulated_deform_conv_cuda_kernel.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..ca0e91a25246569bb7de04649ab4f5afe233670c
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/modulated_deform_conv_cuda_kernel.cuh
@@ -0,0 +1,399 @@
+/*!
+ ******************* BEGIN Caffe Copyright Notice and Disclaimer
+ *****************
+ *
+ * COPYRIGHT
+ *
+ * All contributions by the University of California:
+ * Copyright (c) 2014-2017 The Regents of the University of California (Regents)
+ * All rights reserved.
+ *
+ * All other contributions:
+ * Copyright (c) 2014-2017, the respective contributors
+ * All rights reserved.
+ *
+ * Caffe uses a shared copyright model: each contributor holds copyright over
+ * their contributions to Caffe. The project versioning records all such
+ * contribution and copyright details. If a contributor wants to further mark
+ * their specific copyright on a particular contribution, they should indicate
+ * their copyright solely in the commit message of the change when it is
+ * committed.
+ *
+ * LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ *AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ *IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+ *FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ *DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ *SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ *OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * CONTRIBUTION AGREEMENT
+ *
+ * By contributing to the BVLC/caffe repository through pull-request, comment,
+ * or otherwise, the contributor releases their content to the
+ * license and copyright terms herein.
+ *
+ ***************** END Caffe Copyright Notice and Disclaimer
+ *********************
+ *
+ * Copyright (c) 2018 Microsoft
+ * Licensed under The MIT License [see LICENSE for details]
+ * \file modulated_deformable_im2col.cuh
+ * \brief Function definitions of converting an image to
+ * column matrix based on kernel, padding, dilation, and offset.
+ * These functions are mainly used in deformable convolution operators.
+ * \ref: https://arxiv.org/abs/1703.06211
+ * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu, Dazhi Cheng
+ */
+
+// modified from
+// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu
+
+#ifndef MODULATED_DEFORM_CONV_CUDA_KERNEL_CUH
+#define MODULATED_DEFORM_CONV_CUDA_KERNEL_CUH
+
+#include <float.h>
+#ifdef MMCV_WITH_TRT
+#include "common_cuda_helper.hpp"
+#else // MMCV_WITH_TRT
+#ifdef MMCV_USE_PARROTS
+#include "parrots_cuda_helper.hpp"
+#else // MMCV_USE_PARROTS
+#include "pytorch_cuda_helper.hpp"
+#endif // MMCV_USE_PARROTS
+#endif // MMCV_WITH_TRT
+
+template <typename T>
+__device__ T dmcn_im2col_bilinear(const T *input, const int data_width,
+ const int height, const int width, T h, T w) {
+ int h_low = floorf(h);
+ int w_low = floorf(w);
+ int h_high = h_low + 1;
+ int w_high = w_low + 1;
+
+ T lh = h - h_low;
+ T lw = w - w_low;
+ T hh = 1 - lh, hw = 1 - lw;
+
+ T v1 = 0;
+ if (h_low >= 0 && w_low >= 0) v1 = input[h_low * data_width + w_low];
+ T v2 = 0;
+ if (h_low >= 0 && w_high <= width - 1)
+ v2 = input[h_low * data_width + w_high];
+ T v3 = 0;
+ if (h_high <= height - 1 && w_low >= 0)
+ v3 = input[h_high * data_width + w_low];
+ T v4 = 0;
+ if (h_high <= height - 1 && w_high <= width - 1)
+ v4 = input[h_high * data_width + w_high];
+
+ T w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
+
+ T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
+ return val;
+}
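+
+// Bilinear weight example: for h = 2.3, w = 4.7 we get lh = 0.3, lw = 0.7,
+// hh = 0.7, hw = 0.3, so (w1, w2, w3, w4) = (0.21, 0.49, 0.09, 0.21) for the
+// neighbors (2,4), (2,5), (3,4), (3,5); the weights sum to 1 and neighbors
+// outside the image contribute v = 0 through the boundary checks above.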
+
+template <typename T>
+__device__ T dmcn_get_gradient_weight(T argmax_h, T argmax_w, const int h,
+ const int w, const int height,
+ const int width) {
+ if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 ||
+ argmax_w >= width) {
+ // empty
+ return 0;
+ }
+
+ int argmax_h_low = floorf(argmax_h);
+ int argmax_w_low = floorf(argmax_w);
+ int argmax_h_high = argmax_h_low + 1;
+ int argmax_w_high = argmax_w_low + 1;
+
+ T weight = 0;
+ if (h == argmax_h_low && w == argmax_w_low)
+ weight = (h + 1 - argmax_h) * (w + 1 - argmax_w);
+ if (h == argmax_h_low && w == argmax_w_high)
+ weight = (h + 1 - argmax_h) * (argmax_w + 1 - w);
+ if (h == argmax_h_high && w == argmax_w_low)
+ weight = (argmax_h + 1 - h) * (w + 1 - argmax_w);
+ if (h == argmax_h_high && w == argmax_w_high)
+ weight = (argmax_h + 1 - h) * (argmax_w + 1 - w);
+ return weight;
+}
+
+template <typename T>
+__device__ T dmcn_get_coordinate_weight(T argmax_h, T argmax_w,
+ const int height, const int width,
+ const T *im_data, const int data_width,
+ const int bp_dir) {
+ if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 ||
+ argmax_w >= width) {
+ // empty
+ return 0;
+ }
+
+ int argmax_h_low = floorf(argmax_h);
+ int argmax_w_low = floorf(argmax_w);
+ int argmax_h_high = argmax_h_low + 1;
+ int argmax_w_high = argmax_w_low + 1;
+
+ T weight = 0;
+
+ if (bp_dir == 0) {
+ if (argmax_h_low >= 0 && argmax_w_low >= 0)
+ weight += -1 * (argmax_w_low + 1 - argmax_w) *
+ im_data[argmax_h_low * data_width + argmax_w_low];
+ if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
+ weight += -1 * (argmax_w - argmax_w_low) *
+ im_data[argmax_h_low * data_width + argmax_w_high];
+ if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
+ weight += (argmax_w_low + 1 - argmax_w) *
+ im_data[argmax_h_high * data_width + argmax_w_low];
+ if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
+ weight += (argmax_w - argmax_w_low) *
+ im_data[argmax_h_high * data_width + argmax_w_high];
+ } else if (bp_dir == 1) {
+ if (argmax_h_low >= 0 && argmax_w_low >= 0)
+ weight += -1 * (argmax_h_low + 1 - argmax_h) *
+ im_data[argmax_h_low * data_width + argmax_w_low];
+ if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
+ weight += (argmax_h_low + 1 - argmax_h) *
+ im_data[argmax_h_low * data_width + argmax_w_high];
+ if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
+ weight += -1 * (argmax_h - argmax_h_low) *
+ im_data[argmax_h_high * data_width + argmax_w_low];
+ if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
+ weight += (argmax_h - argmax_h_low) *
+ im_data[argmax_h_high * data_width + argmax_w_high];
+ }
+
+ return weight;
+}
+
+template <typename T>
+__global__ void modulated_deformable_im2col_gpu_kernel(
+ const int n, const T *data_im, const T *data_offset, const T *data_mask,
+ const int height, const int width, const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int channel_per_deformable_group, const int batch_size,
+ const int num_channels, const int deformable_group, const int height_col,
+ const int width_col, T *data_col) {
+ CUDA_1D_KERNEL_LOOP(index, n) {
+ // index index of output matrix
+ const int w_col = index % width_col;
+ const int h_col = (index / width_col) % height_col;
+ const int b_col = (index / width_col / height_col) % batch_size;
+ const int c_im = (index / width_col / height_col) / batch_size;
+ const int c_col = c_im * kernel_h * kernel_w;
+
+ // compute deformable group index
+ const int deformable_group_index = c_im / channel_per_deformable_group;
+
+ const int h_in = h_col * stride_h - pad_h;
+ const int w_in = w_col * stride_w - pad_w;
+
+ T *data_col_ptr =
+ data_col +
+ ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col;
+ const T *data_im_ptr =
+ data_im + (b_col * num_channels + c_im) * height * width;
+ const T *data_offset_ptr =
+ data_offset + (b_col * deformable_group + deformable_group_index) * 2 *
+ kernel_h * kernel_w * height_col * width_col;
+
+ const T *data_mask_ptr =
+ data_mask + (b_col * deformable_group + deformable_group_index) *
+ kernel_h * kernel_w * height_col * width_col;
+
+ for (int i = 0; i < kernel_h; ++i) {
+ for (int j = 0; j < kernel_w; ++j) {
+ const int data_offset_h_ptr =
+ ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col;
+ const int data_offset_w_ptr =
+ ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col +
+ w_col;
+ const int data_mask_hw_ptr =
+ ((i * kernel_w + j) * height_col + h_col) * width_col + w_col;
+ const T offset_h = data_offset_ptr[data_offset_h_ptr];
+ const T offset_w = data_offset_ptr[data_offset_w_ptr];
+ const T mask = data_mask_ptr[data_mask_hw_ptr];
+        T val = static_cast<T>(0);
+ const T h_im = h_in + i * dilation_h + offset_h;
+ const T w_im = w_in + j * dilation_w + offset_w;
+ if (h_im > -1 && w_im > -1 && h_im < height && w_im < width)
+ val = dmcn_im2col_bilinear(data_im_ptr, width, height, width, h_im,
+ w_im);
+ *data_col_ptr = val * mask;
+ data_col_ptr += batch_size * height_col * width_col;
+ }
+ }
+ }
+}
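+
+// Compared with the plain deformable im2col in deform_conv_cuda_kernel.cuh,
+// this modulated (DCNv2) variant additionally multiplies every bilinearly
+// sampled value by a learned per-location mask (val * mask); the col2im
+// kernels below apply the same mask when propagating gradients, and the coord
+// kernel also produces grad_mask.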
+
+template <typename T>
+__global__ void modulated_deformable_col2im_gpu_kernel(
+ const int n, const T *data_col, const T *data_offset, const T *data_mask,
+ const int channels, const int height, const int width, const int kernel_h,
+ const int kernel_w, const int pad_h, const int pad_w, const int stride_h,
+ const int stride_w, const int dilation_h, const int dilation_w,
+ const int channel_per_deformable_group, const int batch_size,
+ const int deformable_group, const int height_col, const int width_col,
+ T *grad_im) {
+ CUDA_1D_KERNEL_LOOP(index, n) {
+ const int j = (index / width_col / height_col / batch_size) % kernel_w;
+ const int i =
+ (index / width_col / height_col / batch_size / kernel_w) % kernel_h;
+ const int c =
+ index / width_col / height_col / batch_size / kernel_w / kernel_h;
+ // compute the start and end of the output
+
+ const int deformable_group_index = c / channel_per_deformable_group;
+
+ int w_out = index % width_col;
+ int h_out = (index / width_col) % height_col;
+ int b = (index / width_col / height_col) % batch_size;
+ int w_in = w_out * stride_w - pad_w;
+ int h_in = h_out * stride_h - pad_h;
+
+ const T *data_offset_ptr =
+ data_offset + (b * deformable_group + deformable_group_index) * 2 *
+ kernel_h * kernel_w * height_col * width_col;
+ const T *data_mask_ptr =
+ data_mask + (b * deformable_group + deformable_group_index) * kernel_h *
+ kernel_w * height_col * width_col;
+ const int data_offset_h_ptr =
+ ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out;
+ const int data_offset_w_ptr =
+ ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out;
+ const int data_mask_hw_ptr =
+ ((i * kernel_w + j) * height_col + h_out) * width_col + w_out;
+ const T offset_h = data_offset_ptr[data_offset_h_ptr];
+ const T offset_w = data_offset_ptr[data_offset_w_ptr];
+ const T mask = data_mask_ptr[data_mask_hw_ptr];
+ const T cur_inv_h_data = h_in + i * dilation_h + offset_h;
+ const T cur_inv_w_data = w_in + j * dilation_w + offset_w;
+
+ const T cur_top_grad = data_col[index] * mask;
+ const int cur_h = (int)cur_inv_h_data;
+ const int cur_w = (int)cur_inv_w_data;
+ for (int dy = -2; dy <= 2; dy++) {
+ for (int dx = -2; dx <= 2; dx++) {
+ if (cur_h + dy >= 0 && cur_h + dy < height && cur_w + dx >= 0 &&
+ cur_w + dx < width && abs(cur_inv_h_data - (cur_h + dy)) < 1 &&
+ abs(cur_inv_w_data - (cur_w + dx)) < 1) {
+ int cur_bottom_grad_pos =
+ ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx;
+ T weight =
+ dmcn_get_gradient_weight(cur_inv_h_data, cur_inv_w_data,
+ cur_h + dy, cur_w + dx, height, width);
+ atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad);
+ }
+ }
+ }
+ }
+}
+
+template <typename T>
+__global__ void modulated_deformable_col2im_coord_gpu_kernel(
+ const int n, const T *data_col, const T *data_im, const T *data_offset,
+ const T *data_mask, const int channels, const int height, const int width,
+ const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
+ const int stride_h, const int stride_w, const int dilation_h,
+ const int dilation_w, const int channel_per_deformable_group,
+ const int batch_size, const int offset_channels, const int deformable_group,
+ const int height_col, const int width_col, T *grad_offset, T *grad_mask) {
+ CUDA_1D_KERNEL_LOOP(index, n) {
+ T val = 0, mval = 0;
+ int w = index % width_col;
+ int h = (index / width_col) % height_col;
+ int c = (index / width_col / height_col) % offset_channels;
+ int b = (index / width_col / height_col) / offset_channels;
+ // compute the start and end of the output
+
+ const int deformable_group_index = c / (2 * kernel_h * kernel_w);
+ const int col_step = kernel_h * kernel_w;
+ int cnt = 0;
+ const T *data_col_ptr = data_col + deformable_group_index *
+ channel_per_deformable_group *
+ batch_size * width_col * height_col;
+ const T *data_im_ptr =
+ data_im + (b * deformable_group + deformable_group_index) *
+ channel_per_deformable_group / kernel_h / kernel_w *
+ height * width;
+ const T *data_offset_ptr =
+ data_offset + (b * deformable_group + deformable_group_index) * 2 *
+ kernel_h * kernel_w * height_col * width_col;
+ const T *data_mask_ptr =
+ data_mask + (b * deformable_group + deformable_group_index) * kernel_h *
+ kernel_w * height_col * width_col;
+
+ const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w;
+
+ for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group;
+ col_c += col_step) {
+ const int col_pos =
+ (((col_c * batch_size + b) * height_col) + h) * width_col + w;
+ const int bp_dir = offset_c % 2;
+
+ int j = (col_pos / width_col / height_col / batch_size) % kernel_w;
+ int i =
+ (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h;
+ int w_out = col_pos % width_col;
+ int h_out = (col_pos / width_col) % height_col;
+ int w_in = w_out * stride_w - pad_w;
+ int h_in = h_out * stride_h - pad_h;
+ const int data_offset_h_ptr =
+ (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out);
+ const int data_offset_w_ptr =
+ (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col +
+ w_out);
+ const int data_mask_hw_ptr =
+ (((i * kernel_w + j) * height_col + h_out) * width_col + w_out);
+ const T offset_h = data_offset_ptr[data_offset_h_ptr];
+ const T offset_w = data_offset_ptr[data_offset_w_ptr];
+ const T mask = data_mask_ptr[data_mask_hw_ptr];
+ T inv_h = h_in + i * dilation_h + offset_h;
+ T inv_w = w_in + j * dilation_w + offset_w;
+ if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width)
+ inv_h = inv_w = -2;
+ else
+ mval += data_col_ptr[col_pos] *
+ dmcn_im2col_bilinear(data_im_ptr + cnt * height * width, width,
+ height, width, inv_h, inv_w);
+ const T weight = dmcn_get_coordinate_weight(
+ inv_h, inv_w, height, width, data_im_ptr + cnt * height * width,
+ width, bp_dir);
+ val += weight * data_col_ptr[col_pos] * mask;
+ cnt += 1;
+ }
+ // KERNEL_ASSIGN(grad_offset[index], offset_req, val);
+ grad_offset[index] = val;
+ if (offset_c % 2 == 0)
+ // KERNEL_ASSIGN(grad_mask[(((b * deformable_group +
+ // deformable_group_index) * kernel_h * kernel_w + offset_c / 2) *
+ // height_col + h) * width_col + w], mask_req, mval);
+ grad_mask[(((b * deformable_group + deformable_group_index) * kernel_h *
+ kernel_w +
+ offset_c / 2) *
+ height_col +
+ h) *
+ width_col +
+ w] = mval;
+ }
+}
+
+#endif // MODULATED_DEFORM_CONV_CUDA_KERNEL_CUH
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/ms_deform_attn_cuda_kernel.cuh b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/ms_deform_attn_cuda_kernel.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..4e59bd3dcd3c115e4152ebf771eda260b09236f3
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/ms_deform_attn_cuda_kernel.cuh
@@ -0,0 +1,797 @@
+/*!
+**************************************************************************************************
+* Deformable DETR
+* Copyright (c) 2020 SenseTime. All Rights Reserved.
+* Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+**************************************************************************************************
+* Modified from
+*https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+**************************************************************************************************
+*/
+#ifndef DEFORM_ATTN_CUDA_KERNEL
+#define DEFORM_ATTN_CUDA_KERNEL
+
+#include "common_cuda_helper.hpp"
+#include "pytorch_cuda_helper.hpp"
+
+const int CUDA_NUM_THREADS = 1024;
+
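+// Bilinear sampling of the value tensor laid out as (spatial, num_heads,
+// channels): the four neighbouring entries around (h, w) are gathered for
+// head m / channel c, with out-of-range corners contributing zero.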
+template <typename scalar_t>
+__device__ scalar_t ms_deform_attn_im2col_bilinear(
+ const scalar_t *&bottom_data, const int &height, const int &width,
+ const int &nheads, const int &channels, const scalar_t &h,
+ const scalar_t &w, const int &m, const int &c) {
+ const int h_low = floorf(h);
+ const int w_low = floorf(w);
+ const int h_high = h_low + 1;
+ const int w_high = w_low + 1;
+
+ const scalar_t lh = h - h_low;
+ const scalar_t lw = w - w_low;
+ const scalar_t hh = 1 - lh, hw = 1 - lw;
+
+ const int w_stride = nheads * channels;
+ const int h_stride = width * w_stride;
+ const int h_low_ptr_offset = h_low * h_stride;
+ const int h_high_ptr_offset = h_low_ptr_offset + h_stride;
+ const int w_low_ptr_offset = w_low * w_stride;
+ const int w_high_ptr_offset = w_low_ptr_offset + w_stride;
+ const int base_ptr = m * channels + c;
+
+ scalar_t v1 = 0;
+ if (h_low >= 0 && w_low >= 0) {
+ const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr;
+ v1 = bottom_data[ptr1];
+ }
+ scalar_t v2 = 0;
+ if (h_low >= 0 && w_high <= width - 1) {
+ const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr;
+ v2 = bottom_data[ptr2];
+ }
+ scalar_t v3 = 0;
+ if (h_high <= height - 1 && w_low >= 0) {
+ const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr;
+ v3 = bottom_data[ptr3];
+ }
+ scalar_t v4 = 0;
+ if (h_high <= height - 1 && w_high <= width - 1) {
+ const int ptr4 = h_high_ptr_offset + w_high_ptr_offset + base_ptr;
+ v4 = bottom_data[ptr4];
+ }
+
+ const scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
+
+ const scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
+ return val;
+}
+
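+// Backward counterpart of the sampler above: scatters top_grad * attn_weight
+// to the four neighbouring value entries via atomicAdd and writes the
+// per-thread gradients of the sampling location and attention weight into the
+// caller-provided (typically shared-memory) slots.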
+template <typename scalar_t>
+__device__ void ms_deform_attn_col2im_bilinear(
+ const scalar_t *&bottom_data, const int &height, const int &width,
+ const int &nheads, const int &channels, const scalar_t &h,
+ const scalar_t &w, const int &m, const int &c, const scalar_t &top_grad,
+ const scalar_t &attn_weight, scalar_t *&grad_value,
+ scalar_t *grad_sampling_loc, scalar_t *grad_attn_weight) {
+ const int h_low = floorf(h);
+ const int w_low = floorf(w);
+ const int h_high = h_low + 1;
+ const int w_high = w_low + 1;
+
+ const scalar_t lh = h - h_low;
+ const scalar_t lw = w - w_low;
+ const scalar_t hh = 1 - lh, hw = 1 - lw;
+
+ const int w_stride = nheads * channels;
+ const int h_stride = width * w_stride;
+ const int h_low_ptr_offset = h_low * h_stride;
+ const int h_high_ptr_offset = h_low_ptr_offset + h_stride;
+ const int w_low_ptr_offset = w_low * w_stride;
+ const int w_high_ptr_offset = w_low_ptr_offset + w_stride;
+ const int base_ptr = m * channels + c;
+
+ const scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
+ const scalar_t top_grad_value = top_grad * attn_weight;
+ scalar_t grad_h_weight = 0, grad_w_weight = 0;
+
+ scalar_t v1 = 0;
+ if (h_low >= 0 && w_low >= 0) {
+ const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr;
+ v1 = bottom_data[ptr1];
+ grad_h_weight -= hw * v1;
+ grad_w_weight -= hh * v1;
+ atomicAdd(grad_value + ptr1, w1 * top_grad_value);
+ }
+ scalar_t v2 = 0;
+ if (h_low >= 0 && w_high <= width - 1) {
+ const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr;
+ v2 = bottom_data[ptr2];
+ grad_h_weight -= lw * v2;
+ grad_w_weight += hh * v2;
+ atomicAdd(grad_value + ptr2, w2 * top_grad_value);
+ }
+ scalar_t v3 = 0;
+ if (h_high <= height - 1 && w_low >= 0) {
+ const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr;
+ v3 = bottom_data[ptr3];
+ grad_h_weight += hw * v3;
+ grad_w_weight -= lh * v3;
+ atomicAdd(grad_value + ptr3, w3 * top_grad_value);
+ }
+ scalar_t v4 = 0;
+ if (h_high <= height - 1 && w_high <= width - 1) {
+ const int ptr4 = h_high_ptr_offset + w_high_ptr_offset + base_ptr;
+ v4 = bottom_data[ptr4];
+ grad_h_weight += lw * v4;
+ grad_w_weight += lh * v4;
+ atomicAdd(grad_value + ptr4, w4 * top_grad_value);
+ }
+
+ const scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
+ *grad_attn_weight = top_grad * val;
+ *grad_sampling_loc = width * grad_w_weight * top_grad_value;
+ *(grad_sampling_loc + 1) = height * grad_h_weight * top_grad_value;
+}
+
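+// Same as ms_deform_attn_col2im_bilinear, but the sampling-location and
+// attention-weight gradients are accumulated directly in global memory with
+// atomicAdd instead of being staged in shared memory first.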
+template <typename scalar_t>
+__device__ void ms_deform_attn_col2im_bilinear_gm(
+ const scalar_t *&bottom_data, const int &height, const int &width,
+ const int &nheads, const int &channels, const scalar_t &h,
+ const scalar_t &w, const int &m, const int &c, const scalar_t &top_grad,
+ const scalar_t &attn_weight, scalar_t *&grad_value,
+ scalar_t *grad_sampling_loc, scalar_t *grad_attn_weight) {
+ const int h_low = floorf(h);
+ const int w_low = floorf(w);
+ const int h_high = h_low + 1;
+ const int w_high = w_low + 1;
+
+ const scalar_t lh = h - h_low;
+ const scalar_t lw = w - w_low;
+ const scalar_t hh = 1 - lh, hw = 1 - lw;
+
+ const int w_stride = nheads * channels;
+ const int h_stride = width * w_stride;
+ const int h_low_ptr_offset = h_low * h_stride;
+ const int h_high_ptr_offset = h_low_ptr_offset + h_stride;
+ const int w_low_ptr_offset = w_low * w_stride;
+ const int w_high_ptr_offset = w_low_ptr_offset + w_stride;
+ const int base_ptr = m * channels + c;
+
+ const scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
+ const scalar_t top_grad_value = top_grad * attn_weight;
+ scalar_t grad_h_weight = 0, grad_w_weight = 0;
+
+ scalar_t v1 = 0;
+ if (h_low >= 0 && w_low >= 0) {
+ const int ptr1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr;
+ v1 = bottom_data[ptr1];
+ grad_h_weight -= hw * v1;
+ grad_w_weight -= hh * v1;
+ atomicAdd(grad_value + ptr1, w1 * top_grad_value);
+ }
+ scalar_t v2 = 0;
+ if (h_low >= 0 && w_high <= width - 1) {
+ const int ptr2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr;
+ v2 = bottom_data[ptr2];
+ grad_h_weight -= lw * v2;
+ grad_w_weight += hh * v2;
+ atomicAdd(grad_value + ptr2, w2 * top_grad_value);
+ }
+ scalar_t v3 = 0;
+ if (h_high <= height - 1 && w_low >= 0) {
+ const int ptr3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr;
+ v3 = bottom_data[ptr3];
+ grad_h_weight += hw * v3;
+ grad_w_weight -= lh * v3;
+ atomicAdd(grad_value + ptr3, w3 * top_grad_value);
+ }
+ scalar_t v4 = 0;
+ if (h_high <= height - 1 && w_high <= width - 1) {
+ const int ptr4 = h_high_ptr_offset + w_high_ptr_offset + base_ptr;
+ v4 = bottom_data[ptr4];
+ grad_h_weight += lw * v4;
+ grad_w_weight += lh * v4;
+ atomicAdd(grad_value + ptr4, w4 * top_grad_value);
+ }
+
+ const scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
+ atomicAdd(grad_attn_weight, top_grad * val);
+ atomicAdd(grad_sampling_loc, width * grad_w_weight * top_grad_value);
+ atomicAdd(grad_sampling_loc + 1, height * grad_h_weight * top_grad_value);
+}
+
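+// Forward im2col kernel: one thread per output element (batch, query, head,
+// channel). For every feature level and sampling point, the value tensor is
+// bilinearly sampled and accumulated with its attention weight.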
+template <typename scalar_t>
+__global__ void ms_deformable_im2col_gpu_kernel(
+ const int n, const scalar_t *data_value, const int64_t *data_spatial_shapes,
+ const int64_t *data_level_start_index, const scalar_t *data_sampling_loc,
+ const scalar_t *data_attn_weight, const int batch_size,
+ const int spatial_size, const int num_heads, const int channels,
+ const int num_levels, const int num_query, const int num_point,
+ scalar_t *data_col) {
+ CUDA_1D_KERNEL_LOOP(index, n) {
+ int _temp = index;
+ const int c_col = _temp % channels;
+ _temp /= channels;
+ const int sampling_index = _temp;
+ const int m_col = _temp % num_heads;
+ _temp /= num_heads;
+ _temp /= num_query;
+ const int b_col = _temp;
+
+ scalar_t *data_col_ptr = data_col + index;
+ int data_weight_ptr = sampling_index * num_levels * num_point;
+ int data_loc_w_ptr = data_weight_ptr << 1;
+ const int qid_stride = num_heads * channels;
+ const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;
+ scalar_t col = 0;
+
+ for (int l_col = 0; l_col < num_levels; ++l_col) {
+ const int level_start_id = data_level_start_index[l_col];
+ const int spatial_h_ptr = l_col << 1;
+ const int spatial_h = data_spatial_shapes[spatial_h_ptr];
+ const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];
+ const scalar_t *data_value_ptr =
+ data_value +
+ (data_value_ptr_init_offset + level_start_id * qid_stride);
+ for (int p_col = 0; p_col < num_point; ++p_col) {
+ const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];
+ const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];
+ const scalar_t weight = data_attn_weight[data_weight_ptr];
+
+ const scalar_t h_im = loc_h * spatial_h - 0.5;
+ const scalar_t w_im = loc_w * spatial_w - 0.5;
+
+ if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) {
+ col += ms_deform_attn_im2col_bilinear(data_value_ptr, spatial_h,
+ spatial_w, num_heads, channels,
+ h_im, w_im, m_col, c_col) *
+ weight;
+ }
+
+ data_weight_ptr += 1;
+ data_loc_w_ptr += 2;
+ }
+ }
+ *data_col_ptr = col;
+ }
+}
+
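+// Backward kernel specialized on a compile-time block size: each thread
+// stages its partial gradients in statically sized shared memory, and thread
+// 0 reduces them serially before writing out the block result.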
+template <typename scalar_t, unsigned int blockSize>
+__global__ void ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1(
+ const int n, const scalar_t *grad_col, const scalar_t *data_value,
+ const int64_t *data_spatial_shapes, const int64_t *data_level_start_index,
+ const scalar_t *data_sampling_loc, const scalar_t *data_attn_weight,
+ const int batch_size, const int spatial_size, const int num_heads,
+ const int channels, const int num_levels, const int num_query,
+ const int num_point, scalar_t *grad_value, scalar_t *grad_sampling_loc,
+ scalar_t *grad_attn_weight) {
+ CUDA_1D_KERNEL_LOOP(index, n) {
+ __shared__ scalar_t cache_grad_sampling_loc[blockSize * 2];
+ __shared__ scalar_t cache_grad_attn_weight[blockSize];
+ unsigned int tid = threadIdx.x;
+ int _temp = index;
+ const int c_col = _temp % channels;
+ _temp /= channels;
+ const int sampling_index = _temp;
+ const int m_col = _temp % num_heads;
+ _temp /= num_heads;
+ _temp /= num_query;
+ const int b_col = _temp;
+
+ const scalar_t top_grad = grad_col[index];
+
+ int data_weight_ptr = sampling_index * num_levels * num_point;
+ int data_loc_w_ptr = data_weight_ptr << 1;
+ const int grad_sampling_ptr = data_weight_ptr;
+ grad_sampling_loc += grad_sampling_ptr << 1;
+ grad_attn_weight += grad_sampling_ptr;
+ const int grad_weight_stride = 1;
+ const int grad_loc_stride = 2;
+ const int qid_stride = num_heads * channels;
+ const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;
+
+ for (int l_col = 0; l_col < num_levels; ++l_col) {
+ const int level_start_id = data_level_start_index[l_col];
+ const int spatial_h_ptr = l_col << 1;
+ const int spatial_h = data_spatial_shapes[spatial_h_ptr];
+ const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];
+ const int value_ptr_offset =
+ data_value_ptr_init_offset + level_start_id * qid_stride;
+ const scalar_t *data_value_ptr = data_value + value_ptr_offset;
+ scalar_t *grad_value_ptr = grad_value + value_ptr_offset;
+
+ for (int p_col = 0; p_col < num_point; ++p_col) {
+ const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];
+ const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];
+ const scalar_t weight = data_attn_weight[data_weight_ptr];
+
+ const scalar_t h_im = loc_h * spatial_h - 0.5;
+ const scalar_t w_im = loc_w * spatial_w - 0.5;
+ *(cache_grad_sampling_loc + (threadIdx.x << 1)) = 0;
+ *(cache_grad_sampling_loc + ((threadIdx.x << 1) + 1)) = 0;
+ *(cache_grad_attn_weight + threadIdx.x) = 0;
+ if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) {
+ ms_deform_attn_col2im_bilinear(
+ data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im,
+ w_im, m_col, c_col, top_grad, weight, grad_value_ptr,
+ cache_grad_sampling_loc + (threadIdx.x << 1),
+ cache_grad_attn_weight + threadIdx.x);
+ }
+
+ __syncthreads();
+ if (tid == 0) {
+ scalar_t _grad_w = cache_grad_sampling_loc[0],
+ _grad_h = cache_grad_sampling_loc[1],
+ _grad_a = cache_grad_attn_weight[0];
+ int sid = 2;
+ for (unsigned int tid = 1; tid < blockSize; ++tid) {
+ _grad_w += cache_grad_sampling_loc[sid];
+ _grad_h += cache_grad_sampling_loc[sid + 1];
+ _grad_a += cache_grad_attn_weight[tid];
+ sid += 2;
+ }
+
+ *grad_sampling_loc = _grad_w;
+ *(grad_sampling_loc + 1) = _grad_h;
+ *grad_attn_weight = _grad_a;
+ }
+ __syncthreads();
+
+ data_weight_ptr += 1;
+ data_loc_w_ptr += 2;
+ grad_attn_weight += grad_weight_stride;
+ grad_sampling_loc += grad_loc_stride;
+ }
+ }
+ }
+}
+
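+// Variant of the kernel above that replaces the serial reduction with a
+// shared-memory tree reduction (blockSize is assumed to be a power of two).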
+template <typename scalar_t, unsigned int blockSize>
+__global__ void ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2(
+ const int n, const scalar_t *grad_col, const scalar_t *data_value,
+ const int64_t *data_spatial_shapes, const int64_t *data_level_start_index,
+ const scalar_t *data_sampling_loc, const scalar_t *data_attn_weight,
+ const int batch_size, const int spatial_size, const int num_heads,
+ const int channels, const int num_levels, const int num_query,
+ const int num_point, scalar_t *grad_value, scalar_t *grad_sampling_loc,
+ scalar_t *grad_attn_weight) {
+ CUDA_1D_KERNEL_LOOP(index, n) {
+ __shared__ scalar_t cache_grad_sampling_loc[blockSize * 2];
+ __shared__ scalar_t cache_grad_attn_weight[blockSize];
+ unsigned int tid = threadIdx.x;
+ int _temp = index;
+ const int c_col = _temp % channels;
+ _temp /= channels;
+ const int sampling_index = _temp;
+ const int m_col = _temp % num_heads;
+ _temp /= num_heads;
+ _temp /= num_query;
+ const int b_col = _temp;
+
+ const scalar_t top_grad = grad_col[index];
+
+ int data_weight_ptr = sampling_index * num_levels * num_point;
+ int data_loc_w_ptr = data_weight_ptr << 1;
+ const int grad_sampling_ptr = data_weight_ptr;
+ grad_sampling_loc += grad_sampling_ptr << 1;
+ grad_attn_weight += grad_sampling_ptr;
+ const int grad_weight_stride = 1;
+ const int grad_loc_stride = 2;
+ const int qid_stride = num_heads * channels;
+ const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;
+
+ for (int l_col = 0; l_col < num_levels; ++l_col) {
+ const int level_start_id = data_level_start_index[l_col];
+ const int spatial_h_ptr = l_col << 1;
+ const int spatial_h = data_spatial_shapes[spatial_h_ptr];
+ const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];
+ const int value_ptr_offset =
+ data_value_ptr_init_offset + level_start_id * qid_stride;
+ const scalar_t *data_value_ptr = data_value + value_ptr_offset;
+ scalar_t *grad_value_ptr = grad_value + value_ptr_offset;
+
+ for (int p_col = 0; p_col < num_point; ++p_col) {
+ const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];
+ const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];
+ const scalar_t weight = data_attn_weight[data_weight_ptr];
+
+ const scalar_t h_im = loc_h * spatial_h - 0.5;
+ const scalar_t w_im = loc_w * spatial_w - 0.5;
+ *(cache_grad_sampling_loc + (threadIdx.x << 1)) = 0;
+ *(cache_grad_sampling_loc + ((threadIdx.x << 1) + 1)) = 0;
+ *(cache_grad_attn_weight + threadIdx.x) = 0;
+ if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) {
+ ms_deform_attn_col2im_bilinear(
+ data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im,
+ w_im, m_col, c_col, top_grad, weight, grad_value_ptr,
+ cache_grad_sampling_loc + (threadIdx.x << 1),
+ cache_grad_attn_weight + threadIdx.x);
+ }
+
+ __syncthreads();
+
+ for (unsigned int s = blockSize / 2; s > 0; s >>= 1) {
+ if (tid < s) {
+ const unsigned int xid1 = tid << 1;
+ const unsigned int xid2 = (tid + s) << 1;
+ cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + s];
+ cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2];
+ cache_grad_sampling_loc[xid1 + 1] +=
+ cache_grad_sampling_loc[xid2 + 1];
+ }
+ __syncthreads();
+ }
+
+ if (tid == 0) {
+ *grad_sampling_loc = cache_grad_sampling_loc[0];
+ *(grad_sampling_loc + 1) = cache_grad_sampling_loc[1];
+ *grad_attn_weight = cache_grad_attn_weight[0];
+ }
+ __syncthreads();
+
+ data_weight_ptr += 1;
+ data_loc_w_ptr += 2;
+ grad_attn_weight += grad_weight_stride;
+ grad_sampling_loc += grad_loc_stride;
+ }
+ }
+ }
+}
+
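+// Variant using dynamically allocated shared memory (extern __shared__), so
+// the block size can be chosen at launch time; thread 0 still reduces the
+// cached gradients serially.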
+template <typename scalar_t>
+__global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v1(
+ const int n, const scalar_t *grad_col, const scalar_t *data_value,
+ const int64_t *data_spatial_shapes, const int64_t *data_level_start_index,
+ const scalar_t *data_sampling_loc, const scalar_t *data_attn_weight,
+ const int batch_size, const int spatial_size, const int num_heads,
+ const int channels, const int num_levels, const int num_query,
+ const int num_point, scalar_t *grad_value, scalar_t *grad_sampling_loc,
+ scalar_t *grad_attn_weight) {
+ CUDA_1D_KERNEL_LOOP(index, n) {
+ extern __shared__ int _s[];
+ scalar_t *cache_grad_sampling_loc = reinterpret_cast<scalar_t *>(_s);
+ scalar_t *cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x;
+ unsigned int tid = threadIdx.x;
+ int _temp = index;
+ const int c_col = _temp % channels;
+ _temp /= channels;
+ const int sampling_index = _temp;
+ const int m_col = _temp % num_heads;
+ _temp /= num_heads;
+ _temp /= num_query;
+ const int b_col = _temp;
+
+ const scalar_t top_grad = grad_col[index];
+
+ int data_weight_ptr = sampling_index * num_levels * num_point;
+ int data_loc_w_ptr = data_weight_ptr << 1;
+ const int grad_sampling_ptr = data_weight_ptr;
+ grad_sampling_loc += grad_sampling_ptr << 1;
+ grad_attn_weight += grad_sampling_ptr;
+ const int grad_weight_stride = 1;
+ const int grad_loc_stride = 2;
+ const int qid_stride = num_heads * channels;
+ const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;
+
+ for (int l_col = 0; l_col < num_levels; ++l_col) {
+ const int level_start_id = data_level_start_index[l_col];
+ const int spatial_h_ptr = l_col << 1;
+ const int spatial_h = data_spatial_shapes[spatial_h_ptr];
+ const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];
+ const int value_ptr_offset =
+ data_value_ptr_init_offset + level_start_id * qid_stride;
+ const scalar_t *data_value_ptr = data_value + value_ptr_offset;
+ scalar_t *grad_value_ptr = grad_value + value_ptr_offset;
+
+ for (int p_col = 0; p_col < num_point; ++p_col) {
+ const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];
+ const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];
+ const scalar_t weight = data_attn_weight[data_weight_ptr];
+
+ const scalar_t h_im = loc_h * spatial_h - 0.5;
+ const scalar_t w_im = loc_w * spatial_w - 0.5;
+ *(cache_grad_sampling_loc + (threadIdx.x << 1)) = 0;
+ *(cache_grad_sampling_loc + ((threadIdx.x << 1) + 1)) = 0;
+ *(cache_grad_attn_weight + threadIdx.x) = 0;
+ if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) {
+ ms_deform_attn_col2im_bilinear(
+ data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im,
+ w_im, m_col, c_col, top_grad, weight, grad_value_ptr,
+ cache_grad_sampling_loc + (threadIdx.x << 1),
+ cache_grad_attn_weight + threadIdx.x);
+ }
+
+ __syncthreads();
+ if (tid == 0) {
+ scalar_t _grad_w = cache_grad_sampling_loc[0],
+ _grad_h = cache_grad_sampling_loc[1],
+ _grad_a = cache_grad_attn_weight[0];
+ int sid = 2;
+ for (unsigned int tid = 1; tid < blockDim.x; ++tid) {
+ _grad_w += cache_grad_sampling_loc[sid];
+ _grad_h += cache_grad_sampling_loc[sid + 1];
+ _grad_a += cache_grad_attn_weight[tid];
+ sid += 2;
+ }
+
+ *grad_sampling_loc = _grad_w;
+ *(grad_sampling_loc + 1) = _grad_h;
+ *grad_attn_weight = _grad_a;
+ }
+ __syncthreads();
+
+ data_weight_ptr += 1;
+ data_loc_w_ptr += 2;
+ grad_attn_weight += grad_weight_stride;
+ grad_sampling_loc += grad_loc_stride;
+ }
+ }
+ }
+}
+
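+// Dynamic shared-memory variant with a tree reduction; the extra `spre` term
+// folds in the leftover element when the active width is not a power of two.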
+template <typename scalar_t>
+__global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v2(
+ const int n, const scalar_t *grad_col, const scalar_t *data_value,
+ const int64_t *data_spatial_shapes, const int64_t *data_level_start_index,
+ const scalar_t *data_sampling_loc, const scalar_t *data_attn_weight,
+ const int batch_size, const int spatial_size, const int num_heads,
+ const int channels, const int num_levels, const int num_query,
+ const int num_point, scalar_t *grad_value, scalar_t *grad_sampling_loc,
+ scalar_t *grad_attn_weight) {
+ CUDA_1D_KERNEL_LOOP(index, n) {
+ extern __shared__ int _s[];
+ scalar_t *cache_grad_sampling_loc = reinterpret_cast<scalar_t *>(_s);
+ scalar_t *cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x;
+ unsigned int tid = threadIdx.x;
+ int _temp = index;
+ const int c_col = _temp % channels;
+ _temp /= channels;
+ const int sampling_index = _temp;
+ const int m_col = _temp % num_heads;
+ _temp /= num_heads;
+ _temp /= num_query;
+ const int b_col = _temp;
+
+ const scalar_t top_grad = grad_col[index];
+
+ int data_weight_ptr = sampling_index * num_levels * num_point;
+ int data_loc_w_ptr = data_weight_ptr << 1;
+ const int grad_sampling_ptr = data_weight_ptr;
+ grad_sampling_loc += grad_sampling_ptr << 1;
+ grad_attn_weight += grad_sampling_ptr;
+ const int grad_weight_stride = 1;
+ const int grad_loc_stride = 2;
+ const int qid_stride = num_heads * channels;
+ const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;
+
+ for (int l_col = 0; l_col < num_levels; ++l_col) {
+ const int level_start_id = data_level_start_index[l_col];
+ const int spatial_h_ptr = l_col << 1;
+ const int spatial_h = data_spatial_shapes[spatial_h_ptr];
+ const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];
+ const int value_ptr_offset =
+ data_value_ptr_init_offset + level_start_id * qid_stride;
+ const scalar_t *data_value_ptr = data_value + value_ptr_offset;
+ scalar_t *grad_value_ptr = grad_value + value_ptr_offset;
+
+ for (int p_col = 0; p_col < num_point; ++p_col) {
+ const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];
+ const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];
+ const scalar_t weight = data_attn_weight[data_weight_ptr];
+
+ const scalar_t h_im = loc_h * spatial_h - 0.5;
+ const scalar_t w_im = loc_w * spatial_w - 0.5;
+ *(cache_grad_sampling_loc + (threadIdx.x << 1)) = 0;
+ *(cache_grad_sampling_loc + ((threadIdx.x << 1) + 1)) = 0;
+ *(cache_grad_attn_weight + threadIdx.x) = 0;
+ if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) {
+ ms_deform_attn_col2im_bilinear(
+ data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im,
+ w_im, m_col, c_col, top_grad, weight, grad_value_ptr,
+ cache_grad_sampling_loc + (threadIdx.x << 1),
+ cache_grad_attn_weight + threadIdx.x);
+ }
+
+ __syncthreads();
+
+ for (unsigned int s = blockDim.x / 2, spre = blockDim.x; s > 0;
+ s >>= 1, spre >>= 1) {
+ if (tid < s) {
+ const unsigned int xid1 = tid << 1;
+ const unsigned int xid2 = (tid + s) << 1;
+ cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + s];
+ cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2];
+ cache_grad_sampling_loc[xid1 + 1] +=
+ cache_grad_sampling_loc[xid2 + 1];
+ if (tid + (s << 1) < spre) {
+ cache_grad_attn_weight[tid] +=
+ cache_grad_attn_weight[tid + (s << 1)];
+ cache_grad_sampling_loc[xid1] +=
+ cache_grad_sampling_loc[xid2 + (s << 1)];
+ cache_grad_sampling_loc[xid1 + 1] +=
+ cache_grad_sampling_loc[xid2 + 1 + (s << 1)];
+ }
+ }
+ __syncthreads();
+ }
+
+ if (tid == 0) {
+ *grad_sampling_loc = cache_grad_sampling_loc[0];
+ *(grad_sampling_loc + 1) = cache_grad_sampling_loc[1];
+ *grad_attn_weight = cache_grad_attn_weight[0];
+ }
+ __syncthreads();
+
+ data_weight_ptr += 1;
+ data_loc_w_ptr += 2;
+ grad_attn_weight += grad_weight_stride;
+ grad_sampling_loc += grad_loc_stride;
+ }
+ }
+ }
+}
+
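+// Same tree reduction as above, but the per-block results are added to global
+// memory with atomicAdd so that several blocks can safely contribute to the
+// same gradient slot.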
+template <typename scalar_t>
+__global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v2_multi_blocks(
+ const int n, const scalar_t *grad_col, const scalar_t *data_value,
+ const int64_t *data_spatial_shapes, const int64_t *data_level_start_index,
+ const scalar_t *data_sampling_loc, const scalar_t *data_attn_weight,
+ const int batch_size, const int spatial_size, const int num_heads,
+ const int channels, const int num_levels, const int num_query,
+ const int num_point, scalar_t *grad_value, scalar_t *grad_sampling_loc,
+ scalar_t *grad_attn_weight) {
+ CUDA_1D_KERNEL_LOOP(index, n) {
+ extern __shared__ int _s[];
+ scalar_t *cache_grad_sampling_loc = reinterpret_cast<scalar_t *>(_s);
+ scalar_t *cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x;
+ unsigned int tid = threadIdx.x;
+ int _temp = index;
+ const int c_col = _temp % channels;
+ _temp /= channels;
+ const int sampling_index = _temp;
+ const int m_col = _temp % num_heads;
+ _temp /= num_heads;
+ _temp /= num_query;
+ const int b_col = _temp;
+
+ const scalar_t top_grad = grad_col[index];
+
+ int data_weight_ptr = sampling_index * num_levels * num_point;
+ int data_loc_w_ptr = data_weight_ptr << 1;
+ const int grad_sampling_ptr = data_weight_ptr;
+ grad_sampling_loc += grad_sampling_ptr << 1;
+ grad_attn_weight += grad_sampling_ptr;
+ const int grad_weight_stride = 1;
+ const int grad_loc_stride = 2;
+ const int qid_stride = num_heads * channels;
+ const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;
+
+ for (int l_col = 0; l_col < num_levels; ++l_col) {
+ const int level_start_id = data_level_start_index[l_col];
+ const int spatial_h_ptr = l_col << 1;
+ const int spatial_h = data_spatial_shapes[spatial_h_ptr];
+ const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];
+ const int value_ptr_offset =
+ data_value_ptr_init_offset + level_start_id * qid_stride;
+ const scalar_t *data_value_ptr = data_value + value_ptr_offset;
+ scalar_t *grad_value_ptr = grad_value + value_ptr_offset;
+
+ for (int p_col = 0; p_col < num_point; ++p_col) {
+ const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];
+ const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];
+ const scalar_t weight = data_attn_weight[data_weight_ptr];
+
+ const scalar_t h_im = loc_h * spatial_h - 0.5;
+ const scalar_t w_im = loc_w * spatial_w - 0.5;
+ *(cache_grad_sampling_loc + (threadIdx.x << 1)) = 0;
+ *(cache_grad_sampling_loc + ((threadIdx.x << 1) + 1)) = 0;
+ *(cache_grad_attn_weight + threadIdx.x) = 0;
+ if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) {
+ ms_deform_attn_col2im_bilinear(
+ data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im,
+ w_im, m_col, c_col, top_grad, weight, grad_value_ptr,
+ cache_grad_sampling_loc + (threadIdx.x << 1),
+ cache_grad_attn_weight + threadIdx.x);
+ }
+
+ __syncthreads();
+
+ for (unsigned int s = blockDim.x / 2, spre = blockDim.x; s > 0;
+ s >>= 1, spre >>= 1) {
+ if (tid < s) {
+ const unsigned int xid1 = tid << 1;
+ const unsigned int xid2 = (tid + s) << 1;
+ cache_grad_attn_weight[tid] += cache_grad_attn_weight[tid + s];
+ cache_grad_sampling_loc[xid1] += cache_grad_sampling_loc[xid2];
+ cache_grad_sampling_loc[xid1 + 1] +=
+ cache_grad_sampling_loc[xid2 + 1];
+ if (tid + (s << 1) < spre) {
+ cache_grad_attn_weight[tid] +=
+ cache_grad_attn_weight[tid + (s << 1)];
+ cache_grad_sampling_loc[xid1] +=
+ cache_grad_sampling_loc[xid2 + (s << 1)];
+ cache_grad_sampling_loc[xid1 + 1] +=
+ cache_grad_sampling_loc[xid2 + 1 + (s << 1)];
+ }
+ }
+ __syncthreads();
+ }
+
+ if (tid == 0) {
+ atomicAdd(grad_sampling_loc, cache_grad_sampling_loc[0]);
+ atomicAdd(grad_sampling_loc + 1, cache_grad_sampling_loc[1]);
+ atomicAdd(grad_attn_weight, cache_grad_attn_weight[0]);
+ }
+ __syncthreads();
+
+ data_weight_ptr += 1;
+ data_loc_w_ptr += 2;
+ grad_attn_weight += grad_weight_stride;
+ grad_sampling_loc += grad_loc_stride;
+ }
+ }
+ }
+}
+
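+// Fallback backward kernel that skips shared memory entirely: every thread
+// accumulates its gradients straight into global memory via
+// ms_deform_attn_col2im_bilinear_gm.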
+template <typename scalar_t>
+__global__ void ms_deformable_col2im_gpu_kernel_gm(
+ const int n, const scalar_t *grad_col, const scalar_t *data_value,
+ const int64_t *data_spatial_shapes, const int64_t *data_level_start_index,
+ const scalar_t *data_sampling_loc, const scalar_t *data_attn_weight,
+ const int batch_size, const int spatial_size, const int num_heads,
+ const int channels, const int num_levels, const int num_query,
+ const int num_point, scalar_t *grad_value, scalar_t *grad_sampling_loc,
+ scalar_t *grad_attn_weight) {
+ CUDA_1D_KERNEL_LOOP(index, n) {
+ int _temp = index;
+ const int c_col = _temp % channels;
+ _temp /= channels;
+ const int sampling_index = _temp;
+ const int m_col = _temp % num_heads;
+ _temp /= num_heads;
+ _temp /= num_query;
+ const int b_col = _temp;
+
+ const scalar_t top_grad = grad_col[index];
+
+ int data_weight_ptr = sampling_index * num_levels * num_point;
+ int data_loc_w_ptr = data_weight_ptr << 1;
+ const int grad_sampling_ptr = data_weight_ptr;
+ grad_sampling_loc += grad_sampling_ptr << 1;
+ grad_attn_weight += grad_sampling_ptr;
+ const int grad_weight_stride = 1;
+ const int grad_loc_stride = 2;
+ const int qid_stride = num_heads * channels;
+ const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride;
+
+ for (int l_col = 0; l_col < num_levels; ++l_col) {
+ const int level_start_id = data_level_start_index[l_col];
+ const int spatial_h_ptr = l_col << 1;
+ const int spatial_h = data_spatial_shapes[spatial_h_ptr];
+ const int spatial_w = data_spatial_shapes[spatial_h_ptr + 1];
+ const int value_ptr_offset =
+ data_value_ptr_init_offset + level_start_id * qid_stride;
+ const scalar_t *data_value_ptr = data_value + value_ptr_offset;
+ scalar_t *grad_value_ptr = grad_value + value_ptr_offset;
+
+ for (int p_col = 0; p_col < num_point; ++p_col) {
+ const scalar_t loc_w = data_sampling_loc[data_loc_w_ptr];
+ const scalar_t loc_h = data_sampling_loc[data_loc_w_ptr + 1];
+ const scalar_t weight = data_attn_weight[data_weight_ptr];
+
+ const scalar_t h_im = loc_h * spatial_h - 0.5;
+ const scalar_t w_im = loc_w * spatial_w - 0.5;
+ if (h_im > -1 && w_im > -1 && h_im < spatial_h && w_im < spatial_w) {
+ ms_deform_attn_col2im_bilinear_gm(
+ data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im,
+ w_im, m_col, c_col, top_grad, weight, grad_value_ptr,
+ grad_sampling_loc, grad_attn_weight);
+ }
+ data_weight_ptr += 1;
+ data_loc_w_ptr += 2;
+ grad_attn_weight += grad_weight_stride;
+ grad_sampling_loc += grad_loc_stride;
+ }
+ }
+ }
+}
+#endif // DEFORM_ATTN_CUDA_KERNEL
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/nms_cuda_kernel.cuh b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/nms_cuda_kernel.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..caf777e8c1706f9422cb64d8288eab53b9b15a8a
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/nms_cuda_kernel.cuh
@@ -0,0 +1,88 @@
+// encoding=utf-8
+// Copyright 2021 Huawei Technologies Co., Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef NMS_CUDA_KERNEL_CUH
+#define NMS_CUDA_KERNEL_CUH
+
+#include <vector>
+#ifdef MMCV_WITH_TRT
+#include "common_cuda_helper.hpp"
+#else // MMCV_WITH_TRT
+#ifdef MMCV_USE_PARROTS
+#include "parrots_cuda_helper.hpp"
+#else // MMCV_USE_PARROTS
+#include "pytorch_cuda_helper.hpp"
+#endif // MMCV_USE_PARROTS
+#endif // MMCV_WITH_TRT
+
+int const threadsPerBlock = sizeof(unsigned long long int) * 8;
+
+__device__ inline bool devIoU(float const *const a, float const *const b,
+ const int offset, const float threshold) {
+ float left = fmaxf(a[0], b[0]), right = fminf(a[2], b[2]);
+ float top = fmaxf(a[1], b[1]), bottom = fminf(a[3], b[3]);
+ float width = fmaxf(right - left + offset, 0.f),
+ height = fmaxf(bottom - top + offset, 0.f);
+ float interS = width * height;
+ float Sa = (a[2] - a[0] + offset) * (a[3] - a[1] + offset);
+ float Sb = (b[2] - b[0] + offset) * (b[3] - b[1] + offset);
+ return interS > threshold * (Sa + Sb - interS);
+}
+
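+// Each (row_start, col_start) block compares one tile of up to
+// threadsPerBlock boxes against another tile and records which boxes it
+// suppresses as a 64-bit bitmask in dev_mask (one word per box and column
+// tile); the host-side wrapper is expected to combine these masks into the
+// final keep list.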
+__global__ void nms_cuda(const int n_boxes, const float iou_threshold,
+ const int offset, const float *dev_boxes,
+ unsigned long long *dev_mask) {
+ int blocks = (n_boxes + threadsPerBlock - 1) / threadsPerBlock;
+ CUDA_2D_KERNEL_BLOCK_LOOP(col_start, blocks, row_start, blocks) {
+ const int tid = threadIdx.x;
+
+ if (row_start > col_start) return;
+
+ const int row_size =
+ fminf(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
+ const int col_size =
+ fminf(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
+
+ __shared__ float block_boxes[threadsPerBlock * 4];
+ if (tid < col_size) {
+ block_boxes[tid * 4 + 0] =
+ dev_boxes[(threadsPerBlock * col_start + tid) * 4 + 0];
+ block_boxes[tid * 4 + 1] =
+ dev_boxes[(threadsPerBlock * col_start + tid) * 4 + 1];
+ block_boxes[tid * 4 + 2] =
+ dev_boxes[(threadsPerBlock * col_start + tid) * 4 + 2];
+ block_boxes[tid * 4 + 3] =
+ dev_boxes[(threadsPerBlock * col_start + tid) * 4 + 3];
+ }
+ __syncthreads();
+
+ if (tid < row_size) {
+ const int cur_box_idx = threadsPerBlock * row_start + tid;
+ const float *cur_box = dev_boxes + cur_box_idx * 4;
+ int i = 0;
+ unsigned long long int t = 0;
+ int start = 0;
+ if (row_start == col_start) {
+ start = tid + 1;
+ }
+ for (i = start; i < col_size; i++) {
+ if (devIoU(cur_box, block_boxes + i * 4, offset, iou_threshold)) {
+ t |= 1ULL << i;
+ }
+ }
+ dev_mask[cur_box_idx * gridDim.y + col_start] = t;
+ }
+ }
+}
+#endif // NMS_CUDA_KERNEL_CUH
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/nms_rotated_cuda.cuh b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/nms_rotated_cuda.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..80bed9681f748390999a2963bd3448570b0dbf6a
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/nms_rotated_cuda.cuh
@@ -0,0 +1,135 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+// modified from
+// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu
+#ifndef NMS_ROTATED_CUDA_CUH
+#define NMS_ROTATED_CUDA_CUH
+
+#ifdef MMCV_USE_PARROTS
+#include "parrots_cuda_helper.hpp"
+#else
+#include "pytorch_cuda_helper.hpp"
+#endif
+#include "box_iou_rotated_utils.hpp"
+
+__host__ __device__ inline int divideUP(const int x, const int y) {
+ return (((x) + (y)-1) / (y));
+}
+
+namespace {
+int const threadsPerBlock = sizeof(unsigned long long) * 8;
+}
+
+template <typename T>
+__global__ void nms_rotated_cuda_kernel(const int n_boxes,
+ const float iou_threshold,
+ const T* dev_boxes,
+ unsigned long long* dev_mask,
+ const int multi_label) {
+ // nms_rotated_cuda_kernel is modified from torchvision's nms_cuda_kernel
+
+ if (multi_label == 1) {
+ const int row_start = blockIdx.y;
+ const int col_start = blockIdx.x;
+
+ // if (row_start > col_start) return;
+
+ const int row_size =
+ min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
+ const int col_size =
+ min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
+
+ // Compared to nms_cuda_kernel, where each box is represented with 4 values
+ // (x1, y1, x2, y2), each rotated box is represented with 6 values here:
+ // (x_center, y_center, width, height, angle_degrees) plus a label.
+ __shared__ T block_boxes[threadsPerBlock * 6];
+ if (threadIdx.x < col_size) {
+ block_boxes[threadIdx.x * 6 + 0] =
+ dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 0];
+ block_boxes[threadIdx.x * 6 + 1] =
+ dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 1];
+ block_boxes[threadIdx.x * 6 + 2] =
+ dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 2];
+ block_boxes[threadIdx.x * 6 + 3] =
+ dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 3];
+ block_boxes[threadIdx.x * 6 + 4] =
+ dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 4];
+ block_boxes[threadIdx.x * 6 + 5] =
+ dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 5];
+ }
+ __syncthreads();
+
+ if (threadIdx.x < row_size) {
+ const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
+ const T* cur_box = dev_boxes + cur_box_idx * 6;
+ int i = 0;
+ unsigned long long t = 0;
+ int start = 0;
+ if (row_start == col_start) {
+ start = threadIdx.x + 1;
+ }
+ for (i = start; i < col_size; i++) {
+ // Instead of devIoU used by original horizontal nms, here
+ // we use the single_box_iou_rotated function from
+ // box_iou_rotated_utils.h
+ if (single_box_iou_rotated(cur_box, block_boxes + i * 6, 0) >
+ iou_threshold) {
+ t |= 1ULL << i;
+ }
+ }
+ const int col_blocks = divideUP(n_boxes, threadsPerBlock);
+ dev_mask[cur_box_idx * col_blocks + col_start] = t;
+ }
+ } else {
+ const int row_start = blockIdx.y;
+ const int col_start = blockIdx.x;
+
+ // if (row_start > col_start) return;
+
+ const int row_size =
+ min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
+ const int col_size =
+ min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
+
+ // Compared to nms_cuda_kernel, where each box is represented with 4 values
+ // (x1, y1, x2, y2), each rotated box is represented with 5 values
+ // (x_center, y_center, width, height, angle_degrees) here.
+ __shared__ T block_boxes[threadsPerBlock * 5];
+ if (threadIdx.x < col_size) {
+ block_boxes[threadIdx.x * 5 + 0] =
+ dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
+ block_boxes[threadIdx.x * 5 + 1] =
+ dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
+ block_boxes[threadIdx.x * 5 + 2] =
+ dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
+ block_boxes[threadIdx.x * 5 + 3] =
+ dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
+ block_boxes[threadIdx.x * 5 + 4] =
+ dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
+ }
+ __syncthreads();
+
+ if (threadIdx.x < row_size) {
+ const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
+ const T* cur_box = dev_boxes + cur_box_idx * 5;
+ int i = 0;
+ unsigned long long t = 0;
+ int start = 0;
+ if (row_start == col_start) {
+ start = threadIdx.x + 1;
+ }
+ for (i = start; i < col_size; i++) {
+ // Instead of devIoU used by original horizontal nms, here
+ // we use the single_box_iou_rotated function from
+ // box_iou_rotated_utils.h
+ if (single_box_iou_rotated(cur_box, block_boxes + i * 5, 0) >
+ iou_threshold) {
+ t |= 1ULL << i;
+ }
+ }
+ const int col_blocks = divideUP(n_boxes, threadsPerBlock);
+ dev_mask[cur_box_idx * col_blocks + col_start] = t;
+ }
+ }
+}
+
+#endif
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/parrots_cudawarpfunction.cuh b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/parrots_cudawarpfunction.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..7918a57452bbde9dc7c249b0c3dd2774aa1961bf
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/parrots_cudawarpfunction.cuh
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2019, SenseTime.
+ */
+
+#ifndef INCLUDE_PARROTS_DARRAY_CUDAWARPFUNCTION_CUH_
+#define INCLUDE_PARROTS_DARRAY_CUDAWARPFUNCTION_CUH_
+
+#ifndef __CUDACC__
+#error cudawarpfunction.cuh should only be included by .cu files
+#endif
+#include <cuda.h>
+
+#include
+
+#ifdef PARROTS_USE_HALF
+#include
+#endif
+#ifdef __CUDA_ARCH__
+#define CUDA_INTRINSIC_FUNC(Expr) Expr
+#else
+#define CUDA_INTRINSIC_FUNC(Expr)
+#endif
+
+#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300
+
+#ifdef PARROTS_USE_HALF
+
+#if CUDA_VERSION < 9000
+
+__device__ inline float16 __shfl(float16 var, int srcLane, int width) {
+ CUDA_INTRINSIC_FUNC(return __shfl(var.y, srcLane, width););
+}
+
+__device__ inline float16 __shfl_up(float16 var, unsigned delta, int width) {
+ CUDA_INTRINSIC_FUNC(return __shfl_up(var.y, delta, width););
+}
+
+__device__ inline float16 __shfl_down(float16 var, unsigned delta, int width) {
+ CUDA_INTRINSIC_FUNC(return __shfl_down(var.y, delta, width););
+}
+
+__device__ inline float16 __shfl_xor(float16 var, int laneMask, int width) {
+ CUDA_INTRINSIC_FUNC(return __shfl_xor(var.y, laneMask, width););
+}
+
+#else // CUDA_VERSION >= 9000
+
+__device__ inline float16 __shfl_sync(unsigned mask, float16 var, int srcLane,
+ int width = warpSize) {
+ CUDA_INTRINSIC_FUNC(float16 r; r.y = __shfl_sync(mask, var.y, srcLane, width);
+ return r;);
+}
+
+__device__ inline float16 __shfl_up_sync(unsigned mask, float16 var,
+ unsigned delta, int width = warpSize) {
+ CUDA_INTRINSIC_FUNC(
+ float16 r; r.y = __shfl_up_sync(mask, var.y, delta, width); return r;);
+}
+
+__device__ inline float16 __shfl_down_sync(unsigned mask, float16 var,
+ unsigned delta,
+ int width = warpSize) {
+ CUDA_INTRINSIC_FUNC(
+ float16 r; r.y = __shfl_down_sync(mask, var.y, delta, width); return r;);
+}
+
+__device__ inline float16 __shfl_xor_sync(unsigned mask, float16 var,
+ int laneMask, int width) {
+ CUDA_INTRINSIC_FUNC(float16 r;
+ r.y = __shfl_xor_sync(mask, var.y, laneMask, width);
+ return r;);
+}
+
+#endif // CUDA_VERSION < 9000
+
+#endif // PARROTS_USE_HALF
+
+// warp shuffle interface with a dummy mask
+#if CUDA_VERSION < 9000
+
+template <typename T>
+__device__ inline T __shfl_sync(unsigned mask, T var, int srcLane,
+ int width = warpSize) {
+ CUDA_INTRINSIC_FUNC(return __shfl(var, srcLane, width););
+}
+
+template <typename T>
+__device__ inline T __shfl_up_sync(unsigned mask, T var, unsigned delta,
+ int width = warpSize) {
+ CUDA_INTRINSIC_FUNC(return __shfl_up(var, delta, width););
+}
+
+template <typename T>
+__device__ inline T __shfl_down_sync(unsigned mask, T var, unsigned delta,
+ int width = warpSize) {
+ CUDA_INTRINSIC_FUNC(return __shfl_down(var, delta, width););
+}
+
+template <typename T>
+__device__ inline T __shfl_xor_sync(unsigned mask, T var, int laneMask,
+ int width = warpSize) {
+ CUDA_INTRINSIC_FUNC(return __shfl_xor(var, laneMask, width););
+}
+
+#endif // CUDA_VERSION < 9000
+
+#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300
+
+#endif // INCLUDE_PARROTS_DARRAY_CUDAWARPFUNCTION_CUH_
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/points_in_boxes_cuda_kernel.cuh b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/points_in_boxes_cuda_kernel.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..887293855df6cb611c62a6a4c02f92e787316944
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/points_in_boxes_cuda_kernel.cuh
@@ -0,0 +1,108 @@
+// encoding=utf-8
+// Copyright 2021 Huawei Technologies Co., Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef POINT_IN_BOXES_CUDA_KERNEL_CUH
+#define POINT_IN_BOXES_CUDA_KERNEL_CUH
+
+#ifdef MMCV_USE_PARROTS
+#include "parrots_cuda_helper.hpp"
+#else
+#include "pytorch_cuda_helper.hpp"
+#endif
+
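+// Rotate an (x, y) offset by -rz so the box becomes axis-aligned in its local
+// frame; used below to test whether a point falls inside a rotated box.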
+template <typename T>
+__device__ inline void lidar_to_local_coords(T shift_x, T shift_y, T rz,
+ T &local_x, T &local_y) {
+ T cosa = cos(-rz), sina = sin(-rz);
+ local_x = shift_x * cosa + shift_y * (-sina);
+ local_y = shift_x * sina + shift_y * cosa;
+}
+
+template <typename T>
+__device__ inline int check_pt_in_box3d(const T *pt, const T *box3d, T &local_x,
+ T &local_y) {
+ // param pt: (x, y, z)
+ // param box3d: (cx, cy, cz, x_size, y_size, z_size, rz) in LiDAR coordinate,
+ // cz in the bottom center
+ T x = pt[0], y = pt[1], z = pt[2];
+ T cx = box3d[0], cy = box3d[1], cz = box3d[2];
+ T x_size = box3d[3], y_size = box3d[4], z_size = box3d[5], rz = box3d[6];
+ cz += z_size /
+ 2.0; // shift to the center since cz in box3d is the bottom center
+
+ if (fabsf(z - cz) > z_size / 2.0) return 0;
+ lidar_to_local_coords(x - cx, y - cy, rz, local_x, local_y);
+ float in_flag = (local_x > -x_size / 2.0) & (local_x < x_size / 2.0) &
+ (local_y > -y_size / 2.0) & (local_y < y_size / 2.0);
+ return in_flag;
+}
+
+template <typename T>
+__global__ void points_in_boxes_part_forward_cuda_kernel(
+ int batch_size, int boxes_num, int pts_num, const T *boxes, const T *pts,
+ int *box_idx_of_points) {
+ // params boxes: (B, N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR
+ // coordinate, z is the bottom center; the boxes are assumed not to overlap.
+ // params pts: (B, npoints, 3) [x, y, z] in LiDAR coordinate.
+ // params box_idx_of_points: (B, npoints), default -1
+
+ int bs_idx = blockIdx.y;
+ CUDA_1D_KERNEL_LOOP(pt_idx, pts_num) {
+ if (bs_idx >= batch_size) return;
+
+ boxes += bs_idx * boxes_num * 7;
+ pts += bs_idx * pts_num * 3 + pt_idx * 3;
+ box_idx_of_points += bs_idx * pts_num + pt_idx;
+
+ T local_x = 0, local_y = 0;
+ int cur_in_flag = 0;
+ for (int k = 0; k < boxes_num; k++) {
+ cur_in_flag = check_pt_in_box3d(pts, boxes + k * 7, local_x, local_y);
+ if (cur_in_flag) {
+ box_idx_of_points[0] = k;
+ break;
+ }
+ }
+ }
+}
+
+template <typename T>
+__global__ void points_in_boxes_all_forward_cuda_kernel(
+ int batch_size, int boxes_num, int pts_num, const T *boxes, const T *pts,
+ int *box_idx_of_points) {
+ // params boxes: (B, N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR
+ // coordinate, z is the bottom center; the boxes are assumed not to overlap.
+ // params pts: (B, npoints, 3) [x, y, z] in LiDAR coordinate.
+ // params box_idx_of_points: (B, npoints), default -1
+
+ int bs_idx = blockIdx.y;
+ CUDA_1D_KERNEL_LOOP(pt_idx, pts_num) {
+ if (bs_idx >= batch_size) return;
+
+ boxes += bs_idx * boxes_num * 7;
+ pts += bs_idx * pts_num * 3 + pt_idx * 3;
+ box_idx_of_points += bs_idx * pts_num * boxes_num + pt_idx * boxes_num;
+
+ T local_x = 0, local_y = 0;
+ for (int k = 0; k < boxes_num; k++) {
+ const int cur_in_flag =
+ check_pt_in_box3d(pts, boxes + k * 7, local_x, local_y);
+ if (cur_in_flag) {
+ box_idx_of_points[k] = 1;
+ }
+ }
+ }
+}
+
+#endif // POINT_IN_BOXES_CUDA_KERNEL_CUH
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/points_in_polygons_cuda_kernel.cuh b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/points_in_polygons_cuda_kernel.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..5ecc1a5be38b960d8e60aba7eb77efd73ad41ffb
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/points_in_polygons_cuda_kernel.cuh
@@ -0,0 +1,92 @@
+// encoding=utf-8
+// Copyright 2021 Huawei Technologies Co., Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef POINTS_IN_POLYGONS_CUDA_KERNEL_CUH
+#define POINTS_IN_POLYGONS_CUDA_KERNEL_CUH
+
+#ifdef MMCV_USE_PARROTS
+#include "parrots_cuda_helper.hpp"
+#else
+#include "pytorch_cuda_helper.hpp"
+#endif
+
+struct point {
+ float x, y;
+};
+
+template <typename scalar_t>
+__global__ void points_in_polygons_forward_cuda_kernel(
+ const int nthreads, const scalar_t *vertex1, const scalar_t *vertex2,
+ const int rows, const int cols, scalar_t *inside_flag) {
+ CUDA_1D_KERNEL_LOOP(index, nthreads) {
+ int row = index / cols;
+ int col = index % cols;
+
+ const scalar_t *offset_vertex1 = vertex1 + row * 2;
+ const scalar_t *offset_vertex2 = vertex2 + col * 8;
+
+ point point_[1];
+ point polygon[4];
+
+ point_[0].x = offset_vertex1[0];
+ point_[0].y = offset_vertex1[1];
+
+ polygon[0].x = offset_vertex2[0];
+ polygon[0].y = offset_vertex2[1];
+ polygon[1].x = offset_vertex2[2];
+ polygon[1].y = offset_vertex2[3];
+ polygon[2].x = offset_vertex2[4];
+ polygon[2].y = offset_vertex2[5];
+ polygon[3].x = offset_vertex2[6];
+ polygon[3].y = offset_vertex2[7];
+
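+ // Crossing-number (ray casting) test: count how many polygon edges a
+ // horizontal ray to the right of the point crosses; an odd count means the
+ // point lies inside the quadrilateral.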
+ int nCross = 0;
+ int i, j;
+ float sx, sy, tx, ty, px, py, x;
+ for (i = 0, j = 3; i < 4; j = i, i++) {
+ sx = polygon[i].x;
+ sy = polygon[i].y;
+ tx = polygon[j].x;
+ ty = polygon[j].y;
+
+ px = point_[0].x;
+ py = point_[0].y;
+
+ if (py < min(sy, ty)) continue;
+ if (py > max(sy, ty)) continue;
+
+ if ((sx == px && sy == py) || (tx == px && ty == py)) {
+ break;
+ } else {
+ if ((sy < py && ty >= py) || (sy >= py && ty < py)) {
+ x = sx + (py - sy) * (tx - sx) / (ty - sy);
+ if (x == px) {
+ break;
+ }
+ if (x > px) {
+ nCross++;
+ }
+ }
+ }
+ }
+ if (nCross % 2 == 1) {
+ inside_flag[index] = 1.0;
+ } else {
+ inside_flag[index] = 0.0;
+ }
+ return;
+ }
+}
+
+#endif // POINTS_IN_POLYGONS_CUDA_KERNEL_CUH
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/psamask_cuda_kernel.cuh b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/psamask_cuda_kernel.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..739fb4e623b0e75154b818a6e91c9c2c214e3349
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/psamask_cuda_kernel.cuh
@@ -0,0 +1,154 @@
+// encoding=utf-8
+// Copyright 2021 Huawei Technologies Co., Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef PSAMASK_CUDA_KERNEL_CUH
+#define PSAMASK_CUDA_KERNEL_CUH
+
+#ifdef MMCV_USE_PARROTS
+#include "parrots_cuda_helper.hpp"
+#else
+#include "pytorch_cuda_helper.hpp"
+#endif
+
+// CUDA: grid stride looping
+#ifndef CUDA_KERNEL_LOOP
+#define CUDA_KERNEL_LOOP(i, n) \
+ for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
+ i += blockDim.x * gridDim.x)
+#endif
+
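+// PSA mask kernels: expand each position's local (h_mask x w_mask) attention
+// window into a dense (H*W) x (H*W) buffer. "collect" indexes the buffer by
+// (pointed-to position, current position); "distribute" (below) swaps the two.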
+template <typename T>
+__global__ void psamask_collect_forward_cuda(
+ const int nthreads, const int h_feature, const int w_feature,
+ const int h_mask, const int w_mask, const int half_h_mask,
+ const int half_w_mask, const T* mask_data, T* buffer_data) {
+ CUDA_KERNEL_LOOP(index, nthreads) {
+ const int w = index % w_feature;
+ const int h = (index / w_feature) % h_feature;
+ const int n = index / w_feature / h_feature;
+ // effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed
+ const int hstart = max(0, half_h_mask - h);
+ const int hend = min(h_mask, h_feature + half_h_mask - h);
+ const int wstart = max(0, half_w_mask - w);
+ const int wend = min(w_mask, w_feature + half_w_mask - w);
+ // (hidx, widx ) with mask-indexed
+ // (hidx + h - half_h_mask, widx + w - half_w_mask) with feature-indexed
+ for (int hidx = hstart; hidx < hend; hidx++) {
+ for (int widx = wstart; widx < wend; widx++) {
+ buffer_data[(n * h_feature * w_feature +
+ (hidx + h - half_h_mask) * w_feature +
+ (widx + w - half_w_mask)) *
+ h_feature * w_feature +
+ h * w_feature + w] = mask_data
+ [((n * h_mask * w_mask + hidx * w_mask + widx) * h_feature + h) *
+ w_feature +
+ w];
+ }
+ }
+ }
+}
+
+template <typename T>
+__global__ void psamask_distribute_forward_cuda(
+ const int nthreads, const int h_feature, const int w_feature,
+ const int h_mask, const int w_mask, const int half_h_mask,
+ const int half_w_mask, const T* mask_data, T* buffer_data) {
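+ // Identical copy to the collect kernel, except that the two (h * w)
+ // position indices of the buffer are transposed.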
+ CUDA_KERNEL_LOOP(index, nthreads) {
+ const int w = index % w_feature;
+ const int h = (index / w_feature) % h_feature;
+ const int n = index / w_feature / h_feature;
+ // effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed
+ const int hstart = max(0, half_h_mask - h);
+ const int hend = min(h_mask, h_feature + half_h_mask - h);
+ const int wstart = max(0, half_w_mask - w);
+ const int wend = min(w_mask, w_feature + half_w_mask - w);
+ // (hidx, widx ) with mask-indexed
+ // (hidx + h - half_h_mask, widx + w - half_w_mask) with feature-indexed
+ for (int hidx = hstart; hidx < hend; hidx++) {
+ for (int widx = wstart; widx < wend; widx++) {
+ buffer_data[(n * h_feature * w_feature + h * w_feature + w) *
+ h_feature * w_feature +
+ (hidx + h - half_h_mask) * w_feature +
+ (widx + w - half_w_mask)] = mask_data
+ [((n * h_mask * w_mask + hidx * w_mask + widx) * h_feature + h) *
+ w_feature +
+ w];
+ }
+ }
+ }
+}
+
+template <typename T>
+__global__ void psamask_collect_backward_cuda(
+ const int nthreads, const int h_feature, const int w_feature,
+ const int h_mask, const int w_mask, const int half_h_mask,
+ const int half_w_mask, const T* buffer_diff, T* mask_diff) {
+ CUDA_KERNEL_LOOP(index, nthreads) {
+ const int w = index % w_feature;
+ const int h = (index / w_feature) % h_feature;
+ const int n = index / w_feature / h_feature;
+ // effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed
+ const int hstart = max(0, half_h_mask - h);
+ const int hend = min(h_mask, h_feature + half_h_mask - h);
+ const int wstart = max(0, half_w_mask - w);
+ const int wend = min(w_mask, w_feature + half_w_mask - w);
+ // (hidx, widx ) with mask-indexed
+ // (hidx + h - half_h_mask, widx + w - half_w_mask) with feature-indexed
+ for (int hidx = hstart; hidx < hend; hidx++) {
+ for (int widx = wstart; widx < wend; widx++) {
+ mask_diff[((n * h_mask * w_mask + hidx * w_mask + widx) * h_feature +
+ h) *
+ w_feature +
+ w] = buffer_diff[(n * h_feature * w_feature +
+ (hidx + h - half_h_mask) * w_feature +
+ (widx + w - half_w_mask)) *
+ h_feature * w_feature +
+ h * w_feature + w];
+ }
+ }
+ }
+}
+
+template <typename T>
+__global__ void psamask_distribute_backward_cuda(
+ const int nthreads, const int h_feature, const int w_feature,
+ const int h_mask, const int w_mask, const int half_h_mask,
+ const int half_w_mask, const T* buffer_diff, T* mask_diff) {
+ CUDA_KERNEL_LOOP(index, nthreads) {
+ const int w = index % w_feature;
+ const int h = (index / w_feature) % h_feature;
+ const int n = index / w_feature / h_feature;
+ // effective mask region : [hstart, hend) x [wstart, wend) with mask-indexed
+ const int hstart = max(0, half_h_mask - h);
+ const int hend = min(h_mask, h_feature + half_h_mask - h);
+ const int wstart = max(0, half_w_mask - w);
+ const int wend = min(w_mask, w_feature + half_w_mask - w);
+ // (hidx, widx ) with mask-indexed
+ // (hidx + h - half_h_mask, widx + w - half_w_mask) with feature-indexed
+ for (int hidx = hstart; hidx < hend; hidx++) {
+ for (int widx = wstart; widx < wend; widx++) {
+ mask_diff[((n * h_mask * w_mask + hidx * w_mask + widx) * h_feature +
+ h) *
+ w_feature +
+ w] =
+ buffer_diff[(n * h_feature * w_feature + h * w_feature + w) *
+ h_feature * w_feature +
+ (hidx + h - half_h_mask) * w_feature +
+ (widx + w - half_w_mask)];
+ }
+ }
+ }
+}
+
+#endif // PSAMASK_CUDA_KERNEL_CUH
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/riroi_align_rotated_cuda_kernel.cuh b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/riroi_align_rotated_cuda_kernel.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..4383d9e82cce97362f53cf799b8dfa30c7b4cd02
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/riroi_align_rotated_cuda_kernel.cuh
@@ -0,0 +1,242 @@
+// Modified from
+// https://github.com/csuhan/ReDet/blob/master/mmdet/ops/riroi_align/src/riroi_align_kernel.cu
+#ifndef RIROI_ALIGN_ROTATED_CUDA_KERNEL_CUH
+#define RIROI_ALIGN_ROTATED_CUDA_KERNEL_CUH
+
+#include <float.h>
+#ifdef MMCV_USE_PARROTS
+#include "parrots_cuda_helper.hpp"
+#else // MMCV_USE_PARROTS
+#include "pytorch_cuda_helper.hpp"
+#endif // MMCV_USE_PARROTS
+
+/*** Forward ***/
+template <typename scalar_t>
+__global__ void riroi_align_rotated_forward_cuda_kernel(
+ const int nthreads, const scalar_t *bottom_data,
+ const scalar_t *bottom_rois, const scalar_t spatial_scale,
+ const int num_samples, const bool clockwise, const int channels,
+ const int height, const int width, const int pooled_height,
+ const int pooled_width, const int num_orientations, scalar_t *top_data) {
+ CUDA_1D_KERNEL_LOOP(index, nthreads) {
+ // (n, c, ph, pw) is an element in the pooled output
+ int pw = index % pooled_width;
+ int ph = (index / pooled_width) % pooled_height;
+ int o = (index / pooled_width / pooled_height) % num_orientations;
+ int c =
+ (index / pooled_width / pooled_height / num_orientations) % channels;
+ int n = index / pooled_width / pooled_height / num_orientations / channels;
+
+ const scalar_t *offset_bottom_rois = bottom_rois + n * 6;
+ int roi_batch_ind = offset_bottom_rois[0];
+
+ // Do not use rounding; this implementation detail is critical
+ scalar_t roi_center_w = offset_bottom_rois[1] * spatial_scale;
+ scalar_t roi_center_h = offset_bottom_rois[2] * spatial_scale;
+ scalar_t roi_width = offset_bottom_rois[3] * spatial_scale;
+ scalar_t roi_height = offset_bottom_rois[4] * spatial_scale;
+ // scalar_t theta = offset_bottom_rois[5] * M_PI / 180.0;
+ scalar_t theta = offset_bottom_rois[5];
+ // Force malformed ROIs to be 1x1
+ roi_width = max(roi_width, (scalar_t)1.);
+ roi_height = max(roi_height, (scalar_t)1.);
+ scalar_t bin_size_h = static_cast<scalar_t>(roi_height) /
+ static_cast<scalar_t>(pooled_height);
+ scalar_t bin_size_w =
+ static_cast<scalar_t>(roi_width) / static_cast<scalar_t>(pooled_width);
+
+ // find aligned index
+ scalar_t ind_float = theta * num_orientations / (2 * M_PI);
+ int ind = floorf(ind_float);
+ scalar_t l_var = ind_float - (scalar_t)ind;
+ scalar_t r_var = 1.0 - l_var;
+ // correct start channel
+ ind = (ind + num_orientations) % num_orientations;
+ // rotated channel
+ int ind_rot = (o - ind + num_orientations) % num_orientations;
+ int ind_rot_plus = (ind_rot + 1 + num_orientations) % num_orientations;
+ const scalar_t *offset_bottom_data =
+ bottom_data + (roi_batch_ind * channels * num_orientations +
+ c * num_orientations + ind_rot) *
+ height * width;
+
+ const scalar_t *offset_bottom_data_plus =
+ bottom_data + (roi_batch_ind * channels * num_orientations +
+ c * num_orientations + ind_rot_plus) *
+ height * width;
+ // We use roi_bin_grid to sample the grid and mimic integral
+ int roi_bin_grid_h = (num_samples > 0)
+ ? num_samples
+ : ceilf(roi_height / pooled_height); // e.g., = 2
+ int roi_bin_grid_w =
+ (num_samples > 0) ? num_samples : ceilf(roi_width / pooled_width);
+
+ // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y).
+ // Appropriate translation needs to be applied after.
+ if (clockwise) {
+ theta = -theta; // If clockwise, the angle needs to be reversed.
+ }
+ scalar_t roi_start_h = -roi_height / 2.0;
+ scalar_t roi_start_w = -roi_width / 2.0;
+ scalar_t cosscalar_theta = cos(theta);
+ scalar_t sinscalar_theta = sin(theta);
+
+ // We do average (integral) pooling inside a bin
+ const scalar_t count = max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. = 4
+
+ scalar_t output_val = 0.;
+ for (int iy = 0; iy < roi_bin_grid_h; iy++) { // e.g., iy = 0, 1
+ const scalar_t yy =
+ roi_start_h + ph * bin_size_h +
+ static_cast<scalar_t>(iy + .5f) * bin_size_h /
+ static_cast<scalar_t>(roi_bin_grid_h); // e.g., 0.5, 1.5
+ for (int ix = 0; ix < roi_bin_grid_w; ix++) {
+ const scalar_t xx = roi_start_w + pw * bin_size_w +
+ static_cast<scalar_t>(ix + .5f) * bin_size_w /
+ static_cast<scalar_t>(roi_bin_grid_w);
+
+ // Rotate by theta (counterclockwise) around the center and translate
+ scalar_t y = yy * cosscalar_theta - xx * sinscalar_theta + roi_center_h;
+ scalar_t x = yy * sinscalar_theta + xx * cosscalar_theta + roi_center_w;
+
+ scalar_t val = bilinear_interpolate<scalar_t>(
+ offset_bottom_data, height, width, y, x, index);
+ scalar_t val_plus = bilinear_interpolate<scalar_t>(
+ offset_bottom_data_plus, height, width, y, x, index);
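+ // Blend the two neighbouring orientation channels so the response varies
+ // smoothly with the RoI angle (weights r_var and l_var sum to 1).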
+ output_val += r_var * val + l_var * val_plus;
+ }
+ }
+ output_val /= count;
+
+ top_data[index] = output_val;
+ }
+}
+
+/*** Backward ***/
+template <typename scalar_t>
+__global__ void riroi_align_rotated_backward_cuda_kernel(
+ const int nthreads, const scalar_t *top_diff, const scalar_t *bottom_rois,
+ const scalar_t spatial_scale, const int num_samples, const bool clockwise,
+ const int channels, const int height, const int width,
+ const int pooled_height, const int pooled_width, const int num_orientations,
+ scalar_t *bottom_diff) {
+ CUDA_1D_KERNEL_LOOP(index, nthreads) {
+ // (n, c, ph, pw) is an element in the pooled output
+ int pw = index % pooled_width;
+ int ph = (index / pooled_width) % pooled_height;
+ int o = (index / pooled_width / pooled_height) % num_orientations;
+ int c =
+ (index / pooled_width / pooled_height / num_orientations) % channels;
+ int n = index / pooled_width / pooled_height / num_orientations / channels;
+
+ const scalar_t *offset_bottom_rois = bottom_rois + n * 6;
+ int roi_batch_ind = offset_bottom_rois[0];
+
+ // Do not round
+ scalar_t roi_center_w = offset_bottom_rois[1] * spatial_scale;
+ scalar_t roi_center_h = offset_bottom_rois[2] * spatial_scale;
+ scalar_t roi_width = offset_bottom_rois[3] * spatial_scale;
+ scalar_t roi_height = offset_bottom_rois[4] * spatial_scale;
+ // scalar_t theta = offset_bottom_rois[5] * M_PI / 180.0;
+ scalar_t theta = offset_bottom_rois[5];
+ // Force malformed ROIs to be 1x1
+ roi_width = max(roi_width, (scalar_t)1.);
+ roi_height = max(roi_height, (scalar_t)1.);
+
+ scalar_t bin_size_h = static_cast<scalar_t>(roi_height) /
+ static_cast<scalar_t>(pooled_height);
+ scalar_t bin_size_w =
+ static_cast<scalar_t>(roi_width) / static_cast<scalar_t>(pooled_width);
+
+ // find aligned index
+ scalar_t ind_float = theta * num_orientations / (2 * M_PI);
+ int ind = floorf(ind_float);
+ scalar_t l_var = ind_float - (scalar_t)ind;
+ scalar_t r_var = 1.0 - l_var;
+ // correct start channel
+ ind = (ind + num_orientations) % num_orientations;
+ // rotated channel
+ int ind_rot = (o - ind + num_orientations) % num_orientations;
+ int ind_rot_plus = (ind_rot + 1 + num_orientations) % num_orientations;
+ scalar_t *offset_bottom_diff =
+ bottom_diff + (roi_batch_ind * channels * num_orientations +
+ c * num_orientations + ind_rot) *
+ height * width;
+ scalar_t *offset_bottom_diff_plus =
+ bottom_diff + (roi_batch_ind * channels * num_orientations +
+ c * num_orientations + ind_rot_plus) *
+ height * width;
+ int top_offset =
+ (n * channels * num_orientations + c * num_orientations + o) *
+ pooled_height * pooled_width;
+ const scalar_t *offset_top_diff = top_diff + top_offset;
+ const scalar_t top_diff_this_bin = offset_top_diff[ph * pooled_width + pw];
+
+ // We use roi_bin_grid to sample the grid and mimic integral
+ int roi_bin_grid_h = (num_samples > 0)
+ ? num_samples
+ : ceilf(roi_height / pooled_height); // e.g., = 2
+ int roi_bin_grid_w =
+ (num_samples > 0) ? num_samples : ceilf(roi_width / pooled_width);
+
+ // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y).
+ // Appropriate translation needs to be applied after.
+ if (clockwise) {
+ theta = -theta; // If clockwise, the angle needs to be reversed.
+ }
+ scalar_t roi_start_h = -roi_height / 2.0;
+ scalar_t roi_start_w = -roi_width / 2.0;
+ scalar_t cosTheta = cos(theta);
+ scalar_t sinTheta = sin(theta);
+
+ // We do average (integral) pooling inside a bin
+ const scalar_t count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4
+
+ for (int iy = 0; iy < roi_bin_grid_h; iy++) { // e.g., iy = 0, 1
+ const scalar_t yy =
+ roi_start_h + ph * bin_size_h +
+ static_cast<scalar_t>(iy + .5f) * bin_size_h /
+ static_cast<scalar_t>(roi_bin_grid_h); // e.g., 0.5, 1.5
+ for (int ix = 0; ix < roi_bin_grid_w; ix++) {
+ const scalar_t xx = roi_start_w + pw * bin_size_w +
+ static_cast<scalar_t>(ix + .5f) * bin_size_w /
+ static_cast<scalar_t>(roi_bin_grid_w);
+
+ // Rotate by theta around the center and translate
+ scalar_t y = yy * cosTheta - xx * sinTheta + roi_center_h;
+ scalar_t x = yy * sinTheta + xx * cosTheta + roi_center_w;
+
+ scalar_t w1, w2, w3, w4;
+ int x_low, x_high, y_low, y_high;
+
+ bilinear_interpolate_gradient<scalar_t>(height, width, y, x, w1, w2, w3,
+ w4, x_low, x_high, y_low,
+ y_high, index);
+
+ scalar_t g1 = top_diff_this_bin * w1 / count;
+ scalar_t g2 = top_diff_this_bin * w2 / count;
+ scalar_t g3 = top_diff_this_bin * w3 / count;
+ scalar_t g4 = top_diff_this_bin * w4 / count;
+
+ if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) {
+ atomicAdd(offset_bottom_diff + y_low * width + x_low, g1 * r_var);
+ atomicAdd(offset_bottom_diff + y_low * width + x_high, g2 * r_var);
+ atomicAdd(offset_bottom_diff + y_high * width + x_low, g3 * r_var);
+ atomicAdd(offset_bottom_diff + y_high * width + x_high, g4 * r_var);
+
+ atomicAdd(offset_bottom_diff_plus + y_low * width + x_low,
+ g1 * l_var);
+ atomicAdd(offset_bottom_diff_plus + y_low * width + x_high,
+ g2 * l_var);
+ atomicAdd(offset_bottom_diff_plus + y_high * width + x_low,
+ g3 * l_var);
+ atomicAdd(offset_bottom_diff_plus + y_high * width + x_high,
+ g4 * l_var);
+
+ } // if
+ } // ix
+ } // iy
+ } // CUDA_1D_KERNEL_LOOP
+} // RiRoIAlignBackward
+
+#endif // RIROI_ALIGN_ROTATED_CUDA_KERNEL_CUH
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/roi_align_cuda_kernel.cuh b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/roi_align_cuda_kernel.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..53554c0980d3f4f60375c5e2d6ed330c4c2c470c
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/roi_align_cuda_kernel.cuh
@@ -0,0 +1,225 @@
+// encoding=utf-8
+// Copyright 2021 Huawei Technologies Co., Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef ROI_ALIGN_CUDA_KERNEL_CUH
+#define ROI_ALIGN_CUDA_KERNEL_CUH
+
+#include <float.h>
+#ifdef MMCV_WITH_TRT
+#include "common_cuda_helper.hpp"
+#else // MMCV_WITH_TRT
+#ifdef MMCV_USE_PARROTS
+#include "parrots_cuda_helper.hpp"
+#else // MMCV_USE_PARROTS
+#include "pytorch_cuda_helper.hpp"
+#endif // MMCV_USE_PARROTS
+#endif // MMCV_WITH_TRT
+
+/*** Forward ***/
+template <typename T>
+__global__ void roi_align_forward_cuda_kernel(
+ const int nthreads, const T* input, const T* rois, T* output, T* argmax_y,
+ T* argmax_x, const int pooled_height, const int pooled_width,
+ const T spatial_scale, const int sampling_ratio,
+ const int pool_mode, // 0 - max pool, 1 - avg pool
+ const bool aligned, const int channels, const int height, const int width) {
+ CUDA_1D_KERNEL_LOOP(index, nthreads) {
+ // (n, c, ph, pw) is an element in the pooled output
+ int pw = index % pooled_width;
+ int ph = (index / pooled_width) % pooled_height;
+ int c = (index / pooled_width / pooled_height) % channels;
+ int n = index / pooled_width / pooled_height / channels;
+
+ const T* offset_rois = rois + n * 5;
+ int roi_batch_ind = offset_rois[0];
+
+ // Do not use rounding; this implementation detail is critical
+ T offset = aligned ? (T)0.5 : (T)0.0;
+ T roi_start_w = offset_rois[1] * spatial_scale - offset;
+ T roi_start_h = offset_rois[2] * spatial_scale - offset;
+ T roi_end_w = offset_rois[3] * spatial_scale - offset;
+ T roi_end_h = offset_rois[4] * spatial_scale - offset;
+
+ T roi_width = roi_end_w - roi_start_w;
+ T roi_height = roi_end_h - roi_start_h;
+ if (!aligned) { // for backward-compatibility only
+ roi_width = max(roi_width, (T)1.);
+ roi_height = max(roi_height, (T)1.);
+ }
+
+ T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
+ T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
+
+ const T* offset_input =
+ input + (roi_batch_ind * channels + c) * height * width;
+
+ // We use roi_bin_grid to sample the grid and mimic integral
+ int roi_bin_grid_h =
+ (sampling_ratio > 0)
+ ? sampling_ratio
+ : static_cast<int>(ceilf(roi_height / pooled_height));
+ int roi_bin_grid_w =
+ (sampling_ratio > 0)
+ ? sampling_ratio
+ : static_cast<int>(ceilf(roi_width / pooled_width));
+
+ if (pool_mode == 0) {
+ // We do max pooling inside a bin
+ T maxval = -FLT_MAX;
+ T maxidx_y = -1.f, maxidx_x = -1.f;
+ for (int iy = 0; iy < roi_bin_grid_h; iy++) {
+ const T y = roi_start_h + ph * bin_size_h +
+ static_cast<T>(iy + .5f) * bin_size_h /
+ static_cast<T>(roi_bin_grid_h);
+ for (int ix = 0; ix < roi_bin_grid_w; ix++) {
+ const T x = roi_start_w + pw * bin_size_w +
+ static_cast<T>(ix + .5f) * bin_size_w /
+ static_cast<T>(roi_bin_grid_w);
+ T val =
+ bilinear_interpolate(offset_input, height, width, y, x, index);
+ if (val > maxval) {
+ maxval = val;
+ maxidx_y = y;
+ maxidx_x = x;
+ }
+ }
+ }
+ output[index] = maxval;
+ argmax_y[index] = maxidx_y;
+ argmax_x[index] = maxidx_x;
+ } else if (pool_mode == 1) {
+ // We do average pooling inside a bin
+ const T count = max(roi_bin_grid_h * roi_bin_grid_w, 1);
+ T output_val = 0.;
+ for (int iy = 0; iy < roi_bin_grid_h; iy++) {
+ const T y = roi_start_h + ph * bin_size_h +
+ static_cast<T>(iy + .5f) * bin_size_h /
+ static_cast<T>(roi_bin_grid_h);
+ for (int ix = 0; ix < roi_bin_grid_w; ix++) {
+ const T x = roi_start_w + pw * bin_size_w +
+ static_cast<T>(ix + .5f) * bin_size_w /
+ static_cast<T>(roi_bin_grid_w);
+ T val =
+ bilinear_interpolate(offset_input, height, width, y, x, index);
+ output_val += val;
+ }
+ }
+ output[index] = output_val / count;
+ }
+ }
+}
+
+/*** Backward ***/
+template <typename T>
+__global__ void roi_align_backward_cuda_kernel(
+ const int nthreads, const T* grad_output, const T* rois, const T* argmax_y,
+ const T* argmax_x, T* grad_input, const int pooled_height,
+ const int pooled_width, const T spatial_scale, const int sampling_ratio,
+ const int pool_mode, // 0 - max pool, 1 - avg pool
+ const bool aligned, const int channels, const int height, const int width) {
+ CUDA_1D_KERNEL_LOOP(index, nthreads) {
+ // (n, c, ph, pw) is an element in the pooled output
+ int pw = index % pooled_width;
+ int ph = (index / pooled_width) % pooled_height;
+ int c = (index / pooled_width / pooled_height) % channels;
+ int n = index / pooled_width / pooled_height / channels;
+
+ const T grad_output_this_bin = grad_output[index];
+
+ const T* offset_rois = rois + n * 5;
+ int roi_batch_ind = offset_rois[0];
+ T* offset_grad_input =
+ grad_input + ((roi_batch_ind * channels + c) * height * width);
+
+ if (pool_mode == 0) {
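+ // Max pooling: the gradient flows only through the sampling location that
+ // produced the maximum, recorded as (argmax_y, argmax_x) in the forward pass.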
+ T y = argmax_y[index], x = argmax_x[index];
+ if (y != -1.f) {
+ T w1, w2, w3, w4;
+ int x_low, x_high, y_low, y_high;
+ bilinear_interpolate_gradient(height, width, y, x, w1, w2, w3, w4,
+ x_low, x_high, y_low, y_high, index);
+
+ if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) {
+ atomicAdd(offset_grad_input + y_low * width + x_low,
+ grad_output_this_bin * w1);
+ atomicAdd(offset_grad_input + y_low * width + x_high,
+ grad_output_this_bin * w2);
+ atomicAdd(offset_grad_input + y_high * width + x_low,
+ grad_output_this_bin * w3);
+ atomicAdd(offset_grad_input + y_high * width + x_high,
+ grad_output_this_bin * w4);
+ }
+ }
+ } else if (pool_mode == 1) {
+ // Do not use rounding; this implementation detail is critical
+ T offset = aligned ? (T)0.5 : (T)0.0;
+ T roi_start_w = offset_rois[1] * spatial_scale - offset;
+ T roi_start_h = offset_rois[2] * spatial_scale - offset;
+ T roi_end_w = offset_rois[3] * spatial_scale - offset;
+ T roi_end_h = offset_rois[4] * spatial_scale - offset;
+
+ T roi_width = roi_end_w - roi_start_w;
+ T roi_height = roi_end_h - roi_start_h;
+ if (!aligned) { // for backward-compatibility only
+ roi_width = max(roi_width, (T)1.);
+ roi_height = max(roi_height, (T)1.);
+ }
+
+ T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
+ T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
+
+ // We use roi_bin_grid to sample the grid and mimic integral
+ int roi_bin_grid_h =
+ (sampling_ratio > 0)
+ ? sampling_ratio
+ : static_cast<int>(ceilf(roi_height / pooled_height));
+ int roi_bin_grid_w =
+ (sampling_ratio > 0)
+ ? sampling_ratio
+ : static_cast<int>(ceilf(roi_width / pooled_width));
+
+ // We do average (integral) pooling inside a bin
+ const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4
+
+ for (int iy = 0; iy < roi_bin_grid_h; iy++) {
+ const T y = roi_start_h + ph * bin_size_h +
+ static_cast<T>(iy + .5f) * bin_size_h /
+ static_cast<T>(roi_bin_grid_h);
+ for (int ix = 0; ix < roi_bin_grid_w; ix++) {
+ const T x = roi_start_w + pw * bin_size_w +
+ static_cast<T>(ix + .5f) * bin_size_w /
+ static_cast<T>(roi_bin_grid_w);
+
+ T w1, w2, w3, w4;
+ int x_low, x_high, y_low, y_high;
+ bilinear_interpolate_gradient(height, width, y, x, w1, w2, w3, w4,
+ x_low, x_high, y_low, y_high, index);
+
+ if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) {
+ atomicAdd(offset_grad_input + y_low * width + x_low,
+ grad_output_this_bin * w1 / count);
+ atomicAdd(offset_grad_input + y_low * width + x_high,
+ grad_output_this_bin * w2 / count);
+ atomicAdd(offset_grad_input + y_high * width + x_low,
+ grad_output_this_bin * w3 / count);
+ atomicAdd(offset_grad_input + y_high * width + x_high,
+ grad_output_this_bin * w4 / count);
+ }
+ }
+ }
+ }
+ }
+}
+
+#endif // ROI_ALIGN_CUDA_KERNEL_CUH
diff --git a/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/roi_align_rotated_cuda_kernel.cuh b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/roi_align_rotated_cuda_kernel.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..33571f29674f53674415afe1bb4cc3ea0d8a9865
--- /dev/null
+++ b/PyTorch/contrib/cv/semantic_segmentation/DPT/mmcv_replace/ops/csrc/common/cuda/roi_align_rotated_cuda_kernel.cuh
@@ -0,0 +1,202 @@
+// Modified from
+// https://github.com/facebookresearch/detectron2/tree/master/detectron2/layers/csrc/ROIAlignRotated
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+#ifndef ROI_ALIGN_ROTATED_CUDA_KERNEL_CUH
+#define ROI_ALIGN_ROTATED_CUDA_KERNEL_CUH
+
+#include <float.h>
+#ifdef MMCV_WITH_TRT
+#include "common_cuda_helper.hpp"
+#else // MMCV_WITH_TRT
+#ifdef MMCV_USE_PARROTS
+#include "parrots_cuda_helper.hpp"
+#else // MMCV_USE_PARROTS
+#include "pytorch_cuda_helper.hpp"
+#endif // MMCV_USE_PARROTS
+#endif // MMCV_WITH_TRT
+
+/*** Forward ***/
+template <typename scalar_t>